Repository: unslothai/unsloth
Branch: main
Commit: d0e5a1d61e5c
Files: 759
Total size: 5.8 MB

Directory structure:
gitextract_mbg5_5ju/

├── .gitattributes
├── .github/
│   ├── CODEOWNERS
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug---issue.md
│   │   └── feature-request.md
│   └── workflows/
│       └── stale.yml
├── .gitignore
├── .pre-commit-ci.yaml
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── COPYING
├── LICENSE
├── README.md
├── build.sh
├── cli.py
├── install.ps1
├── install.sh
├── pyproject.toml
├── scripts/
│   ├── enforce_kwargs_spacing.py
│   └── run_ruff_format.py
├── studio/
│   ├── LICENSE.AGPL-3.0
│   ├── Unsloth_Studio_Colab.ipynb
│   ├── __init__.py
│   ├── backend/
│   │   ├── __init__.py
│   │   ├── assets/
│   │   │   ├── __init__.py
│   │   │   └── configs/
│   │   │       ├── __init__.py
│   │   │       ├── full_finetune.yaml
│   │   │       ├── inference_defaults.json
│   │   │       ├── lora_text.yaml
│   │   │       ├── model_defaults/
│   │   │       │   ├── default.yaml
│   │   │       │   ├── embedding/
│   │   │       │   │   ├── unsloth_Qwen3-Embedding-0.6B.yaml
│   │   │       │   │   ├── unsloth_all-MiniLM-L6-v2.yaml
│   │   │       │   │   ├── unsloth_bge-m3.yaml
│   │   │       │   │   ├── unsloth_embeddinggemma-300m.yaml
│   │   │       │   │   └── unsloth_gte-modernbert-base.yaml
│   │   │       │   ├── ernie/
│   │   │       │   │   ├── unsloth_ERNIE-4.5-21B-A3B-PT.yaml
│   │   │       │   │   └── unsloth_ERNIE-4.5-VL-28B-A3B-PT.yaml
│   │   │       │   ├── falcon/
│   │   │       │   │   └── tiiuae_Falcon-H1-0.5B-Instruct.yaml
│   │   │       │   ├── gemma/
│   │   │       │   │   ├── unsloth_codegemma-7b-bnb-4bit.yaml
│   │   │       │   │   ├── unsloth_functiongemma-270m-it.yaml
│   │   │       │   │   ├── unsloth_gemma-2-27b-bnb-4bit.yaml
│   │   │       │   │   ├── unsloth_gemma-2-2b.yaml
│   │   │       │   │   ├── unsloth_gemma-3-270m-it.yaml
│   │   │       │   │   ├── unsloth_gemma-3-27b-it.yaml
│   │   │       │   │   ├── unsloth_gemma-3-4b-it.yaml
│   │   │       │   │   ├── unsloth_gemma-3-4b-pt.yaml
│   │   │       │   │   ├── unsloth_gemma-3n-E4B-it.yaml
│   │   │       │   │   └── unsloth_gemma-3n-E4B.yaml
│   │   │       │   ├── gpt-oss/
│   │   │       │   │   ├── unsloth_gpt-oss-120b.yaml
│   │   │       │   │   └── unsloth_gpt-oss-20b.yaml
│   │   │       │   ├── granite/
│   │   │       │   │   ├── unsloth_granite-4.0-350m-unsloth-bnb-4bit.yaml
│   │   │       │   │   └── unsloth_granite-4.0-h-micro.yaml
│   │   │       │   ├── llama/
│   │   │       │   │   ├── unsloth_Llama-3.2-11B-Vision-Instruct.yaml
│   │   │       │   │   ├── unsloth_Llama-3.2-1B-Instruct.yaml
│   │   │       │   │   ├── unsloth_Llama-3.2-3B-Instruct.yaml
│   │   │       │   │   ├── unsloth_Llama-3.3-70B-Instruct.yaml
│   │   │       │   │   ├── unsloth_Meta-Llama-3.1-70B-bnb-4bit.yaml
│   │   │       │   │   ├── unsloth_Meta-Llama-3.1-8B-Instruct-bnb-4bit.yaml
│   │   │       │   │   ├── unsloth_llama-3-8b-Instruct-bnb-4bit.yaml
│   │   │       │   │   └── unsloth_llama-3-8b-bnb-4bit.yaml
│   │   │       │   ├── llasa/
│   │   │       │   │   └── unsloth_Llasa-3B.yaml
│   │   │       │   ├── mistral/
│   │   │       │   │   ├── unsloth_Magistral-Small-2509-unsloth-bnb-4bit.yaml
│   │   │       │   │   ├── unsloth_Ministral-3-3B-Instruct-2512.yaml
│   │   │       │   │   ├── unsloth_Mistral-Nemo-Base-2407-bnb-4bit.yaml
│   │   │       │   │   ├── unsloth_Mistral-Small-Instruct-2409.yaml
│   │   │       │   │   ├── unsloth_Pixtral-12B-2409.yaml
│   │   │       │   │   ├── unsloth_mistral-7b-instruct-v0.3-bnb-4bit.yaml
│   │   │       │   │   └── unsloth_mistral-7b-v0.3-bnb-4bit.yaml
│   │   │       │   ├── other/
│   │   │       │   │   ├── OuteAI_Llama-OuteTTS-1.0-1B.yaml
│   │   │       │   │   ├── Spark-TTS-0.5B_LLM.yaml
│   │   │       │   │   ├── sesame_csm-1b.yaml
│   │   │       │   │   ├── unsloth_GLM-4.7-Flash.yaml
│   │   │       │   │   ├── unsloth_LFM2-1.2B.yaml
│   │   │       │   │   ├── unsloth_Nemotron-3-Nano-30B-A3B.yaml
│   │   │       │   │   ├── unsloth_PaddleOCR-VL.yaml
│   │   │       │   │   ├── unsloth_answerdotai_ModernBERT-large.yaml
│   │   │       │   │   ├── unsloth_orpheus-3b-0.1-ft.yaml
│   │   │       │   │   ├── unsloth_tinyllama-bnb-4bit.yaml
│   │   │       │   │   └── unsloth_whisper-large-v3.yaml
│   │   │       │   ├── phi/
│   │   │       │   │   ├── unsloth_Phi-3-medium-4k-instruct.yaml
│   │   │       │   │   ├── unsloth_Phi-3.5-mini-instruct.yaml
│   │   │       │   │   └── unsloth_Phi-4.yaml
│   │   │       │   └── qwen/
│   │   │       │       ├── imdatta0_tiny_qwen3_moe_2.8B_0.7B.yaml
│   │   │       │       ├── unsloth_Qwen2-7B.yaml
│   │   │       │       ├── unsloth_Qwen2-VL-7B-Instruct.yaml
│   │   │       │       ├── unsloth_Qwen2.5-1.5B-Instruct.yaml
│   │   │       │       ├── unsloth_Qwen2.5-7B.yaml
│   │   │       │       ├── unsloth_Qwen2.5-Coder-1.5B-Instruct.yaml
│   │   │       │       ├── unsloth_Qwen2.5-Coder-14B-Instruct.yaml
│   │   │       │       ├── unsloth_Qwen2.5-Coder-7B-Instruct-bnb-4bit.yaml
│   │   │       │       ├── unsloth_Qwen2.5-VL-7B-Instruct-bnb-4bit.yaml
│   │   │       │       ├── unsloth_Qwen3-0.6B.yaml
│   │   │       │       ├── unsloth_Qwen3-14B-Base-unsloth-bnb-4bit.yaml
│   │   │       │       ├── unsloth_Qwen3-14B.yaml
│   │   │       │       ├── unsloth_Qwen3-30B-A3B-Instruct-2507.yaml
│   │   │       │       ├── unsloth_Qwen3-32B.yaml
│   │   │       │       ├── unsloth_Qwen3-4B-Instruct-2507.yaml
│   │   │       │       ├── unsloth_Qwen3-4B-Thinking-2507.yaml
│   │   │       │       └── unsloth_Qwen3-VL-8B-Instruct-unsloth-bnb-4bit.yaml
│   │   │       └── vision_lora.yaml
│   │   ├── auth/
│   │   │   ├── .gitkeep
│   │   │   ├── __init__.py
│   │   │   ├── authentication.py
│   │   │   ├── hashing.py
│   │   │   └── storage.py
│   │   ├── colab.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── data_recipe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── huggingface.py
│   │   │   │   ├── jobs/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── constants.py
│   │   │   │   │   ├── manager.py
│   │   │   │   │   ├── parse.py
│   │   │   │   │   ├── types.py
│   │   │   │   │   └── worker.py
│   │   │   │   ├── jsonable.py
│   │   │   │   ├── local_callable_validators.py
│   │   │   │   ├── oxc-validator/
│   │   │   │   │   ├── package.json
│   │   │   │   │   └── validate.mjs
│   │   │   │   └── service.py
│   │   │   ├── export/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── export.py
│   │   │   │   ├── orchestrator.py
│   │   │   │   └── worker.py
│   │   │   ├── inference/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_codecs.py
│   │   │   │   ├── defaults.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── llama_cpp.py
│   │   │   │   ├── orchestrator.py
│   │   │   │   ├── tools.py
│   │   │   │   └── worker.py
│   │   │   └── training/
│   │   │       ├── __init__.py
│   │   │       ├── trainer.py
│   │   │       ├── training.py
│   │   │       └── worker.py
│   │   ├── loggers/
│   │   │   ├── .gitkeep
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── handlers.py
│   │   ├── main.py
│   │   ├── models/
│   │   │   ├── .gitkeep
│   │   │   ├── __init__.py
│   │   │   ├── auth.py
│   │   │   ├── data_recipe.py
│   │   │   ├── datasets.py
│   │   │   ├── export.py
│   │   │   ├── inference.py
│   │   │   ├── models.py
│   │   │   ├── responses.py
│   │   │   ├── training.py
│   │   │   └── users.py
│   │   ├── plugins/
│   │   │   ├── __init__.py
│   │   │   └── data-designer-unstructured-seed/
│   │   │       ├── __init__.py
│   │   │       ├── pyproject.toml
│   │   │       └── src/
│   │   │           └── data_designer_unstructured_seed/
│   │   │               ├── __init__.py
│   │   │               ├── chunking.py
│   │   │               ├── config.py
│   │   │               ├── impl.py
│   │   │               └── plugin.py
│   │   ├── requirements/
│   │   │   ├── __init__.py
│   │   │   ├── base.txt
│   │   │   ├── extras-no-deps.txt
│   │   │   ├── extras.txt
│   │   │   ├── overrides.txt
│   │   │   ├── single-env/
│   │   │   │   ├── constraints.txt
│   │   │   │   ├── data-designer-deps.txt
│   │   │   │   ├── data-designer.txt
│   │   │   │   └── patch_metadata.py
│   │   │   ├── studio.txt
│   │   │   └── triton-kernels.txt
│   │   ├── routes/
│   │   │   ├── .gitkeep
│   │   │   ├── __init__.py
│   │   │   ├── auth.py
│   │   │   ├── data_recipe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── jobs.py
│   │   │   │   ├── mcp.py
│   │   │   │   ├── seed.py
│   │   │   │   └── validate.py
│   │   │   ├── datasets.py
│   │   │   ├── export.py
│   │   │   ├── inference.py
│   │   │   ├── models.py
│   │   │   └── training.py
│   │   ├── run.py
│   │   ├── state/
│   │   │   ├── .gitkeep
│   │   │   └── __init__.py
│   │   ├── tests/
│   │   │   ├── __init__.py
│   │   │   ├── conftest.py
│   │   │   ├── test_data_recipe_seed.py
│   │   │   └── test_utils.py
│   │   └── utils/
│   │       ├── .gitkeep
│   │       ├── __init__.py
│   │       ├── cache_cleanup.py
│   │       ├── datasets/
│   │       │   ├── __init__.py
│   │       │   ├── chat_templates.py
│   │       │   ├── data_collators.py
│   │       │   ├── dataset_utils.py
│   │       │   ├── format_conversion.py
│   │       │   ├── format_detection.py
│   │       │   ├── llm_assist.py
│   │       │   ├── model_mappings.py
│   │       │   └── vlm_processing.py
│   │       ├── hardware/
│   │       │   ├── __init__.py
│   │       │   └── hardware.py
│   │       ├── inference/
│   │       │   ├── __init__.py
│   │       │   └── inference_config.py
│   │       ├── models/
│   │       │   ├── __init__.py
│   │       │   ├── checkpoints.py
│   │       │   └── model_config.py
│   │       ├── paths/
│   │       │   ├── __init__.py
│   │       │   ├── path_utils.py
│   │       │   └── storage_roots.py
│   │       ├── transformers_version.py
│   │       └── utils.py
│   ├── frontend/
│   │   ├── .gitignore
│   │   ├── .gitkeep
│   │   ├── biome.json
│   │   ├── components.json
│   │   ├── data-designer.openapi (1).yaml
│   │   ├── eslint.config.js
│   │   ├── index.html
│   │   ├── package.json
│   │   ├── public/
│   │   │   └── Hellix font official/
│   │   │       └── OTF/
│   │   │           └── Hellix-SemiBold.otf
│   │   ├── src/
│   │   │   ├── app/
│   │   │   │   ├── app.tsx
│   │   │   │   ├── auth-guards.ts
│   │   │   │   ├── provider.tsx
│   │   │   │   ├── router.tsx
│   │   │   │   └── routes/
│   │   │   │       ├── __root.tsx
│   │   │   │       ├── change-password.tsx
│   │   │   │       ├── chat.tsx
│   │   │   │       ├── data-recipes.$recipeId.tsx
│   │   │   │       ├── data-recipes.tsx
│   │   │   │       ├── export.tsx
│   │   │   │       ├── grid-test.tsx
│   │   │   │       ├── index.tsx
│   │   │   │       ├── login.tsx
│   │   │   │       ├── onboarding.tsx
│   │   │   │       └── studio.tsx
│   │   │   ├── components/
│   │   │   │   ├── assistant-ui/
│   │   │   │   │   ├── attachment.tsx
│   │   │   │   │   ├── audio-player.tsx
│   │   │   │   │   ├── badge.tsx
│   │   │   │   │   ├── markdown-text.tsx
│   │   │   │   │   ├── message-timing.tsx
│   │   │   │   │   ├── model-selector/
│   │   │   │   │   │   ├── pickers.tsx
│   │   │   │   │   │   └── types.ts
│   │   │   │   │   ├── model-selector.tsx
│   │   │   │   │   ├── reasoning.tsx
│   │   │   │   │   ├── sources.tsx
│   │   │   │   │   ├── thread.tsx
│   │   │   │   │   ├── tool-fallback.tsx
│   │   │   │   │   ├── tool-group.tsx
│   │   │   │   │   ├── tool-ui-python.tsx
│   │   │   │   │   ├── tool-ui-terminal.tsx
│   │   │   │   │   ├── tool-ui-web-search.tsx
│   │   │   │   │   └── tooltip-icon-button.tsx
│   │   │   │   ├── example.tsx
│   │   │   │   ├── layout/
│   │   │   │   │   ├── dashboard-grid.tsx
│   │   │   │   │   ├── dashboard-layout.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── markdown/
│   │   │   │   │   ├── markdown-preview.tsx
│   │   │   │   │   └── mermaid-error.tsx
│   │   │   │   ├── navbar.tsx
│   │   │   │   ├── section-card.tsx
│   │   │   │   └── ui/
│   │   │   │       ├── accordion.tsx
│   │   │   │       ├── alert-dialog.tsx
│   │   │   │       ├── alert.tsx
│   │   │   │       ├── animated-shiny-text.tsx
│   │   │   │       ├── animated-theme-toggler.tsx
│   │   │   │       ├── aspect-ratio.tsx
│   │   │   │       ├── avatar.tsx
│   │   │   │       ├── badge.tsx
│   │   │   │       ├── breadcrumb.tsx
│   │   │   │       ├── button.tsx
│   │   │   │       ├── calendar.tsx
│   │   │   │       ├── card.tsx
│   │   │   │       ├── chart.tsx
│   │   │   │       ├── checkbox.tsx
│   │   │   │       ├── collapsible.tsx
│   │   │   │       ├── combobox.tsx
│   │   │   │       ├── command.tsx
│   │   │   │       ├── confetti.tsx
│   │   │   │       ├── context-menu.tsx
│   │   │   │       ├── data-table.tsx
│   │   │   │       ├── dialog.tsx
│   │   │   │       ├── dropdown-menu.tsx
│   │   │   │       ├── empty.tsx
│   │   │   │       ├── field.tsx
│   │   │   │       ├── hover-card.tsx
│   │   │   │       ├── input-group.tsx
│   │   │   │       ├── input.tsx
│   │   │   │       ├── label.tsx
│   │   │   │       ├── light-rays.tsx
│   │   │   │       ├── menubar.tsx
│   │   │   │       ├── navigation-menu.tsx
│   │   │   │       ├── pagination.tsx
│   │   │   │       ├── popover.tsx
│   │   │   │       ├── progress.tsx
│   │   │   │       ├── radio-group.tsx
│   │   │   │       ├── resizable.tsx
│   │   │   │       ├── scroll-area.tsx
│   │   │   │       ├── select.tsx
│   │   │   │       ├── separator.tsx
│   │   │   │       ├── sheet.tsx
│   │   │   │       ├── shine-border.tsx
│   │   │   │       ├── sidebar.tsx
│   │   │   │       ├── skeleton.tsx
│   │   │   │       ├── slider.tsx
│   │   │   │       ├── sonner.tsx
│   │   │   │       ├── sparkles-text.tsx
│   │   │   │       ├── spinner.tsx
│   │   │   │       ├── switch.tsx
│   │   │   │       ├── table.tsx
│   │   │   │       ├── tabs.tsx
│   │   │   │       ├── terminal.tsx
│   │   │   │       ├── textarea.tsx
│   │   │   │       ├── toggle-group.tsx
│   │   │   │       ├── toggle.tsx
│   │   │   │       └── tooltip.tsx
│   │   │   ├── config/
│   │   │   │   ├── env.ts
│   │   │   │   └── training.ts
│   │   │   ├── features/
│   │   │   │   ├── auth/
│   │   │   │   │   ├── api.ts
│   │   │   │   │   ├── change-password-page.tsx
│   │   │   │   │   ├── components/
│   │   │   │   │   │   └── auth-form.tsx
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── login-page.tsx
│   │   │   │   │   └── session.ts
│   │   │   │   ├── chat/
│   │   │   │   │   ├── api/
│   │   │   │   │   │   ├── chat-adapter.ts
│   │   │   │   │   │   └── chat-api.ts
│   │   │   │   │   ├── chat-page.tsx
│   │   │   │   │   ├── chat-settings-sheet.tsx
│   │   │   │   │   ├── components/
│   │   │   │   │   │   └── model-load-status.tsx
│   │   │   │   │   ├── db.ts
│   │   │   │   │   ├── hooks/
│   │   │   │   │   │   └── use-chat-model-runtime.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── runtime-provider.tsx
│   │   │   │   │   ├── shared-composer.tsx
│   │   │   │   │   ├── stores/
│   │   │   │   │   │   └── chat-runtime-store.ts
│   │   │   │   │   ├── thread-sidebar.tsx
│   │   │   │   │   ├── tour/
│   │   │   │   │   │   ├── index.ts
│   │   │   │   │   │   └── steps.tsx
│   │   │   │   │   ├── types/
│   │   │   │   │   │   ├── api.ts
│   │   │   │   │   │   └── runtime.ts
│   │   │   │   │   ├── types.ts
│   │   │   │   │   └── utils/
│   │   │   │   │       └── parse-assistant-content.ts
│   │   │   │   ├── data-recipes/
│   │   │   │   │   ├── data/
│   │   │   │   │   │   └── recipes-db.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── learning-recipes/
│   │   │   │   │   │   ├── conversation.json
│   │   │   │   │   │   ├── index.ts
│   │   │   │   │   │   ├── instruction-from-answer.json
│   │   │   │   │   │   ├── ocr-document-extraction.json
│   │   │   │   │   │   ├── pdf-grounded-qa.json
│   │   │   │   │   │   ├── structured-outputs-jinja.json
│   │   │   │   │   │   ├── text-to-python.json
│   │   │   │   │   │   └── text-to-sql.json
│   │   │   │   │   ├── pages/
│   │   │   │   │   │   ├── data-recipes-page.tsx
│   │   │   │   │   │   └── edit-recipe-page.tsx
│   │   │   │   │   └── types.ts
│   │   │   │   ├── export/
│   │   │   │   │   ├── anim.ts
│   │   │   │   │   ├── api/
│   │   │   │   │   │   └── export-api.ts
│   │   │   │   │   ├── components/
│   │   │   │   │   │   ├── export-dialog.tsx
│   │   │   │   │   │   ├── method-picker.tsx
│   │   │   │   │   │   └── quant-picker.tsx
│   │   │   │   │   ├── constants.ts
│   │   │   │   │   ├── export-page.tsx
│   │   │   │   │   ├── index.ts
│   │   │   │   │   └── tour/
│   │   │   │   │       ├── index.ts
│   │   │   │   │       └── steps.tsx
│   │   │   │   ├── onboarding/
│   │   │   │   │   ├── components/
│   │   │   │   │   │   ├── splash-screen.tsx
│   │   │   │   │   │   ├── steps/
│   │   │   │   │   │   │   ├── dataset-step.tsx
│   │   │   │   │   │   │   ├── hyperparameters-step.tsx
│   │   │   │   │   │   │   ├── model-selection-step.tsx
│   │   │   │   │   │   │   ├── model-type-step.tsx
│   │   │   │   │   │   │   └── summary-step.tsx
│   │   │   │   │   │   ├── wizard-content.tsx
│   │   │   │   │   │   ├── wizard-footer.tsx
│   │   │   │   │   │   ├── wizard-layout.tsx
│   │   │   │   │   │   ├── wizard-sidebar.tsx
│   │   │   │   │   │   └── wizard-step-item.tsx
│   │   │   │   │   └── index.ts
│   │   │   │   ├── recipe-studio/
│   │   │   │   │   ├── api/
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   ├── blocks/
│   │   │   │   │   │   ├── definitions.ts
│   │   │   │   │   │   ├── registry.ts
│   │   │   │   │   │   └── render-dialog.tsx
│   │   │   │   │   ├── components/
│   │   │   │   │   │   ├── block-sheet.tsx
│   │   │   │   │   │   ├── chip-input.tsx
│   │   │   │   │   │   ├── controls/
│   │   │   │   │   │   │   ├── layout-controls.tsx
│   │   │   │   │   │   │   ├── run-validate-floating-controls.tsx
│   │   │   │   │   │   │   └── viewport-controls.tsx
│   │   │   │   │   │   ├── executions/
│   │   │   │   │   │   │   ├── execution-columns-tab.tsx
│   │   │   │   │   │   │   ├── execution-data-tab.tsx
│   │   │   │   │   │   │   ├── execution-overview-tab.tsx
│   │   │   │   │   │   │   ├── execution-raw-tab.tsx
│   │   │   │   │   │   │   ├── execution-sidebar.tsx
│   │   │   │   │   │   │   ├── executions-view-helpers.ts
│   │   │   │   │   │   │   ├── executions-view.tsx
│   │   │   │   │   │   │   └── publish-execution-dialog.tsx
│   │   │   │   │   │   ├── graph/
│   │   │   │   │   │   │   └── internals-sync.tsx
│   │   │   │   │   │   ├── inline/
│   │   │   │   │   │   │   ├── inline-category-badges.tsx
│   │   │   │   │   │   │   ├── inline-expression.tsx
│   │   │   │   │   │   │   ├── inline-field.tsx
│   │   │   │   │   │   │   ├── inline-llm.tsx
│   │   │   │   │   │   │   ├── inline-model.tsx
│   │   │   │   │   │   │   ├── inline-policy.ts
│   │   │   │   │   │   │   ├── inline-sampler.tsx
│   │   │   │   │   │   │   └── inline-seed.tsx
│   │   │   │   │   │   ├── recipe-floating-icon-button-class.ts
│   │   │   │   │   │   ├── recipe-graph-aux-node.tsx
│   │   │   │   │   │   ├── recipe-graph-node.tsx
│   │   │   │   │   │   ├── recipe-graph-semantic-edge.tsx
│   │   │   │   │   │   ├── recipe-studio-header.tsx
│   │   │   │   │   │   ├── rf-ui/
│   │   │   │   │   │   │   ├── base-handle.tsx
│   │   │   │   │   │   │   ├── base-node.tsx
│   │   │   │   │   │   │   ├── data-edge.tsx
│   │   │   │   │   │   │   └── labeled-handle.tsx
│   │   │   │   │   │   ├── runtime/
│   │   │   │   │   │   │   └── execution-progress-island.tsx
│   │   │   │   │   │   └── shared/
│   │   │   │   │   │       ├── available-references-inline.tsx
│   │   │   │   │   │       └── hf-dataset-combobox.tsx
│   │   │   │   │   ├── constants.ts
│   │   │   │   │   ├── data/
│   │   │   │   │   │   └── executions-db.ts
│   │   │   │   │   ├── dialogs/
│   │   │   │   │   │   ├── config-dialog.tsx
│   │   │   │   │   │   ├── expression/
│   │   │   │   │   │   │   └── expression-dialog.tsx
│   │   │   │   │   │   ├── import-dialog.tsx
│   │   │   │   │   │   ├── llm/
│   │   │   │   │   │   │   ├── general-tab.tsx
│   │   │   │   │   │   │   ├── llm-dialog.tsx
│   │   │   │   │   │   │   └── scores-tab.tsx
│   │   │   │   │   │   ├── markdown-note/
│   │   │   │   │   │   │   └── markdown-note-dialog.tsx
│   │   │   │   │   │   ├── models/
│   │   │   │   │   │   │   ├── model-config-dialog.tsx
│   │   │   │   │   │   │   └── model-provider-dialog.tsx
│   │   │   │   │   │   ├── preview-dialog.tsx
│   │   │   │   │   │   ├── processors-dialog.tsx
│   │   │   │   │   │   ├── samplers/
│   │   │   │   │   │   │   ├── bernoulli-dialog.tsx
│   │   │   │   │   │   │   ├── category-dialog.tsx
│   │   │   │   │   │   │   ├── datetime-dialog.tsx
│   │   │   │   │   │   │   ├── gaussian-dialog.tsx
│   │   │   │   │   │   │   ├── person-dialog.tsx
│   │   │   │   │   │   │   ├── subcategory-dialog.tsx
│   │   │   │   │   │   │   ├── timedelta-dialog.tsx
│   │   │   │   │   │   │   ├── uniform-dialog.tsx
│   │   │   │   │   │   │   └── uuid-dialog.tsx
│   │   │   │   │   │   ├── seed/
│   │   │   │   │   │   │   └── seed-dialog.tsx
│   │   │   │   │   │   ├── shared/
│   │   │   │   │   │   │   ├── available-variables.tsx
│   │   │   │   │   │   │   ├── collapsible-section-trigger.tsx
│   │   │   │   │   │   │   ├── dialog-shell.tsx
│   │   │   │   │   │   │   ├── field-label.tsx
│   │   │   │   │   │   │   ├── name-field.tsx
│   │   │   │   │   │   │   └── validation-banner.tsx
│   │   │   │   │   │   ├── tool-profile/
│   │   │   │   │   │   │   ├── helpers.ts
│   │   │   │   │   │   │   └── tool-profile-dialog.tsx
│   │   │   │   │   │   └── validators/
│   │   │   │   │   │       └── validator-dialog.tsx
│   │   │   │   │   ├── execution-types.ts
│   │   │   │   │   ├── executions/
│   │   │   │   │   │   ├── execution-helpers.ts
│   │   │   │   │   │   ├── hydration.ts
│   │   │   │   │   │   ├── run-settings.ts
│   │   │   │   │   │   ├── runtime.ts
│   │   │   │   │   │   └── tracker.ts
│   │   │   │   │   ├── hooks/
│   │   │   │   │   │   ├── use-node-connection-status.ts
│   │   │   │   │   │   ├── use-recipe-editor-graph.ts
│   │   │   │   │   │   ├── use-recipe-executions.ts
│   │   │   │   │   │   ├── use-recipe-persistence.ts
│   │   │   │   │   │   ├── use-recipe-runtime-visuals.ts
│   │   │   │   │   │   └── use-recipe-studio-actions.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── recipe-studio-page.tsx
│   │   │   │   │   ├── stores/
│   │   │   │   │   │   ├── helpers/
│   │   │   │   │   │   │   ├── edge-sync.ts
│   │   │   │   │   │   │   ├── model-infra-layout.ts
│   │   │   │   │   │   │   ├── node-updates.ts
│   │   │   │   │   │   │   ├── reference-sync.ts
│   │   │   │   │   │   │   └── removals.ts
│   │   │   │   │   │   ├── recipe-executions.ts
│   │   │   │   │   │   ├── recipe-studio-helpers.ts
│   │   │   │   │   │   └── recipe-studio.ts
│   │   │   │   │   ├── types/
│   │   │   │   │   │   └── index.ts
│   │   │   │   │   └── utils/
│   │   │   │   │       ├── config-factories.ts
│   │   │   │   │       ├── config-labels.ts
│   │   │   │   │       ├── config-type-guards.ts
│   │   │   │   │       ├── graph/
│   │   │   │   │       │   ├── derive-display-graph.ts
│   │   │   │   │       │   ├── fit-view.ts
│   │   │   │   │       │   ├── recipe-graph-connection.ts
│   │   │   │   │       │   ├── relations.ts
│   │   │   │   │       │   └── runtime-visual-state.ts
│   │   │   │   │       ├── graph-warnings.ts
│   │   │   │   │       ├── graph.ts
│   │   │   │   │       ├── handle-layout.ts
│   │   │   │   │       ├── handles.ts
│   │   │   │   │       ├── image-preview.ts
│   │   │   │   │       ├── import/
│   │   │   │   │       │   ├── edges.ts
│   │   │   │   │       │   ├── helpers.ts
│   │   │   │   │       │   ├── importer.ts
│   │   │   │   │       │   ├── index.ts
│   │   │   │   │       │   ├── parsers/
│   │   │   │   │       │   │   ├── expression-parser.ts
│   │   │   │   │       │   │   ├── llm-parser.ts
│   │   │   │   │       │   │   ├── model-parser.ts
│   │   │   │   │       │   │   ├── sampler-parser.ts
│   │   │   │   │       │   │   ├── seed-config-parser.ts
│   │   │   │   │       │   │   └── validator-parser.ts
│   │   │   │   │       │   ├── parsers.ts
│   │   │   │   │       │   ├── types.ts
│   │   │   │   │       │   └── ui.ts
│   │   │   │   │       ├── index.ts
│   │   │   │   │       ├── layout.ts
│   │   │   │   │       ├── naming.ts
│   │   │   │   │       ├── node-data.ts
│   │   │   │   │       ├── parse.ts
│   │   │   │   │       ├── payload/
│   │   │   │   │       │   ├── build-payload.ts
│   │   │   │   │       │   ├── builders-llm.ts
│   │   │   │   │       │   ├── builders-model.ts
│   │   │   │   │       │   ├── builders-processors.ts
│   │   │   │   │       │   ├── builders-sampler.ts
│   │   │   │   │       │   ├── builders-seed.ts
│   │   │   │   │       │   ├── builders-validator.ts
│   │   │   │   │       │   ├── builders.ts
│   │   │   │   │       │   ├── empty.ts
│   │   │   │   │       │   ├── index.ts
│   │   │   │   │       │   ├── parse.ts
│   │   │   │   │       │   ├── types.ts
│   │   │   │   │       │   └── validate.ts
│   │   │   │   │       ├── processors.ts
│   │   │   │   │       ├── reactflow-changes.ts
│   │   │   │   │       ├── recipe-studio-view.ts
│   │   │   │   │       ├── refs.ts
│   │   │   │   │       ├── rf-node-dimensions.ts
│   │   │   │   │       ├── ui-tones.ts
│   │   │   │   │       ├── validation.ts
│   │   │   │   │       ├── validators/
│   │   │   │   │       │   ├── code-lang.ts
│   │   │   │   │       │   ├── oxc-code-shape.ts
│   │   │   │   │       │   └── oxc-mode.ts
│   │   │   │   │       └── variables.ts
│   │   │   │   ├── studio/
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── sections/
│   │   │   │   │   │   ├── charts/
│   │   │   │   │   │   │   ├── chart-preferences-store.ts
│   │   │   │   │   │   │   ├── chart-settings-sheet.tsx
│   │   │   │   │   │   │   ├── eval-loss-chart-card.tsx
│   │   │   │   │   │   │   ├── grad-norm-chart-card.tsx
│   │   │   │   │   │   │   ├── learning-rate-chart-card.tsx
│   │   │   │   │   │   │   ├── training-loss-chart-card.tsx
│   │   │   │   │   │   │   ├── types.ts
│   │   │   │   │   │   │   └── utils.ts
│   │   │   │   │   │   ├── charts-content.tsx
│   │   │   │   │   │   ├── charts-section.tsx
│   │   │   │   │   │   ├── dataset-preview-dialog-mapping.tsx
│   │   │   │   │   │   ├── dataset-preview-dialog-utils.ts
│   │   │   │   │   │   ├── dataset-preview-dialog.tsx
│   │   │   │   │   │   ├── dataset-section.tsx
│   │   │   │   │   │   ├── document-upload-redirect-dialog.tsx
│   │   │   │   │   │   ├── model-section.tsx
│   │   │   │   │   │   ├── params-section.tsx
│   │   │   │   │   │   ├── progress-section-lib.ts
│   │   │   │   │   │   ├── progress-section.tsx
│   │   │   │   │   │   └── training-section.tsx
│   │   │   │   │   ├── studio-page.tsx
│   │   │   │   │   ├── tour/
│   │   │   │   │   │   ├── index.ts
│   │   │   │   │   │   ├── steps/
│   │   │   │   │   │   │   ├── base-model.tsx
│   │   │   │   │   │   │   ├── dataset.tsx
│   │   │   │   │   │   │   ├── index.tsx
│   │   │   │   │   │   │   ├── local-model.tsx
│   │   │   │   │   │   │   ├── method.tsx
│   │   │   │   │   │   │   ├── nav.tsx
│   │   │   │   │   │   │   ├── params.tsx
│   │   │   │   │   │   │   ├── save.tsx
│   │   │   │   │   │   │   └── start.tsx
│   │   │   │   │   │   └── training/
│   │   │   │   │   │       ├── index.ts
│   │   │   │   │   │       └── steps.tsx
│   │   │   │   │   ├── training-start-overlay.tsx
│   │   │   │   │   └── training-view.tsx
│   │   │   │   ├── tour/
│   │   │   │   │   ├── components/
│   │   │   │   │   │   ├── guided-tour.tsx
│   │   │   │   │   │   ├── read-more.tsx
│   │   │   │   │   │   └── spotlight-overlay.tsx
│   │   │   │   │   ├── hooks/
│   │   │   │   │   │   └── use-guided-tour-controller.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   └── training/
│   │   │   │       ├── api/
│   │   │   │       │   ├── datasets-api.ts
│   │   │   │       │   ├── mappers.ts
│   │   │   │       │   ├── models-api.ts
│   │   │   │       │   └── train-api.ts
│   │   │   │       ├── components/
│   │   │   │       │   └── hf-dataset-subset-split-selectors.tsx
│   │   │   │       ├── hooks/
│   │   │   │       │   ├── use-max-steps-epochs-toggle.ts
│   │   │   │       │   ├── use-training-actions.ts
│   │   │   │       │   └── use-training-runtime-lifecycle.ts
│   │   │   │       ├── index.ts
│   │   │   │       ├── stores/
│   │   │   │       │   ├── dataset-preview-dialog-store.ts
│   │   │   │       │   ├── training-config-store.ts
│   │   │   │       │   └── training-runtime-store.ts
│   │   │   │       └── types/
│   │   │   │           ├── api.ts
│   │   │   │           ├── config.ts
│   │   │   │           ├── datasets.ts
│   │   │   │           └── runtime.ts
│   │   │   ├── hooks/
│   │   │   │   ├── index.ts
│   │   │   │   ├── use-debounced-value.ts
│   │   │   │   ├── use-gpu-info.ts
│   │   │   │   ├── use-gpu-utilization.ts
│   │   │   │   ├── use-hardware-info.ts
│   │   │   │   ├── use-hf-dataset-search.ts
│   │   │   │   ├── use-hf-dataset-splits.ts
│   │   │   │   ├── use-hf-model-search.ts
│   │   │   │   ├── use-hf-paginated-search.ts
│   │   │   │   ├── use-hf-token-validation.ts
│   │   │   │   ├── use-infinite-scroll.ts
│   │   │   │   ├── use-mobile.ts
│   │   │   │   └── use-recommended-model-vram.ts
│   │   │   ├── index.css
│   │   │   ├── main.tsx
│   │   │   ├── shared/
│   │   │   │   └── toast.ts
│   │   │   ├── speech-recognition.d.ts
│   │   │   ├── stores/
│   │   │   │   ├── index.ts
│   │   │   │   └── training.ts
│   │   │   ├── types/
│   │   │   │   ├── index.ts
│   │   │   │   └── training.ts
│   │   │   └── utils/
│   │   │       ├── index.ts
│   │   │       └── strings.ts
│   │   ├── tsconfig.app.json
│   │   ├── tsconfig.json
│   │   ├── tsconfig.node.json
│   │   └── vite.config.ts
│   ├── install_python_stack.py
│   ├── setup.bat
│   ├── setup.ps1
│   └── setup.sh
├── tests/
│   ├── __init__.py
│   ├── qlora/
│   │   ├── README.md
│   │   ├── test_hf_qlora_train_and_merge.py
│   │   └── test_unsloth_qlora_train_and_merge.py
│   ├── saving/
│   │   ├── gpt-oss-merge/
│   │   │   ├── run_test.sh
│   │   │   ├── test_merged_model.py
│   │   │   └── train_and_merge.py
│   │   ├── language_models/
│   │   │   ├── test_merge_4bit_validation.py
│   │   │   ├── test_merge_model_perplexity_llama-3.2.py
│   │   │   ├── test_merge_model_perplexity_mistral.py
│   │   │   ├── test_merge_model_perplexity_phi_4.py
│   │   │   ├── test_merged_model_perplexity_llama-3.1-8b.py
│   │   │   ├── test_merged_model_perplexity_qwen_2.5.py
│   │   │   ├── test_push_to_hub_merged.py
│   │   │   ├── test_push_to_hub_merged_sharded_index_file.py
│   │   │   └── test_save_merged_grpo_model.py
│   │   ├── non_peft/
│   │   │   ├── test_mistral_non_peft.py
│   │   │   └── test_whisper_non_peft.py
│   │   ├── test_unsloth_save.py
│   │   ├── text_to_speech_models/
│   │   │   ├── test_csm.py
│   │   │   ├── test_lasa.py
│   │   │   ├── test_orpheus.py
│   │   │   └── test_whisper.py
│   │   └── vision_models/
│   │       ├── test_index_file_sharded_model.py
│   │       ├── test_push_to_hub_merged.py
│   │       ├── test_save_merge_qwen2.5vl32B_model_ocr_benchmark.py
│   │       └── test_save_merge_vision_model_ocr_benchmark.py
│   ├── test_get_model_name.py
│   ├── test_model_registry.py
│   ├── test_raw_text.py
│   └── utils/
│       ├── __init__.py
│       ├── aime_eval.md
│       ├── aime_eval.py
│       ├── cleanup_utils.py
│       ├── data_utils.py
│       ├── hf_utils.py
│       ├── ocr_eval.md
│       ├── ocr_eval.py
│       ├── os_utils.py
│       ├── perplexity_eval.md
│       ├── perplexity_eval.py
│       ├── test_attention_masks.py
│       ├── test_packing.py
│       ├── test_qat.py
│       └── test_trunc_normal_patch.py
├── unsloth/
│   ├── __init__.py
│   ├── _auto_install.py
│   ├── chat_templates.py
│   ├── dataprep/
│   │   ├── __init__.py
│   │   ├── raw_text.py
│   │   ├── synthetic.py
│   │   └── synthetic_configs.py
│   ├── device_type.py
│   ├── import_fixes.py
│   ├── kernels/
│   │   ├── __init__.py
│   │   ├── cross_entropy_loss.py
│   │   ├── fast_lora.py
│   │   ├── flex_attention.py
│   │   ├── fp8.py
│   │   ├── geglu.py
│   │   ├── layernorm.py
│   │   ├── moe/
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── autotune_cache.py
│   │   │   ├── benchmark/
│   │   │   │   ├── benchmark_fused_moe.py
│   │   │   │   └── utils.py
│   │   │   ├── grouped_gemm/
│   │   │   │   ├── LICENSE
│   │   │   │   ├── __init__.py
│   │   │   │   ├── interface.py
│   │   │   │   ├── kernels/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── autotuning.py
│   │   │   │   │   ├── backward.py
│   │   │   │   │   ├── forward.py
│   │   │   │   │   └── tuning.py
│   │   │   │   └── reference/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── layers/
│   │   │   │       │   ├── llama4_moe.py
│   │   │   │       │   └── qwen3_moe.py
│   │   │   │       ├── moe_block.py
│   │   │   │       └── moe_ops.py
│   │   │   ├── requirements.txt
│   │   │   └── tests/
│   │   │       ├── __init__.py
│   │   │       ├── common.py
│   │   │       ├── moe_utils.py
│   │   │       ├── run_qwen3_moe_tests.sh
│   │   │       ├── test_grouped_gemm.py
│   │   │       ├── test_llama4_moe.py
│   │   │       └── test_qwen3_moe.py
│   │   ├── rms_layernorm.py
│   │   ├── rope_embedding.py
│   │   ├── swiglu.py
│   │   └── utils.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── _utils.py
│   │   ├── cohere.py
│   │   ├── dpo.py
│   │   ├── falcon_h1.py
│   │   ├── gemma.py
│   │   ├── gemma2.py
│   │   ├── glm4_moe.py
│   │   ├── granite.py
│   │   ├── llama.py
│   │   ├── llama4.py
│   │   ├── loader.py
│   │   ├── loader_utils.py
│   │   ├── mapper.py
│   │   ├── mistral.py
│   │   ├── qwen2.py
│   │   ├── qwen3.py
│   │   ├── qwen3_moe.py
│   │   ├── rl.py
│   │   ├── rl_replacements.py
│   │   ├── sentence_transformer.py
│   │   └── vision.py
│   ├── ollama_template_mappers.py
│   ├── registry/
│   │   ├── REGISTRY.md
│   │   ├── __init__.py
│   │   ├── _deepseek.py
│   │   ├── _gemma.py
│   │   ├── _llama.py
│   │   ├── _mistral.py
│   │   ├── _phi.py
│   │   ├── _qwen.py
│   │   └── registry.py
│   ├── save.py
│   ├── tokenizer_utils.py
│   ├── trainer.py
│   └── utils/
│       ├── __init__.py
│       ├── attention_dispatch.py
│       ├── hf_hub.py
│       └── packing.py
├── unsloth-cli.py
└── unsloth_cli/
    ├── __init__.py
    ├── commands/
    │   ├── __init__.py
    │   ├── export.py
    │   ├── inference.py
    │   ├── studio.py
    │   ├── train.py
    │   └── ui.py
    ├── config.py
    └── options.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
# Normalize Python files to LF line endings
*.py text eol=lf


================================================
FILE: .github/CODEOWNERS
================================================
# Inspired by https://github.com/vllm-project/vllm/blob/main/.github/CODEOWNERS

/unsloth/models/loader.py @danielhanchen @mmathew23
/unsloth/models/llama.py @Datta0 @danielhanchen @mmathew23
/unsloth/models/rl.py @Datta0 @pluesclues @danielhanchen
/unsloth/models/rl_replacements.py @Datta0 @pluesclues @danielhanchen
/unsloth/trainer.py @danielhanchen
/unsloth/models/sentence_transformer.py @Etherll @danielhanchen
/unsloth/save.py @rolandtannous @danielhanchen
/unsloth/tokenizer_utils.py @mmathew23 @danielhanchen
/unsloth/chat_templates.py @rolandtannous @danielhanchen
/unsloth/ollama_template_mappers.py @rolandtannous @danielhanchen
/unsloth/kernels/moe/*.py @Datta0
/unsloth/import_fixes.py @danielhanchen
/unsloth/device_type.py @danielhanchen
/unsloth/_auto_install.py @danielhanchen
/unsloth/dataprep/*.py @danielhanchen
/unsloth/kernels/cross_entropy_loss.py @danielhanchen
/unsloth/kernels/fast_lora.py @danielhanchen
/unsloth/kernels/flex_attention.py @danielhanchen
/unsloth/kernels/fp8.py @Datta0
/unsloth/kernels/geglu.py @danielhanchen
/unsloth/kernels/layernorm.py @danielhanchen
/unsloth/kernels/rms_layernorm.py @danielhanchen
/unsloth/kernels/rope_embedding.py @danielhanchen
/unsloth/kernels/swiglu.py @danielhanchen
/unsloth/kernels/utils.py @danielhanchen @Datta0
/unsloth/models/_utils.py @danielhanchen @mmathew23
/unsloth/models/cohere.py @danielhanchen
/unsloth/models/dpo.py @danielhanchen
/unsloth/models/falcon_h1.py @danielhanchen
/unsloth/models/gemma.py @danielhanchen
/unsloth/models/gemma2.py @danielhanchen
/unsloth/models/glm4_moe.py @Datta0
/unsloth/models/granite.py @danielhanchen
/unsloth/models/llama4.py @danielhanchen
/unsloth/models/loader_utils.py @Datta0 @danielhanchen
/unsloth/models/mapper.py @danielhanchen
/unsloth/models/mistral.py @danielhanchen
/unsloth/models/qwen2.py @danielhanchen
/unsloth/models/qwen3.py @Datta0
/unsloth/models/qwen3_moe.py @Datta0
/unsloth/models/vision.py @mmathew23 @danielhanchen
/unsloth/utils/attention_dispatch.py @mmathew23
/unsloth/utils/hf_hub.py @mmathew23
/unsloth/utils/packing.py @mmathew23

/cli/ @rolandtannous @Manan17
/studio/frontend/ @Shine1i @rolandtannous @Manan17
/studio/frontend/public/ @Shine1i
/studio/backend/ @rolandtannous
/studio/backend/core/data_recipe/ @rolandtannous
/studio/backend/tests/ @rolandtannous @danielhanchen
/tests/ @rolandtannous @danielhanchen
/scripts/ @rolandtannous @danielhanchen


================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms

github: unslothai
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # unsloth
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']


================================================
FILE: .github/ISSUE_TEMPLATE/bug---issue.md
================================================
---
name: Bug / Issue
about: Bug / Issue
title: "[Bug] Please fill in your issue title here."
labels: bug
assignees: ''

---

1. Did you update? `pip install --upgrade unsloth unsloth_zoo`
2. `Colab` or `Kaggle` or local / cloud
3. Number of GPUs used (check with `nvidia-smi`)
4. Which notebook? Please link!
5. Which Unsloth version, TRL version, transformers version, PyTorch version?
6. Which trainer? `SFTTrainer`, `GRPOTrainer` etc

```python
Put minimal code to reproduce the error here ###Remove Hugging Face token###
```

🦥 You can also ask via our Reddit page: https://reddit.com/r/unsloth/


================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: Feature Request
about: New features, model support, ideas
title: "[Feature]"
labels: feature request
assignees: ''

---

For new models, have you tried:
```python
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    "microsoft/Phi-4-multimodal-instruct",
    trust_remote_code = True,
)
from transformers import AutoModelForSequenceClassification
model, tokenizer = FastModel.from_pretrained(
    auto_model = AutoModelForSequenceClassification,
)
```


================================================
FILE: .github/workflows/stale.yml
================================================
name: 'Inactive Issue Pinger'

on:
  schedule:
    - cron: '30 5 * * *' # Runs at 5:30 UTC every day

jobs:
  stale:
    runs-on: ubuntu-latest
    permissions:
      issues: write

    steps:
      - uses: actions/stale@v10
        with:
          # The message to post on stale issues.
          # This message will ping the issue author.
          # Note: The stale bot action does not currently support a direct placeholder for the last commenter.
          # As a workaround, this message encourages any participant to reply.
          stale-issue-message: >
            Is this issue still important to you?
            Apologies in advance, as we might have missed this issue.
            For faster response times, please post on our subreddit - https://www.reddit.com/r/unsloth or our Discord - https://discord.com/invite/unsloth

          # The number of days of inactivity before an issue is considered stale.
          days-before-issue-stale: 9999

          # Set to -1 to never close stale issues.
          days-before-issue-close: -1

          # A label to apply to stale issues.
          stale-issue-label: 'inactive'

          # The number of operations to perform per run to avoid rate limiting.
          operations-per-run: 500

          enable-statistics: false


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*.class
unsloth_compiled_cache/
# ML artifacts (large files)
feature/
outputs/
exports/
/datasets/
studio/backend/assets/datasets/
unsloth_training_checkpoints/
*.gguf
*.safetensors

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# UV
#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#uv.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
#   in version control.
#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.venv_overlay/
.venv_t5/
environment.yaml

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Ruff stuff:
.ruff_cache/
.pre-commit-cache/

# PyPI configuration file and IDE/Editors
.pypirc
.vscode
.idea/
.claude/
*.swp
*.swo

# oh-my-codex
.omx/

# Firebase
firebase-debug.log

# Other
resources/
tmp/
**/node_modules/
auth.db

# Local working docs
**/CLAUDE.md
**/claude.md
**/AGENT.md
**/agent.md
docs/canvas-lab-architecture.md
log_rtx.txt
log.txt
setup_leo.sh
server.pid
*.log
package-lock.json


================================================
FILE: .pre-commit-ci.yaml
================================================
ci:
  autofix_prs: true
  autofix_prs_limit: 5
  autoupdate_schedule: monthly
  autoupdate_commit_msg: "chore: pre-commit autoupdate"
  skip: []


================================================
FILE: .pre-commit-config.yaml
================================================
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.15.6
    hooks:
      - id: ruff
        args:
          - --fix
          - --exit-non-zero-on-fix
  - repo: local
    hooks:
      - id: ruff-format-with-kwargs
        name: Ruff format with kwarg spacing
        entry: scripts/run_ruff_format.py
        language: python
        types: [python]
        additional_dependencies:
          - ruff==0.6.9


================================================
FILE: CODE_OF_CONDUCT.md
================================================

# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
  any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at support@unsloth.ai.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations


================================================
FILE: CONTRIBUTING.md
================================================
# 🦥 Contributing to Unsloth

Thank you for not only using Unsloth but also for being interested in helping out! We value all contributions, whether they come in the form of code, ideas, support for others, or simply spreading the word about Unsloth! 💕

- **[Support the Community](https://github.com/unslothai/unsloth/issues)**: Answer questions, review pull requests, or assist others in discussions.
- **Fix Bugs**: Identify and resolve issues with the existing codebase.
- **Submit Ideas**: Request new features or share enhancements you'd like to see.
- **Develop Features**: Implement new functionality or improve existing tools by opening pull requests.
- **[Improve Documentation](https://docs.unsloth.ai/)**: Help by creating guides, FAQs, or enhancing clarity.

One of the best ways to support us is by spreading the word about Unsloth! Share how it’s powering your amazing projects in blog posts or on social media, and inspire others to explore its potential. Even a simple star on our repo goes a long way in showing your support and helping the community grow. 🌟

## Submitting Issues
If you find a bug or have a feature idea, we’d love to hear from you! Here’s how to make your submission stand out:

### Reporting Bugs
1. **Search First**: Check if the issue has already been reported using GitHub’s search bar under Issues.
2. **Details Matter**: Is this on Google Colab, Kaggle, or another platform or service? Are you using Unsloth's official notebook? Include your OS, Python version, and other relevant details. For bugs, a concise code snippet that reproduces the issue is incredibly helpful.
3. **Be Thorough**: Attach screenshots, traceback logs, or any additional information that might speed up resolution.

## Spread the Word
Your support extends beyond code:
- Spread the word by writing about Unsloth in blogs or on social media.
- Share how Unsloth powers your projects.
- Star our repository to show your appreciation.

Finally, please be mindful of our [Code of Conduct](https://github.com/unslothai/unsloth/blob/main/CODE_OF_CONDUCT.md) to ensure a welcoming and inclusive environment for everyone.

Thank you so much for reading and we hope you have lots of fun using Unsloth! 🦥


================================================
FILE: COPYING
================================================
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.

  A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate.  Many developers of free software are heartened and
encouraged by the resulting cooperation.  However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.

  The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community.  It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server.  Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.

  An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals.  This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Remote Network Interaction; Use with the GNU General Public License.

  Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software.  This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time.  Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source.  For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code.  There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

Files under unsloth/*, tests/*, scripts/* are Apache 2.0 licensed.
Files under studio/*, unsloth_cli/* which is optional to install are AGPLv3 licensed.

================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [2024-] [Unsloth AI. Inc team, Daniel Han-Chen & Michael Han-Chen]
   Files under unsloth/*, tests/*, scripts/* are Apache 2.0 licensed.
   Files under studio/*, unsloth_cli/* which is optional to install are AGPLv3 licensed.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
<h1 align="center" style="margin:0;">
  <a href="https://unsloth.ai/docs"><picture>
    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/unslothai/unsloth/main/images/STUDIO%20WHITE%20LOGO.png">
    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/unslothai/unsloth/main/images/STUDIO%20BLACK%20LOGO.png">
    <img alt="Unsloth logo" src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/STUDIO%20BLACK%20LOGO.png" height="60" style="max-width:100%;">
  </picture></a>
</h1>
<h3 align="center" style="margin: 0; margin-top: 0;">
Run and train AI models with a unified local interface.
</h3>

<p align="center">
  <a href="#-features">Features</a> •
  <a href="#-quickstart">Quickstart</a> •
  <a href="#-free-notebooks">Notebooks</a> •
  <a href="https://unsloth.ai/docs">Documentation</a> •
  <a href="https://discord.com/invite/unsloth">Discord</a>
</p>
 <a href="https://unsloth.ai/docs/new/studio">
<img alt="unsloth studio ui homepage" src="https://raw.githubusercontent.com/unslothai/unsloth/main/studio/frontend/public/studio%20github%20landscape%20colab%20display.png" style="max-width: 100%; margin-bottom: 0;"></a>

Unsloth Studio (Beta) lets you run and train text, [audio](https://unsloth.ai/docs/basics/text-to-speech-tts-fine-tuning), [embedding](https://unsloth.ai/docs/new/embedding-finetuning), [vision](https://unsloth.ai/docs/basics/vision-fine-tuning) models on Windows, Linux and macOS.

## ⭐ Features
Unsloth provides several key features for both inference and training:
### Inference
* **Search + download + run models** including GGUF, LoRA adapters, safetensors
* **Export models**: [Save or export](https://unsloth.ai/docs/new/studio/export) models to GGUF, 16-bit safetensors and other formats.
* **Tool calling**: Support for [self-healing tool calling](https://unsloth.ai/docs/new/studio/chat#auto-healing-tool-calling) and web search
* **[Code execution](https://unsloth.ai/docs/new/studio/chat#code-execution)**: lets LLMs test code in Claude artifacts and sandbox environments
* [Auto-tune inference parameters](https://unsloth.ai/docs/new/studio/chat#auto-parameter-tuning) and customize chat templates.
* Upload images, audio, PDFs, code, DOCX and more file types to chat with.
### Training
* Train **500+ models** up to **2x faster** with up to **70% less VRAM**, with no accuracy loss.
* Supports full fine-tuning, pretraining, 4-bit, 16-bit, and FP8 training.
* **Observability**: Monitor training live, track loss and GPU usage and customize graphs.
* **Data Recipes**: [Auto-create datasets](https://unsloth.ai/docs/new/studio/data-recipe) from **PDF, CSV, DOCX** etc. Edit data in a visual-node workflow.
* **Reinforcement Learning**: The most efficient [RL](https://unsloth.ai/docs/get-started/reinforcement-learning-rl-guide) library, using **80% less VRAM** for GRPO, [FP8](https://unsloth.ai/docs/get-started/reinforcement-learning-rl-guide/fp8-reinforcement-learning) etc.
* [Multi-GPU](https://unsloth.ai/docs/basics/multi-gpu-training-with-unsloth) training is supported, with major improvements coming soon.

## ⚡ Quickstart
Unsloth can be used in two ways: through **[Unsloth Studio](https://unsloth.ai/docs/new/studio/)**, the web UI, or through **Unsloth Core**, the code-based version. Each has different requirements.

### Unsloth Studio (web UI)
Unsloth Studio (Beta) works on **Windows, Linux, WSL** and **macOS**.

* **CPU:** Supported for Chat and Data Recipes currently
* **NVIDIA:** Training works on RTX 30/40/50, Blackwell, DGX Spark, Station and more
* **macOS:** Currently supports chat and Data Recipes. **MLX training** is coming very soon
* **AMD:** Chat works. Train with [Unsloth Core](#unsloth-core-code-based). Studio support is coming soon.
* **Coming soon:** Training support for Apple MLX, AMD, and Intel.
* **Multi-GPU:** Available now, with a major upgrade on the way

#### MacOS, Linux, WSL Setup:
```bash
curl -fsSL https://raw.githubusercontent.com/unslothai/unsloth/main/install.sh | sh
```
If you don't have `curl`, use `wget`. Then to launch after setup:
```bash
source unsloth_studio/bin/activate
unsloth studio -H 0.0.0.0 -p 8888
```

#### Windows PowerShell Setup:
```powershell
irm https://raw.githubusercontent.com/unslothai/unsloth/main/install.ps1 | iex
```
Then to launch after setup:
```powershell
& .\unsloth_studio\Scripts\unsloth.exe studio -H 0.0.0.0 -p 8888
```

#### MacOS, Linux, WSL developer installs:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv unsloth_studio --python 3.13
source unsloth_studio/bin/activate
uv pip install unsloth --torch-backend=auto
unsloth studio setup
unsloth studio -H 0.0.0.0 -p 8888
```

#### Windows PowerShell developer installs:
```powershell
winget install -e --id Python.Python.3.13
winget install --id=astral-sh.uv  -e
uv venv unsloth_studio --python 3.13
.\unsloth_studio\Scripts\activate
uv pip install unsloth --torch-backend=auto
unsloth studio setup
unsloth studio -H 0.0.0.0 -p 8888
```

#### Docker
Use our [Docker image](https://hub.docker.com/r/unsloth/unsloth) ```unsloth/unsloth``` container. Run:
```bash
docker run -d -e JUPYTER_PASSWORD="mypassword" \
  -p 8888:8888 -p 8000:8000 -p 2222:22 \
  -v $(pwd)/work:/workspace/work \
  --gpus all \
  unsloth/unsloth
  ```

#### Nightly Install - MacOS, Linux, WSL:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
git clone --filter=blob:none https://github.com/unslothai/unsloth.git unsloth_studio
cd unsloth_studio
uv venv --python 3.13
source .venv/bin/activate
uv pip install -e . --torch-backend=auto
unsloth studio setup
unsloth studio -H 0.0.0.0 -p 8888
```
Then to launch every time:
```bash
cd unsloth_studio
source .venv/bin/activate
unsloth studio -H 0.0.0.0 -p 8888
```

#### Nightly Install - Windows:
Run in Windows PowerShell:
```powershell
winget install -e --id Python.Python.3.13
winget install --id=astral-sh.uv  -e
git clone --filter=blob:none https://github.com/unslothai/unsloth.git unsloth_studio
cd unsloth_studio
uv venv --python 3.13
.\.venv\Scripts\activate
uv pip install -e . --torch-backend=auto
unsloth studio setup
unsloth studio -H 0.0.0.0 -p 8888
```
Then to launch every time:
```powershell
cd unsloth_studio
.\.venv\Scripts\activate
unsloth studio -H 0.0.0.0 -p 8888
```

### Unsloth Core (code-based)
#### Linux, WSL
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv unsloth_env --python 3.13
source unsloth_env/bin/activate
uv pip install unsloth --torch-backend=auto
```
#### Windows PowerShell
```powershell
winget install -e --id Python.Python.3.13
winget install --id=astral-sh.uv  -e
uv venv unsloth_env --python 3.13
.\unsloth_env\Scripts\activate
uv pip install unsloth --torch-backend=auto
```
For Windows, `pip install unsloth` works only if you have PyTorch installed. Read our [Windows Guide](https://unsloth.ai/docs/get-started/install/windows-installation).
You can use the same Docker image as Unsloth Studio.

#### AMD, Intel
For RTX 50x, B200, 6000 GPUs: `uv pip install unsloth --torch-backend=auto`. Read our guides for: [Blackwell](https://unsloth.ai/docs/blog/fine-tuning-llms-with-blackwell-rtx-50-series-and-unsloth) and [DGX Spark](https://unsloth.ai/docs/blog/fine-tuning-llms-with-nvidia-dgx-spark-and-unsloth). <br>
To install Unsloth on **AMD** and **Intel** GPUs, follow our [AMD Guide](https://unsloth.ai/docs/get-started/install/amd) and [Intel Guide](https://unsloth.ai/docs/get-started/install/intel).

## ✨ Free Notebooks

Train for free with our notebooks. Read our [guide](https://unsloth.ai/docs/get-started/fine-tuning-llms-guide). Add dataset, run, then deploy your trained model.

| Model | Free Notebooks | Performance | Memory use |
|-----------|---------|--------|----------|
| **Qwen3.5 (4B)**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_5_(4B)_Vision.ipynb)               | 1.5x faster | 60% less |
| **gpt-oss (20B)**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/gpt-oss-(20B)-Fine-tuning.ipynb)               | 2x faster | 70% less |
| **gpt-oss (20B): GRPO**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/gpt-oss-(20B)-GRPO.ipynb)               | 2x faster | 80% less |
| **Qwen3: Advanced GRPO**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb)               | 2x faster | 50% less |
| **Gemma 3 (4B) Vision** | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Gemma3_(4B)-Vision.ipynb)               | 1.7x faster | 60% less |
| **embeddinggemma (300M)**    | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/EmbeddingGemma_(300M).ipynb)               | 2x faster | 20% less |
| **Mistral Ministral 3 (3B)**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Ministral_3_VL_(3B)_Vision.ipynb)               | 1.5x faster | 60% less |
| **Llama 3.1 (8B) Alpaca**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-Alpaca.ipynb)               | 2x faster | 70% less |
| **Llama 3.2 Conversational**      | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb)               | 2x faster | 70% less |
| **Orpheus-TTS (3B)**     | [▶️ Start for free](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Orpheus_(3B)-TTS.ipynb)               | 1.5x faster | 50% less |

- See all our notebooks for: [Kaggle](https://github.com/unslothai/notebooks?tab=readme-ov-file#-kaggle-notebooks), [GRPO](https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl-notebooks), [TTS](https://unsloth.ai/docs/get-started/unsloth-notebooks#text-to-speech-tts-notebooks), [embedding](https://unsloth.ai/docs/new/embedding-finetuning) & [Vision](https://unsloth.ai/docs/get-started/unsloth-notebooks#vision-multimodal-notebooks)
- See [all our models](https://unsloth.ai/docs/get-started/unsloth-model-catalog) and [all our notebooks](https://unsloth.ai/docs/get-started/unsloth-notebooks)
- See detailed documentation for Unsloth [here](https://unsloth.ai/docs)

## 🦥 Unsloth News
- **Introducing Unsloth Studio**: our new web UI for running and training LLMs. [Blog](https://unsloth.ai/docs/new/studio)
- **Qwen3.5** - 0.8B, 2B, 4B, 9B, 27B, 35-A3B, 112B-A10B are now supported. [Guide + notebooks](https://unsloth.ai/docs/models/qwen3.5/fine-tune)
- Train **MoE LLMs 12x faster** with 35% less VRAM - DeepSeek, GLM, Qwen and gpt-oss. [Blog](https://unsloth.ai/docs/new/faster-moe)
- **Embedding models**: Unsloth now supports ~1.8-3.3x faster embedding fine-tuning. [Blog](https://unsloth.ai/docs/new/embedding-finetuning) • [Notebooks](https://unsloth.ai/docs/get-started/unsloth-notebooks#embedding-models)
- New **7x longer context RL** vs. all other setups, via our new batching algorithms. [Blog](https://unsloth.ai/docs/new/grpo-long-context)
- New RoPE & MLP **Triton Kernels** & **Padding Free + Packing**: 3x faster training & 30% less VRAM. [Blog](https://unsloth.ai/docs/new/3x-faster-training-packing)
- **500K Context**: Training a 20B model with >500K context is now possible on an 80GB GPU. [Blog](https://unsloth.ai/docs/blog/500k-context-length-fine-tuning)
- **FP8 & Vision RL**: You can now do FP8 & VLM GRPO on consumer GPUs. [FP8 Blog](https://unsloth.ai/docs/get-started/reinforcement-learning-rl-guide/fp8-reinforcement-learning) • [Vision RL](https://unsloth.ai/docs/get-started/reinforcement-learning-rl-guide/vision-reinforcement-learning-vlm-rl)
- **gpt-oss** by OpenAI: Read our [RL blog](https://unsloth.ai/docs/models/gpt-oss-how-to-run-and-fine-tune/gpt-oss-reinforcement-learning), [Flex Attention](https://unsloth.ai/docs/models/gpt-oss-how-to-run-and-fine-tune/long-context-gpt-oss-training) blog and [Guide](https://unsloth.ai/docs/models/gpt-oss-how-to-run-and-fine-tune).

## 🔗 Links and Resources
| Type                                                                                                                                      | Links                                                                          |
| ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------ |
| <img width="15" src="https://redditinc.com/hs-fs/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" />  **r/unsloth Reddit**                       | [Join Reddit community](https://reddit.com/r/unsloth)                          |
| 📚 **Documentation & Wiki**                                                                                                               | [Read Our Docs](https://unsloth.ai/docs)                                       |
| <img width="13" src="https://upload.wikimedia.org/wikipedia/commons/0/09/X_(formerly_Twitter)_logo_late_2025.svg" />  **Twitter (aka X)** | [Follow us on X](https://twitter.com/unslothai)                                |
| 💾 **Installation**                                                                                                                       | [Pip & Docker Install](https://unsloth.ai/docs/get-started/install) |
| 🔮 **Our Models**                                                                                                                         | [Unsloth Catalog](https://unsloth.ai/docs/get-started/unsloth-model-catalog)   |
| ✍️ **Blog**                                                                                                                               | [Read our Blogs](https://unsloth.ai/blog)                                      |

### Citation

You can cite the Unsloth repo as follows:
```bibtex
@software{unsloth,
  author = {Daniel Han and Michael Han and {Unsloth team}},
  title = {Unsloth},
  url = {https://github.com/unslothai/unsloth},
  year = {2023}
}
```
If you trained a model with 🦥Unsloth, you can use this cool sticker!   <img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/made%20with%20unsloth.png" width="200" align="center" />

### License
Unsloth uses a dual-licensing model of Apache 2.0 and AGPL-3.0. The core Unsloth package remains licensed under **[Apache 2.0](https://github.com/unslothai/unsloth?tab=Apache-2.0-1-ov-file)**, while certain optional components, such as the Unsloth Studio UI, are licensed under the open-source license **[AGPL-3.0](https://github.com/unslothai/unsloth?tab=AGPL-3.0-2-ov-file)**.

This structure helps support ongoing Unsloth development while keeping the project open source and enabling the broader ecosystem to continue growing.

### Thank You to
- The [llama.cpp library](https://github.com/ggml-org/llama.cpp) that lets users run and save models with Unsloth
- The Hugging Face team and their libraries: [transformers](https://github.com/huggingface/transformers) and [TRL](https://github.com/huggingface/trl)
- The PyTorch and [Torch AO](https://github.com/unslothai/unsloth/pull/3391) teams for their contributions
- And of course to every single person who has contributed to or used Unsloth!


================================================
FILE: build.sh
================================================
#!/usr/bin/env bash
#
# Build the Studio frontend, sanity-check the emitted CSS, then build the
# Python wheel from the repository root.
#
# Usage:
#   ./build.sh            # build frontend + wheel
#   ./build.sh publish    # additionally upload dist/* with twine
#
# Must be run from the repository root (it cd's into studio/frontend).

set -euo pipefail

# 1. Build frontend (Vite outputs to dist/)
cd studio/frontend

# Clean stale dist to force a full rebuild
rm -rf dist

# Tailwind v4's oxide scanner respects .gitignore in parent directories.
# Python venvs create a .gitignore with "*" (ignore everything), which
# prevents Tailwind from scanning .tsx source files for class names.
# Temporarily hide any such .gitignore during the build, then restore it.
_HIDDEN_GITIGNORES=()

# Move every hidden .gitignore back into place. Safe to call repeatedly:
# a missing backup file is ignored.
_restore_gitignores() {
    # The "${arr[@]+...}" form avoids an "unbound variable" error under
    # `set -u` when the array is empty on older bash versions.
    for _gi in "${_HIDDEN_GITIGNORES[@]+"${_HIDDEN_GITIGNORES[@]}"}"; do
        mv "${_gi}._twbuild" "$_gi" 2>/dev/null || true
    done
}
# Install the trap BEFORE hiding anything, so a failure part-way through the
# loop below (set -e) still restores the files that were already renamed.
trap _restore_gitignores EXIT

# Walk from the parent of the current directory up to and including "/".
_dir="$(pwd)"
while [ "$_dir" != "/" ]; do
    _dir="$(dirname "$_dir")"
    if [ -f "$_dir/.gitignore" ] && grep -qx '\*' "$_dir/.gitignore" 2>/dev/null; then
        mv "$_dir/.gitignore" "$_dir/.gitignore._twbuild"
        _HIDDEN_GITIGNORES+=("$_dir/.gitignore")
    fi
done

npm install
npm run build       # outputs to studio/frontend/dist/

_restore_gitignores
trap - EXIT

# Validate CSS output -- catch truncated Tailwind builds before packaging.
# `wc -c` is run once per file (`\;` rather than `+`) so that no "total" line
# is emitted; otherwise the sum of all files would be mistaken for the
# largest one. `|| true` keeps a missing dist/assets directory from aborting
# the script under `set -e -o pipefail`, so the explicit error below is shown.
MAX_CSS_SIZE=$(find dist/assets -name '*.css' -exec wc -c {} \; 2>/dev/null | sort -n | tail -1 | awk '{print $1}' || true)
if [ -z "$MAX_CSS_SIZE" ]; then
    echo "❌ ERROR: No CSS files were emitted into dist/assets."
    echo "   The frontend build may have failed silently."
    exit 1
fi
if [ "$MAX_CSS_SIZE" -lt 100000 ]; then
    echo "❌ ERROR: Largest CSS file is only $((MAX_CSS_SIZE / 1024))KB (expected >100KB)."
    echo "   Tailwind may not have scanned all source files."
    echo "   Check for .gitignore files blocking the Tailwind oxide scanner."
    exit 1
fi
echo "✅ Frontend CSS validated (${MAX_CSS_SIZE} bytes)"

cd ../..

# 2. Clean old artifacts
rm -rf build dist *.egg-info

# 3. Build wheel
python -m build

# 4. Optionally publish
if [ "${1:-}" = "publish" ]; then
    python -m twine upload dist/*
fi


================================================
FILE: cli.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Script entry point: run the ``app`` callable exported by ``unsloth_cli``."""

# NOTE(review): `app` is presumably the CLI application object; confirm in unsloth_cli.
from unsloth_cli import app

# Only invoke the CLI when executed as a script, not when imported.
if __name__ == "__main__":
    app()


================================================
FILE: install.ps1
================================================
# Unsloth Studio Installer for Windows PowerShell
# Usage:  irm https://raw.githubusercontent.com/unslothai/unsloth/main/install.ps1 | iex
# Local:  Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass; .\install.ps1

function Install-UnslothStudio {
    <#
    .SYNOPSIS
        Installs Unsloth Studio into a local "unsloth_studio" virtual environment.
    .DESCRIPTION
        Makes sure a compatible Python (3.11-3.13) and uv are available,
        installing them through winget when needed, creates the virtual
        environment relative to the current directory, installs the unsloth
        package into it, and finally runs "unsloth studio setup".
        Each failure prints a message and returns early.
    #>
    $ErrorActionPreference = "Stop"

    $VenvName = "unsloth_studio"
    $PythonVersion = "3.13"

    Write-Host ""
    Write-Host "========================================="
    Write-Host "   Unsloth Studio Installer (Windows)"
    Write-Host "========================================="
    Write-Host ""

    # ── Helper: refresh PATH from registry (preserving current session entries) ──
    function Refresh-SessionPath {
        $machine = [System.Environment]::GetEnvironmentVariable("Path", "Machine")
        $user    = [System.Environment]::GetEnvironmentVariable("Path", "User")
        $env:Path = "$machine;$user;$env:Path"
    }

    # ── Check winget ──
    if (-not (Get-Command winget -ErrorAction SilentlyContinue)) {
        Write-Host "Error: winget is not available." -ForegroundColor Red
        Write-Host "       Install it from https://aka.ms/getwinget" -ForegroundColor Yellow
        Write-Host "       or install Python $PythonVersion and uv manually, then re-run." -ForegroundColor Yellow
        return
    }

    # ── Install Python if no compatible version (3.11-3.13) found ──
    $DetectedPythonVersion = ""
    if (Get-Command python -ErrorAction SilentlyContinue) {
        # With $ErrorActionPreference = "Stop", Windows PowerShell 5.1 turns
        # redirected stderr output of a native command into a terminating
        # error. A `python` stub that only writes to stderr must be treated as
        # "no compatible Python found", not abort the whole installer.
        $pyVer = ""
        try {
            $pyVer = python --version 2>&1
        } catch {
            $pyVer = ""
        }
        # Match against a single string so $Matches is always populated
        # (-match on an array filters it and leaves $Matches untouched).
        if ("$pyVer" -match "Python (3\.1[1-3])\.\d+") {
            Write-Host "==> Python already installed: $pyVer"
            $DetectedPythonVersion = $Matches[1]
        }
    }
    if (-not $DetectedPythonVersion) {
        Write-Host "==> Installing Python ${PythonVersion}..."
        # Derive the winget package id from $PythonVersion so the version is
        # defined in exactly one place.
        winget install -e --id "Python.Python.$PythonVersion" --accept-package-agreements --accept-source-agreements
        # Capture the exit code immediately, before any other command runs.
        $wingetExit = $LASTEXITCODE
        Refresh-SessionPath
        if ($wingetExit -ne 0) {
            # winget returns non-zero for "already installed" -- only fail if python is truly missing
            if (-not (Get-Command python -ErrorAction SilentlyContinue)) {
                Write-Host "[ERROR] Python installation failed (exit code $wingetExit)" -ForegroundColor Red
                return
            }
        }
        $DetectedPythonVersion = $PythonVersion
    }

    # ── Install uv if not present ──
    if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
        Write-Host "==> Installing uv package manager..."
        winget install --id=astral-sh.uv -e --accept-package-agreements --accept-source-agreements
        Refresh-SessionPath
        # Fallback: if winget didn't put uv on PATH, try the PowerShell installer
        if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
            Write-Host "    Trying alternative uv installer..."
            powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
            Refresh-SessionPath
        }
    }

    # Both install routes have been tried; give up if uv is still missing.
    if (-not (Get-Command uv -ErrorAction SilentlyContinue)) {
        Write-Host "Error: uv could not be installed." -ForegroundColor Red
        Write-Host "       Install it from https://docs.astral.sh/uv/" -ForegroundColor Yellow
        return
    }

    # ── Create venv (skip if it already exists and has a valid interpreter) ──
    $VenvPython = Join-Path $VenvName "Scripts\python.exe"
    if (-not (Test-Path $VenvPython)) {
        # A directory without an interpreter is a broken leftover; start clean.
        if (Test-Path $VenvName) { Remove-Item -Recurse -Force $VenvName }
        Write-Host "==> Creating Python ${DetectedPythonVersion} virtual environment (${VenvName})..."
        uv venv $VenvName --python $DetectedPythonVersion
        if ($LASTEXITCODE -ne 0) {
            Write-Host "[ERROR] Failed to create virtual environment (exit code $LASTEXITCODE)" -ForegroundColor Red
            return
        }
    } else {
        Write-Host "==> Virtual environment ${VenvName} already exists, skipping creation."
    }

    # ── Install unsloth directly into the venv (no activation needed) ──
    Write-Host "==> Installing unsloth (this may take a few minutes)..."
    uv pip install --python $VenvPython unsloth --torch-backend=auto
    if ($LASTEXITCODE -ne 0) {
        Write-Host "[ERROR] Failed to install unsloth (exit code $LASTEXITCODE)" -ForegroundColor Red
        return
    }

    # ── Run studio setup ──
    # setup.ps1 will handle installing Git, CMake, Visual Studio Build Tools,
    # CUDA Toolkit, Node.js, and other dependencies automatically via winget.
    Write-Host "==> Running unsloth studio setup..."
    $UnslothExe = Join-Path $VenvName "Scripts\unsloth.exe"
    & $UnslothExe studio setup
    if ($LASTEXITCODE -ne 0) {
        Write-Host "[ERROR] unsloth studio setup failed (exit code $LASTEXITCODE)" -ForegroundColor Red
        return
    }

    Write-Host ""
    Write-Host "========================================="
    Write-Host "   Unsloth Studio installed!"
    Write-Host "========================================="
    Write-Host ""
    Write-Host "  To launch, run:"
    Write-Host ""
    Write-Host "    .\${VenvName}\Scripts\activate"
    Write-Host "    unsloth studio -H 0.0.0.0 -p 8888"
    Write-Host ""
}

Install-UnslothStudio


================================================
FILE: install.sh
================================================
#!/bin/sh
# Unsloth Studio Installer
# Usage (curl): curl -fsSL https://raw.githubusercontent.com/unslothai/unsloth/main/install.sh | sh
# Usage (wget): wget -qO- https://raw.githubusercontent.com/unslothai/unsloth/main/install.sh | sh
set -e

VENV_NAME="unsloth_studio"
PYTHON_VERSION="3.13"

# ── Helper: fetch URL $1 into file $2, preferring curl and falling back to wget ──
# Exits the script when neither tool is installed.
download() {
    if command -v curl >/dev/null 2>&1; then
        curl -LsSf "$1" -o "$2"
        return
    fi
    if command -v wget >/dev/null 2>&1; then
        wget -qO "$2" "$1"
        return
    fi
    echo "Error: neither curl nor wget found. Install one and re-run."
    exit 1
}

# ── Helper: succeed when the package named in $1 appears to be present ──
# Most names are probed through the command they provide; the libcurl
# development package is looked up through dpkg instead.
_is_pkg_installed() {
    if [ "$1" = "build-essential" ]; then
        command -v gcc >/dev/null 2>&1
    elif [ "$1" = "libcurl4-openssl-dev" ]; then
        # Reported as absent when dpkg itself is unavailable
        command -v dpkg >/dev/null 2>&1 && dpkg -s "$1" >/dev/null 2>&1
    elif [ "$1" = "pciutils" ]; then
        command -v lspci >/dev/null 2>&1
    else
        # Fallback: the package name doubles as the command name
        command -v "$1" >/dev/null 2>&1
    fi
}

# ── Helper: apt-install the given packages, asking for sudo only as a fallback ──
# Usage: _smart_apt_install pkg1 pkg2 pkg3 ...
_smart_apt_install() {
    _PKGS="$*"

    # Attempt 1: plain apt-get, silenced. This only works when the script is
    # already running as root, so failures here are deliberately ignored.
    apt-get update -y </dev/null >/dev/null 2>&1 || true
    apt-get install -y $_PKGS </dev/null >/dev/null 2>&1 || true

    # Collect whatever is still absent after that attempt
    _STILL_MISSING=""
    for _pkg in $_PKGS; do
        _is_pkg_installed "$_pkg" || _STILL_MISSING="$_STILL_MISSING $_pkg"
    done
    _STILL_MISSING=$(echo "$_STILL_MISSING" | sed 's/^ *//')

    # Everything is present: done
    [ -n "$_STILL_MISSING" ] || return 0

    # No sudo means no way to escalate: print manual instructions and stop
    if ! command -v sudo >/dev/null 2>&1; then
        printf '%s\n' \
            "" \
            "    sudo is not available on this system." \
            "    Please install these packages as root, then re-run Unsloth Studio setup:" \
            "    apt-get update -y && apt-get install -y $_STILL_MISSING"
        exit 1
    fi

    # Attempt 2: ask for consent before running sudo
    printf '%s\n' \
        "" \
        "    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" \
        "    WARNING: We require sudo elevated permissions to install:" \
        "    $_STILL_MISSING" \
        "    If you accept, we'll run sudo now, and it'll prompt your password." \
        "    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" \
        ""
    printf "    Accept? [Y/n] "

    # The answer is read from the terminal, not stdin (which may be the piped
    # script itself); it defaults to "y" when no answer can be read.
    REPLY="y"
    if [ -r /dev/tty ]; then
        read -r REPLY </dev/tty || REPLY="y"
    fi

    # Only an answer starting with n/N declines
    case "$REPLY" in
        [nN]*)
            printf '%s\n' \
                "" \
                "    Please install these packages first, then re-run Unsloth Studio setup:" \
                "    sudo apt-get update -y && sudo apt-get install -y $_STILL_MISSING"
            exit 1
            ;;
    esac

    sudo apt-get update -y </dev/null
    sudo apt-get install -y $_STILL_MISSING </dev/null
}

printf '%s\n' \
    "" \
    "=========================================" \
    "   Unsloth Studio Installer" \
    "=========================================" \
    ""

# ── Detect platform ──
# Darwin means macOS. Anything else is treated as Linux, or as WSL when
# /proc/version mentions Microsoft.
case "$(uname)" in
    Darwin)
        OS="macos"
        ;;
    *)
        if grep -qi microsoft /proc/version 2>/dev/null; then
            OS="wsl"
        else
            OS="linux"
        fi
        ;;
esac
echo "==> Platform: $OS"

# ── Check system dependencies ──
# "unsloth studio setup" builds the GGUF inference engine (llama.cpp). That
# build needs cmake and git on every platform, and additionally
# build-essential plus the libcurl development headers on Linux.
MISSING=""

if ! command -v cmake >/dev/null 2>&1; then
    MISSING="$MISSING cmake"
fi
if ! command -v git >/dev/null 2>&1; then
    MISSING="$MISSING git"
fi

if [ "$OS" = "macos" ]; then
    # The C/C++ compiler ships with the Xcode Command Line Tools
    if ! xcode-select -p >/dev/null 2>&1; then
        printf '%s\n' \
            "" \
            "==> Xcode Command Line Tools are required." \
            "    Installing (a system dialog will appear)..."
        xcode-select --install </dev/null 2>/dev/null || true
        echo "    After the installation completes, please re-run this script."
        exit 1
    fi
elif [ "$OS" = "linux" ] || [ "$OS" = "wsl" ]; then
    # Either downloader is acceptable; ask for curl only when both are absent
    command -v curl >/dev/null 2>&1 || command -v wget >/dev/null 2>&1 || MISSING="$MISSING curl"
    if ! command -v gcc >/dev/null 2>&1; then
        MISSING="$MISSING build-essential"
    fi
    # libcurl development headers give llama.cpp HTTPS support; they are only
    # checked where dpkg is available
    if command -v dpkg >/dev/null 2>&1 && ! dpkg -s libcurl4-openssl-dev >/dev/null 2>&1; then
        MISSING="$MISSING libcurl4-openssl-dev"
    fi
fi

# Drop the leading separator left by the appends above
MISSING=$(echo "$MISSING" | sed 's/^ *//')

if [ -z "$MISSING" ]; then
    echo "==> All system dependencies found."
else
    printf '%s\n' \
        "" \
        "==> Unsloth Studio needs these packages: $MISSING" \
        "    These are needed to build the GGUF inference engine."

    if [ "$OS" = "macos" ]; then
        # Homebrew is the only supported installer on macOS
        command -v brew >/dev/null 2>&1 || {
            printf '%s\n' \
                "" \
                "    Homebrew is required to install them." \
                "    Install Homebrew from https://brew.sh then re-run this script."
            exit 1
        }
        brew install $MISSING </dev/null
    elif [ "$OS" = "linux" ] || [ "$OS" = "wsl" ]; then
        # Only apt-based systems are automated; others get instructions
        if ! command -v apt-get >/dev/null 2>&1; then
            printf '%s\n' \
                "    apt-get is not available. Please install with your package manager:" \
                "    $MISSING" \
                "    Then re-run Unsloth Studio setup."
            exit 1
        fi
        _smart_apt_install $MISSING
    fi
    echo ""
fi

# ── Install uv ──
# uv creates the virtual environment and installs packages further down. When
# it is not on PATH, fetch the installer from astral.sh and run it with stdin
# detached so it cannot consume the piped script.
if ! command -v uv >/dev/null 2>&1; then
    echo "==> Installing uv package manager..."
    _uv_tmp=$(mktemp)
    download "https://astral.sh/uv/install.sh" "$_uv_tmp"
    sh "$_uv_tmp" </dev/null
    rm -f "$_uv_tmp"
    # Pick up the PATH changes from the env file, if the installer left one
    if [ -f "$HOME/.local/bin/env" ]; then
        . "$HOME/.local/bin/env"
    fi
    export PATH="$HOME/.local/bin:$PATH"

    # Verify the result, as the Windows installer does: stop with a clear
    # message here rather than a bare "uv: not found" at the next step.
    if ! command -v uv >/dev/null 2>&1; then
        echo "Error: uv could not be installed."
        echo "       Install it from https://docs.astral.sh/uv/ then re-run this script."
        exit 1
    fi
fi

# ── Create venv (reused only when its interpreter is executable) ──
if [ -x "$VENV_NAME/bin/python" ]; then
    echo "==> Virtual environment ${VENV_NAME} already exists, skipping creation."
else
    # Whatever sits at that path lacks a usable interpreter; wipe it first
    if [ -e "$VENV_NAME" ]; then
        rm -rf "$VENV_NAME"
    fi
    echo "==> Creating Python ${PYTHON_VERSION} virtual environment (${VENV_NAME})..."
    uv venv "$VENV_NAME" --python "$PYTHON_VERSION"
fi

# ── Install unsloth by pointing uv at the venv's interpreter (no activation) ──
echo "==> Installing unsloth (this may take a few minutes)..."
uv pip install --python "$VENV_NAME/bin/python" unsloth --torch-backend=auto

# ── Run studio setup ──
# Put the venv's bin directory first on PATH so that setup.sh's Python
# discovery finds the interpreter uv installed there. The system Python on
# macOS may fall outside the 3.11-3.13 range that setup.sh requires.
VENV_ABS_BIN="$(cd "$VENV_NAME/bin" && pwd)"
[ -z "$VENV_ABS_BIN" ] || export PATH="$VENV_ABS_BIN:$PATH"

echo "==> Running unsloth studio setup..."
"$VENV_NAME/bin/unsloth" studio setup </dev/null

# Closing banner with the launch instructions
printf '%s\n' \
    "" \
    "=========================================" \
    "   Unsloth Studio installed!" \
    "=========================================" \
    "" \
    "  To launch, run:" \
    "" \
    "    source ${VENV_NAME}/bin/activate" \
    "    unsloth studio -H 0.0.0.0 -p 8888" \
    ""


================================================
FILE: pyproject.toml
================================================
# Build backend: setuptools together with setuptools-scm, both pinned to
# exact versions.
[build-system]
requires = ["setuptools==80.9.0", "setuptools-scm==9.2.0"]
build-backend = "setuptools.build_meta"

# Core distribution metadata.
[project]
name = "unsloth"
# The version is not hard-coded here; it is read from the attribute named in
# [tool.setuptools.dynamic].
dynamic = ["version"]
description = "2-5X faster training, reinforcement learning & finetuning"
readme = "README.md"
requires-python = ">=3.9,<3.15"
license = "Apache-2.0"
keywords = ["ai", "llm", "reinforcement learning", "machine learning", "artificial intelligence", "pytorch"]
authors = [
    {email = "info@unsloth.ai"},
    {name = "Unsloth AI team"},
]
maintainers = [
    {name = "Daniel Han", email = "daniel@unsloth.ai"},
    {name = "Michael Han", email = "info@unsloth.ai"},
]
classifiers = [
    "Programming Language :: Python",
    "Environment :: GPU",
    "Environment :: GPU :: NVIDIA CUDA",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Unconditional requirements of a plain `pip install unsloth`; all other
# requirements are declared as extras under [project.optional-dependencies].
dependencies = [
    "typer",
    "pydantic",
    "pyyaml",
    "nest-asyncio",
]

# Console entry point: the `unsloth` command invokes `app` from unsloth_cli.
[project.scripts]
unsloth = "unsloth_cli:app"

# Source of the dynamic `version` field declared under [project].
[tool.setuptools.dynamic]
version = {attr = "unsloth.models._utils.__version__"}

[tool.setuptools]
include-package-data = true

# Non-Python files bundled with the `studio` package: helper scripts, the
# frontend (built output, public assets, sources and top-level config files),
# backend requirement files, and the oxc-validator JSON/MJS assets.
[tool.setuptools.package-data]
studio = [
    "*.sh",
    "*.ps1",
    "*.bat",
    "frontend/dist/**/*",
    "frontend/public/**/*",
    "frontend/src/**/*",
    "frontend/*.json",
    "frontend/*.ts",
    "frontend/*.js",
    "frontend/*.lock",
    "frontend/*.html",
    "frontend/*.yaml",
    # Dotfiles whose names start with ".git" directly under frontend/
    "frontend/.git*",
    "backend/requirements/**/*",
    "backend/core/data_recipe/oxc-validator/*.json",
    "backend/core/data_recipe/oxc-validator/*.mjs",
]

# Package discovery skips anything matching these patterns.
[tool.setuptools.packages.find]
exclude = ["images*", "tests*", "kernels/moe*"]

[project.optional-dependencies]
# Triton, chosen per platform through environment markers: `triton` on Linux,
# `triton-windows` on 64-bit x86 Windows.
triton = [
    "triton>=3.0.0 ; ('linux' in sys_platform)",
    "triton-windows ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]

# Hugging Face ecosystem packages with their supported version ranges. The
# `huggingface` extra below layers unsloth_zoo, torchvision and the triton
# extra on top of this one.
huggingfacenotorch = [
    "wheel>=0.42.0",
    "packaging",
    "numpy",
    "tqdm",
    "psutil",
    "tyro",
    "protobuf",
    "sentencepiece>=0.2.0",
    "datasets>=3.4.1,!=4.0.*,!=4.1.0,<4.4.0",
    "accelerate>=0.34.1",
    "peft>=0.18.0,!=0.11.0",
    "huggingface_hub>=0.34.0",
    "hf_transfer",
    "diffusers",
    "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.0,!=4.57.4,!=4.57.5,!=5.0.0,!=5.1.0,<=5.3.0",
    "trl>=0.18.2,!=0.19.0,<=0.24.0",
    "sentence-transformers",
]
huggingface = [
    "unsloth[huggingfacenotorch]",
    "unsloth_zoo>=2026.3.4",
    "torchvision",
    "unsloth[triton]",
]
# `huggingface` plus bitsandbytes and xformers; both additions are restricted
# to win32 by their markers.
windows = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0 ; (sys_platform == 'win32')",
    "xformers>=0.0.22.post7 ; (sys_platform == 'win32')",
]
# Alias for the `huggingface` extra.
base = [
    "unsloth[huggingface]",
]
cu118only = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.22.post7%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.22.post7%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.22.post7%2Bcu118-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu121only = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.22.post7-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.22.post7-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.22.post7-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu118onlytorch211 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.23%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.23%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.23%2Bcu118-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu121onlytorch211 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.23-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.23-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.23-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu118onlytorch212 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.23.post1%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.23.post1%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.23.post1%2Bcu118-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu121onlytorch212 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.23.post1-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.23.post1-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.23.post1-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu118onlytorch220 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.24%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.24%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.24%2Bcu118-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu121onlytorch220 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.24-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.24-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.24-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
]
cu118onlytorch230 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27%2Bcu118-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27%2Bcu118-cp312-cp312-manylinux2014_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu121onlytorch230 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.27-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.27-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.27-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.27-cp312-cp312-manylinux2014_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu118onlytorch240 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27.post2%2Bcu118-cp39-cp39-manylinux2014_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27.post2%2Bcu118-cp310-cp310-manylinux2014_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27.post2%2Bcu118-cp311-cp311-manylinux2014_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.27.post2%2Bcu118-cp312-cp312-manylinux2014_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu121onlytorch240 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post1-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post1-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post1-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post1-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu124onlytorch240 = [
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post1-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu118onlytorch250 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.28.post2-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.28.post2-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.28.post2-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.28.post2-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu121onlytorch250 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post2-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post2-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post2-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.28.post2-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu124onlytorch250 = [
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu118onlytorch251 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post1-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post1-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post1-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu121onlytorch251 = [
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.29.post1-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.29.post1-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu121/xformers-0.0.29.post1-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu124onlytorch251 = [
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post1-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu118onlytorch260 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post3-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.29.post3-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
]
cu124onlytorch260 = [
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu124/xformers-0.0.29.post3-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu126onlytorch260 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.29.post3-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu118onlytorch270 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu126onlytorch270 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu128onlytorch270 = [
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp39-cp39-manylinux_2_28_x86_64.whl ; python_version=='3.9' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp310-cp310-manylinux_2_28_x86_64.whl ; python_version=='3.10' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl ; python_version=='3.11' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp312-cp312-manylinux_2_28_x86_64.whl ; python_version=='3.12' and ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp39-cp39-win_amd64.whl ; python_version=='3.9' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp310-cp310-win_amd64.whl ; python_version=='3.10' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp311-cp311-win_amd64.whl ; python_version=='3.11' and (sys_platform == 'win32')",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.30-cp312-cp312-win_amd64.whl ; python_version=='3.12' and (sys_platform == 'win32')",
]
cu118onlytorch271 = [
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu118/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu126onlytorch271 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu128onlytorch271 = [
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.31.post1-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
# xformers 0.0.32.post2 (cp39-abi3) wheels for the extra named cu118 / torch280.
# NOTE(review): both URLs below are served from the cu126 wheel index, not
# cu118 — presumably a fallback because no cu118 build exists for this
# xformers release; confirm before relying on this extra with a CUDA 11.8 torch.
cu118onlytorch280 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
# xformers 0.0.32.post2 (cp39-abi3) wheels for CUDA 12.6 / torch 2.8.0.
# One entry per platform; the environment markers select Linux vs. Windows.
# The wheel index in the URL must match the CUDA tag in the extra's name so
# that the xformers binary is built against the same CUDA runtime as torch.
cu126onlytorch280 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
# xformers 0.0.32.post2 (cp39-abi3) wheels for CUDA 12.8 / torch 2.8.0.
# One entry per platform; the environment markers select Linux vs. Windows.
# The wheel index in the URL must match the CUDA tag in the extra's name so
# that the xformers binary is built against the same CUDA runtime as torch.
cu128onlytorch280 = [
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.32.post2-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
# Empty extra: no xformers wheel is pinned here, so `cu130-torch280` (which
# references this extra) resolves to only `unsloth[huggingface]` + bitsandbytes.
# NOTE(review): presumably no cu130 build of xformers 0.0.32.post2 is
# published — confirm, and fill in once one is available.
cu130onlytorch280 = [
]
cu126onlytorch290 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.33.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.33.post1-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu128onlytorch290 = [
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.33.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.33.post1-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu130onlytorch290 = [
    "xformers @ https://download.pytorch.org/whl/cu130/xformers-0.0.33.post1-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu130/xformers-0.0.33.post1-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu126onlytorch291 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.33.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.33.post2-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu128onlytorch291 = [
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.33.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.33.post2-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu130onlytorch291 = [
    "xformers @ https://download.pytorch.org/whl/cu130/xformers-0.0.33.post2-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu130/xformers-0.0.33.post2-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu126onlytorch2100 = [
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.34-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu126/xformers-0.0.34-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu128onlytorch2100 = [
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.34-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu128/xformers-0.0.34-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu130onlytorch2100 = [
    "xformers @ https://download.pytorch.org/whl/cu130/xformers-0.0.34-cp39-abi3-manylinux_2_28_x86_64.whl ; ('linux' in sys_platform)",
    "xformers @ https://download.pytorch.org/whl/cu130/xformers-0.0.34-cp39-abi3-win_amd64.whl ; (sys_platform == 'win32')",
]
cu118 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118only]",
]
cu121 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121only]",
]
cu118-torch211 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu118onlytorch211]",
]
cu121-torch211 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu121onlytorch211]",
]
cu118-torch212 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu118onlytorch212]",
]
cu121-torch212 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu121onlytorch212]",
]
cu118-torch220 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch220]",
]
cu121-torch220 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch220]",
]
cu118-torch230 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch230]",
]
cu121-torch230 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch230]",
]
cu118-torch240 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch240]",
]
cu121-torch240 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch240]",
]
cu124-torch240 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch240]",
]
cu118-torch250 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch250]",
]
cu121-torch250 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch250]",
]
cu124-torch250 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch250]",
]
cu118-torch251 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch251]",
]
cu121-torch251 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch251]",
]
cu124-torch251 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch251]",
]
cu118-torch260 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch260]",
]
cu124-torch260 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch260]",
]
cu126-torch260 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch260]",
]
cu118-torch270 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch270]",
]
cu126-torch270 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch270]",
]
cu128-torch270 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch270]",
]
cu118-torch271 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch271]",
]
cu126-torch271 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch271]",
]
cu128-torch271 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch271]",
]
cu118-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch280]",
]
cu126-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch280]",
]
cu128-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch280]",
]
cu130-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch280]",
]
cu126-torch290 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch290]",
]
cu128-torch290 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch290]",
]
cu130-torch290 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch290]",
]
cu126-torch291 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch291]",
]
cu128-torch291 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch291]",
]
cu130-torch291 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch291]",
]
cu126-torch2100 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch2100]",
]
cu128-torch2100 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch2100]",
]
cu130-torch2100 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch2100]",
]
kaggle = [
    "unsloth[huggingface]",
]
kaggle-new = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
]
conda = [
    "unsloth[huggingface]",
]
colab-torch211 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu121onlytorch211]",
]
colab-ampere-torch211 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu121onlytorch211]",
    "packaging",
    "ninja",
    "flash-attn>=2.6.3 ; ('linux' in sys_platform)",
]
colab-torch220 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch220]",
]
colab-ampere-torch220 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch220]",
    "packaging",
    "ninja",
    "flash-attn>=2.6.3 ; ('linux' in sys_platform)",
]
colab-new = [
    "unsloth_zoo>=2026.3.4",
    "packaging",
    "tyro",
    "transformers>=4.51.3,!=4.52.0,!=4.52.1,!=4.52.2,!=4.52.3,!=4.53.0,!=4.54.0,!=4.55.0,!=4.55.1,!=4.57.0,!=4.57.4,!=4.57.5,!=5.0.0,!=5.1.0,<=5.3.0",
    "datasets>=3.4.1,!=4.0.*,!=4.1.0,<4.4.0",
    "sentencepiece>=0.2.0",
    "tqdm",
    "psutil",
    "wheel>=0.42.0",
    "numpy",
    "protobuf",
    "huggingface_hub>=0.34.0",
    "hf_transfer",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[triton]",
    "sentence-transformers",
]
colab-no-deps = [
    "accelerate>=0.34.1",
    "trl>=0.18.2,!=0.19.0,<=0.24.0",
    "peft>=0.18.0",
    "xformers ; ('linux' in sys_platform or sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "protobuf",
]
colab = [
    "unsloth[cu121]",
]
# Optional flash-attn (>= 2.6.3) plus `packaging` and `ninja`.
# Every entry carries a `'linux' in sys_platform` marker, so on any other
# platform this extra installs nothing.
# NOTE(review): `packaging` / `ninja` are presumably listed because flash-attn
# needs them when it is built from source — confirm.
flashattention = [
    "packaging ; ('linux' in sys_platform)",
    "ninja ; ('linux' in sys_platform)",
    "flash-attn>=2.6.3 ; ('linux' in sys_platform)",
]
# Alias combining the torch 2.2.0 Colab Ampere extra with flash-attn.
# `colab-ampere-torch220` already lists packaging, ninja and flash-attn>=2.6.3
# itself, so `unsloth[flashattention]` overlaps with it; the duplicate
# requirements are identical and resolve to the same packages.
colab-ampere = [
    "unsloth[colab-ampere-torch220]",
    "unsloth[flashattention]",
]
cu118-ampere = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118only]",
    "unsloth[flashattention]",
]
cu121-ampere = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121only]",
    "unsloth[flashattention]",
]
cu118-ampere-torch211 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu118onlytorch211]",
    "unsloth[flashattention]",
]
cu121-ampere-torch211 = [
    "unsloth[huggingface]",
    "bitsandbytes==0.45.5",
    "unsloth[cu121onlytorch211]",
    "unsloth[flashattention]",
]
cu118-ampere-torch220 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch220]",
    "unsloth[flashattention]",
]
cu121-ampere-torch220 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch220]",
    "unsloth[flashattention]",
]
cu118-ampere-torch230 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch230]",
    "unsloth[flashattention]",
]
cu121-ampere-torch230 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch230]",
    "unsloth[flashattention]",
]
cu118-ampere-torch240 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch240]",
    "unsloth[flashattention]",
]
cu121-ampere-torch240 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch240]",
    "unsloth[flashattention]",
]
cu124-ampere-torch240 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch240]",
    "unsloth[flashattention]",
]
cu118-ampere-torch250 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch250]",
    "unsloth[flashattention]",
]
cu121-ampere-torch250 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch250]",
    "unsloth[flashattention]",
]
cu124-ampere-torch250 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch250]",
    "unsloth[flashattention]",
]
cu118-ampere-torch251 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch251]",
    "unsloth[flashattention]",
]
cu121-ampere-torch251 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu121onlytorch251]",
    "unsloth[flashattention]",
]
cu124-ampere-torch251 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch251]",
    "unsloth[flashattention]",
]
cu118-ampere-torch260 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch260]",
    "unsloth[flashattention]",
]
cu124-ampere-torch260 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu124onlytorch260]",
    "unsloth[flashattention]",
]
cu126-ampere-torch260 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch260]",
    "unsloth[flashattention]",
]
cu118-ampere-torch270 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch270]",
    "unsloth[flashattention]",
]
cu126-ampere-torch270 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch270]",
    "unsloth[flashattention]",
]
cu128-ampere-torch270 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch270]",
    "unsloth[flashattention]",
]
cu118-ampere-torch271 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch271]",
    "unsloth[flashattention]",
]
cu126-ampere-torch271 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch271]",
    "unsloth[flashattention]",
]
cu128-ampere-torch271 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch271]",
    "unsloth[flashattention]",
]
cu118-ampere-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu118onlytorch280]",
    "unsloth[flashattention]",
]
cu126-ampere-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch280]",
    "unsloth[flashattention]",
]
cu128-ampere-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch280]",
    "unsloth[flashattention]",
]
cu130-ampere-torch280 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch280]",
    "unsloth[flashattention]",
]
# Ampere extras for torch 2.9.0, 2.9.1 and 2.10.0 (CUDA 12.6 / 12.8 / 13.0).
# NOTE(review): unlike every `*-ampere-*` extra for torch <= 2.8.0 in this
# file, the nine tables below do NOT include `unsloth[flashattention]`, which
# makes each of them identical to its non-ampere counterpart
# (e.g. `cu126-ampere-torch290` == `cu126-torch290`). Presumably flash-attn
# was left out on purpose for these torch versions — confirm, or add it back.
cu126-ampere-torch290 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch290]",
]
cu128-ampere-torch290 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch290]",
]
cu130-ampere-torch290 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch290]",
]
cu126-ampere-torch291 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch291]",
]
cu128-ampere-torch291 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch291]",
]
cu130-ampere-torch291 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch291]",
]
cu126-ampere-torch2100 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu126onlytorch2100]",
]
cu128-ampere-torch2100 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu128onlytorch2100]",
]
cu130-ampere-torch2100 = [
    "unsloth[huggingface]",
    "bitsandbytes>=0.45.5,!=0.46.0,!=0.48.0",
    "unsloth[cu130onlytorch2100]",
]
flashattentiontorch260abiFALSEcu12x = [
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp39-cp39-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.9'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.10'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.11'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp312-cp312-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.12'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp313-cp313-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.13'",
]
flashattentiontorch260abiTRUEcu12x = [
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp39-cp39-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.9'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp310-cp310-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.10'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp311-cp311-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.11'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp312-cp312-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.12'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiTRUE-cp313-cp313-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.13'",
]
flashattentiontorch250abiFALSEcu12x = [
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp39-cp39-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.9'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.10'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.11'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp312-cp312-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.12'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp313-cp313-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.13'",
]
flashattentiontorch250abiTRUEcu12x = [
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiTRUE-cp39-cp39-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.9'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiTRUE-cp310-cp310-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.10'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiTRUE-cp311-cp311-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.11'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiTRUE-cp312-cp312-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.12'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiTRUE-cp313-cp313-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.13'",
]
flashattentiontorch240abiFALSEcu12x = [
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp39-cp39-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.9'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.10'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.11'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp312-cp312-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.12'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp313-cp313-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.13'",
]
flashattentiontorch240abiTRUEcu12x = [
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp39-cp39-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.9'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp310-cp310-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.10'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp311-cp311-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.11'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp312-cp312-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.12'",
    "flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiTRUE-cp313-cp313-linux_x86_64.whl ; ('linux' in sys_platform) and python_version == '3.13'",
]
intelgputorch260 = [
    "unsloth_zoo[intelgpu]",
    "unsloth[huggingfacenotorch]",

    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.2.0-cp39-cp39-linux_x86_64.whl#sha256=147607f190a7d7aa24ba454def5977fbbfec792fdae18e4ed278cfec29b69271 ; ('linux' in sys_platform) and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.2.0-cp310-cp310-linux_x86_64.whl#sha256=23aa423fa1542afc34f67eb3ba8ef20060f6d1b3a4697eaeab22b11c92b30f2b ; ('linux' in sys_platform) and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.2.0-cp311-cp311-linux_x86_64.whl#sha256=bcfa995229bbfd9ffd8d6c8d9f6428d393e876fa6e23ee3c20e3c0d73ca75ca5 ; ('linux' in sys_platform) and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.2.0-cp312-cp312-linux_x86_64.whl#sha256=bd340903d03470708df3442438acb8b7e08087ab9e61fbe349b2872bf9257ab0 ; ('linux' in sys_platform) and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.2.0-cp313-cp313-linux_x86_64.whl#sha256=814dccc8a07159e6eca74bed70091bc8fea2d9dd87b0d91845f9f38cde62f01c ; ('linux' in sys_platform) and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-win_amd64.whl ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    "torch @ https://download.pytorch.org/whl/xpu/torch-2.6.0%2Bxpu-cp39-cp39-linux_x86_64.whl#sha256=6a8adf6dc4c089406e8b3a7e58ab57a463bddf9b07130d2576e76eced43e92af ; ('linux' in sys_platform) and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.6.0%2Bxpu-cp310-cp310-linux_x86_64.whl#sha256=ff4561cbf07c83bbccaa0f6e9bb0e6dcf721bacd53c9c43c4eb0e7331b4792f9 ; ('linux' in sys_platform) and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.6.0%2Bxpu-cp311-cp311-linux_x86_64.whl#sha256=12005f66b810ddd3ab93f86c4522bcfdd412cbd27fc9d189b661ff7509bc5e8a ; ('linux' in sys_platform) and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.6.0%2Bxpu-cp312-cp312-linux_x86_64.whl#sha256=c4c5c67625cdacf35765c2b94e61fe166e3c3f4a14521b1212a59ad1b3eb0f2e ; ('linux' in sys_platform) and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.6.0%2Bxpu-cp313-cp313-linux_x86_64.whl#sha256=e6864f7a60a5ecc43d5d38f59a16e5dd132384f73dfd3a697f74944026038f7b ; ('linux' in sys_platform) and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
# Hyphenated alias: `unsloth[intel-gpu-torch260]` simply pulls in the
# `intelgputorch260` extra.
intel-gpu-torch260 = [
    "unsloth[intelgputorch260]"
]
# Intel GPU extra pinned to torch 2.7.0+xpu with pytorch_triton_xpu 3.3.0.
# Every wheel is a direct URL reference; exactly one entry per package is
# selected by its environment marker (OS, CPython 3.9-3.13, x86_64/AMD64).
# No torchvision wheel is pinned in this extra.
intelgputorch270 = [
    "unsloth_zoo[intelgpu]",
    "unsloth[huggingfacenotorch]",

    # pytorch_triton_xpu 3.3.0: Linux (manylinux) wheels first, then Windows.
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=749a7098492c6a27b356c97149a4a62973b953eae60bc1b6259260974f344913 ; ('linux' in sys_platform) and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=44362e80abd752471a08341093321955b066daa2cfb4810e73b8e3b240850f93 ; ('linux' in sys_platform) and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=faa6b8c945a837a080f641bc8ccc77a98fa66980dcd7e62e715fd853737343fd ; ('linux' in sys_platform) and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=40f6fb65b345dc9a61813abe7ac9a585f2c9808f414d140cc2a5f11f53ee063c ; ('linux' in sys_platform) and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=b22b4c02ec71b4bfc862ae3cdfd2871dc0b05d2b1802f5db2196e0f897d581e9 ; ('linux' in sys_platform) and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp39-cp39-win_amd64.whl#sha256=d4b738d7fa5100c1bd766f91614962828a4810eb57b4df92cd5214a83505a752 ; sys_platform == 'win32' and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp310-cp310-win_amd64.whl#sha256=143fe8a64d807bcdb7d81bbc062816add325570aa160448454ab6ded4a0a17a1 ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp311-cp311-win_amd64.whl#sha256=a8025459ff325d6e3532eb5cf72519db1b178155e7d60aff6c56beb5968fc758 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp312-cp312-win_amd64.whl#sha256=0dd07e6d5b872e42e48f5ee140e609d4554ca3cc509d5bf509ac232267cf358e ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.3.0-cp313-cp313-win_amd64.whl#sha256=a936a18182d8e065a9933afc9a3ebbffadd38604969f87c493831214539fc027 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # bitsandbytes preview wheels from the continuous-release_main GitHub
    # release; these URLs carry no #sha256 fragment and are py3-none (not
    # per-CPython), so only OS/architecture markers are needed.
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-win_amd64.whl ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torch 2.7.0+xpu: Linux wheels first, then Windows.
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp39-cp39-linux_x86_64.whl#sha256=f8ee75e50fcbb37ed5b498299ca2264da99ab278a93fae2358e921e4a6e28273 ; ('linux' in sys_platform) and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp310-cp310-linux_x86_64.whl#sha256=d6fdc342961d98fdcd9d03dfd491a3208bb5f7fbb435841f8f72ce9fdcd2d026 ; ('linux' in sys_platform) and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp311-cp311-linux_x86_64.whl#sha256=74d07f9357df5cf2bf223ad3c84de16346bfaa0504f988fdd5590d3e177e5e86 ; ('linux' in sys_platform) and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp312-cp312-linux_x86_64.whl#sha256=c806d44aa2ca5d225629f6fbc6c994d5deaac2d2cde449195bc8e3522ddd219a ; ('linux' in sys_platform) and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp313-cp313-linux_x86_64.whl#sha256=25d8277b7f01d42e2e014ccbab57a2692b6ec4eff8dcf894eda1b297407cf97a ; ('linux' in sys_platform) and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp39-cp39-win_amd64.whl#sha256=046e85125266ae69c1a0d083e6c092f947ab4b6b41532c16bafe40dbced845df ; sys_platform == 'win32' and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=9ebaeffb82b0b3e39b6030927d3ebe0eb62a0e9045a3b2d7b0a9e7b15222c0db ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=356ba66cee127e7e2c942880bd50e03768306a4ea08d358a0f29c6eebfc4bc81 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=94739e665d9b4d5cd7af5f517cb6103f6f9fb421c095184609653a24524040f5 ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.7.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=31df3cb674918e89bc8c532baa331dc84f4430e1f9c0ec379232db44cba78355 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
# Hyphenated alias: `unsloth[intel-gpu-torch270]` simply pulls in the
# `intelgputorch270` extra.
intel-gpu-torch270 = [
    "unsloth[intelgputorch270]"
]
# Intel GPU extra pinned to torch 2.8.0+xpu, pytorch_triton_xpu 3.4.0 and
# torchvision 0.23.0+xpu. Every wheel is a direct URL reference; exactly one
# entry per package is selected by its environment marker (OS, CPython
# 3.9-3.13, x86_64/AMD64).
intelgputorch280 = [
    "unsloth_zoo[intelgpu]",
    "unsloth[huggingfacenotorch]",

    # pytorch_triton_xpu 3.4.0: Linux (manylinux) wheels first, then Windows.
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=ac4d8e33986b1c3c5e48151640539272b2187e83016985853111b46fb82c3c94 ; 'linux' in sys_platform and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=999fef4c1f711092b9d3086525920545df490de476ecebe899ffc777019ae17f ; 'linux' in sys_platform and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=57b09c8c492985ff6a27cd3a22b08e8f7b96b407bd8030967b6efbb9f63b80cf ; 'linux' in sys_platform and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=df4bb3282bac9a3b90231700077110d8680b338416de03c2b7c6133c9b602649 ; 'linux' in sys_platform and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=60da63c99ca827bdcb0df28e0298bf7d066dc607454c6d6176783cb4e79d838b ; 'linux' in sys_platform and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp39-cp39-win_amd64.whl#sha256=64aea8de349f3e2e0ebf4c24b011a8122531fdffda5776edaef45829cc241cf8 ; sys_platform == 'win32' and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp310-cp310-win_amd64.whl#sha256=ae573d255b257fdbed319a3440dc9d0a721e31160ab7f6eba1b2226e6a409a1d ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp311-cp311-win_amd64.whl#sha256=8e0ea4558e5776d8ddab0264310be9b26aee5641bcac0da023537556d4317b86 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp312-cp312-win_amd64.whl#sha256=4090dde07a4fffc34aaf855701a9db28e9fccb57b368ade520f1a0f8e811c878 ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.4.0-cp313-cp313-win_amd64.whl#sha256=a33d0888f3c8df028a2d028842715837d0049524d6c06b9bb11869890a13601a ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torch 2.8.0+xpu: Linux wheels first, then Windows.
    # NOTE(review): the five Linux torch URLs below have no #sha256 fragment,
    # unlike the Windows torch entries and the triton/torchvision pins in this
    # extra. Consider adding the hashes published on download.pytorch.org.
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp39-cp39-linux_x86_64.whl ; 'linux' in sys_platform and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp310-cp310-linux_x86_64.whl ; 'linux' in sys_platform and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp311-cp311-linux_x86_64.whl ; 'linux' in sys_platform and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp312-cp312-linux_x86_64.whl ; 'linux' in sys_platform and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp313-cp313-linux_x86_64.whl ; 'linux' in sys_platform and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp39-cp39-win_amd64.whl#sha256=f2f401276892428e4875cf1d8717c5cbab704b16fc594ccf23795e7b16549a99 ; sys_platform == 'win32' and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=125c60cd59d51b39581a7e9afcd4679bc3a6b8c1f9440b1bb502a23fdd60571e ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=47f1a57258cd460e80b38b2ed6744e31587ab77a96b4215bf59546cb4bab5cc0 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=0937d8943c145a83d9bafc6f80ef28971167817f9eda26066d33f72caf8a6646 ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.8.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=e034aab1d71760dc80a731531be43673ffe15e99033b82d24e40d2e6d41bd8bf ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # bitsandbytes preview wheels from the continuous-release_main GitHub
    # release; py3-none wheels, so only OS/architecture markers are needed.
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-win_amd64.whl ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torchvision 0.23.0+xpu: Linux (manylinux_2_28) wheels first, then Windows.
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp39-cp39-manylinux_2_28_x86_64.whl#sha256=6e981c192045fc249c008441179ff237bb00174d818b875b0475730b63f0eaca ; 'linux' in sys_platform and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp310-cp310-manylinux_2_28_x86_64.whl#sha256=e5ba4805969277175ebfd59cc717093528cc6e3ada89ac2725fc7a3c1fee6169 ; 'linux' in sys_platform and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp311-cp311-manylinux_2_28_x86_64.whl#sha256=74c39c144104416bc4c5ad8c26ab0c169dc5cc6be58059e01bc3665dd0ef676f ; 'linux' in sys_platform and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp312-cp312-manylinux_2_28_x86_64.whl#sha256=0acec355b80c3899841184084f365df336c508602812e34a44007b8b60d53af4 ; 'linux' in sys_platform and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl#sha256=e2109ae773dad27b98ca17681044b4f876563c37f2382b75de3a371399edcff8 ; 'linux' in sys_platform and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp39-cp39-win_amd64.whl#sha256=5f7904e7048d414379bc8c1167260f1e84204f105db2d0a2f9c89e87ce1cf205 ; sys_platform == 'win32' and python_version == '3.9' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=005fca5e658ca8e37adb63c1a021c84f5e56dfa6cf0d601d89cfe40b9473f79f ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=c6d030f5361461550c0ff1339b5bca8585fc1e84fda2e64b6184e65a581e4f98 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=91aafd61864cdce27461cbec13ddbf28c1bc6494265a1e4b80131c64a3b7d18f ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.23.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=71dc4a6421742ed1e7f585b04a100ad53615c341fbccfbc255aefb38ea9091da ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
# Hyphenated alias: `unsloth[intel-gpu-torch280]` simply pulls in the
# `intelgputorch280` extra.
intel-gpu-torch280 = [
    "unsloth[intelgputorch280]"
]
# Intel GPU extra pinned to torch 2.9.0+xpu, pytorch_triton_xpu 3.5.0 and
# torchvision 0.24.0+xpu. Only CPython 3.10-3.13 wheels are listed (there are
# no cp39 entries in this extra).
# Linux entries for the PyTorch wheels are gated on
# `platform_system == 'Linux'` and `platform_machine == 'x86_64'` only, while
# the Windows entries and bitsandbytes also accept 'AMD64'.
intelgputorch290 = [
    "unsloth_zoo[intelgpu]",
    "unsloth[huggingfacenotorch]",

    # pytorch_triton_xpu 3.5.0: Linux (manylinux) wheels first, then Windows.
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=c169a1de14c19673b17c751290d467fa282fc90fa5da4314b2e5cdab1f553146 ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=013d9dd5d6479bd22983161f462e61c8dbe1d82e6730624a7a8d5945507eaa61 ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=afc8cabfbf7ed51fd278d1e0f88d6afc157b0201bad4b99d681e4d542f9e66d4 ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=0d24c1716088f2764d0d24c64227732195b6a42706c3c5fc89eeb4904bfa0818 ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp310-cp310-win_amd64.whl#sha256=c83ab007311d9cfb6e809ee5a4587d99a9eef4be720b90da4f1aaa68b45139a0 ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp311-cp311-win_amd64.whl#sha256=debf75348da8e8c7166b4d4a9b91d1508bb8d6581e339f79f7604b2e6746bacd ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp312-cp312-win_amd64.whl#sha256=97337a47425f1963a723475bd61037460e84ba01db4f87a1d662c3718ff6c47e ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp313-cp313-win_amd64.whl#sha256=2caf8138695f6abb023ecd02031a2611ba1bf8fff2f19802567cb2fadefe9e87 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torch 2.9.0+xpu: Linux wheels first, then Windows.
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp310-cp310-linux_x86_64.whl#sha256=5afbe860ce991825a36b75706a523601087e414b77598ef0d9d3d565741c277d ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp311-cp311-linux_x86_64.whl#sha256=607fe419c32d6e8e0556f745742e7cff1d0babce51f54be890e0c1422359c442 ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp312-cp312-linux_x86_64.whl#sha256=376bae584d89980b8e59934d248c38d5fa3b7d4687a4df1a19f4bc1d23dcc8c1 ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp313-cp313-linux_x86_64.whl#sha256=98d6a06dd7fb185874367b18bd609f05f16fdce4142a5980ca94461949965cd2 ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=47cc68f631f65bd9c84924d052cd04dec7531023caa85e80345e9c94611c887d ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=d56c44ab4818aba57e5c7b628f422d014e0d507427170a771c5be85e308b0bc6 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=18cad93aaff76a01ce73aef6935ece7cfc03344b905592ec731446c44d44592b ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.9.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=579929cdc10a76800ead41289cac191ea36d1b16f5f501d3fc25607d4375cd83 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # bitsandbytes preview wheels from the continuous-release_main GitHub
    # release; py3-none wheels, so only OS/architecture markers are needed.
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-win_amd64.whl ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torchvision 0.24.0+xpu: Linux (manylinux_2_28) wheels first, then Windows.
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp310-cp310-manylinux_2_28_x86_64.whl#sha256=cbfae2b79b7549fd368c2462fc8e94f8f26cc450782ee72138e908077c09a519 ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp311-cp311-manylinux_2_28_x86_64.whl#sha256=044fa36ef4b6b43edcd490b75c853fa4b3eb033c2bded29f8fbcf27734713c67 ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp312-cp312-manylinux_2_28_x86_64.whl#sha256=4b91e4bec1d740a6211f02578a79888550b73f3a4e1383035f8f6d72f587212c ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl#sha256=88239e73ca37254bec84f29cd5887e10ff712de7edbbda3fbb3609cd6190d99e ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=19c7da8ca767d593e13a88a12bb08d06e34a673f6f26c2f9c191d60e81c02953 ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=9bb0d1421c544ac8e2eca5b47daacaf54706dc9139c003aa5e77ee5f355c5931 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=6a5194bc736089606342d48a3f6822829b167617e9495d91d753dd1bd46fda18 ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.24.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=da47a3ce2bb7f0301a31124668b5908f9b9e92d6241443de15a310ef9632fd83 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
# Hyphenated alias: `unsloth[intel-gpu-torch290]` simply pulls in the
# `intelgputorch290` extra.
intel-gpu-torch290 = [
    "unsloth[intelgputorch290]"
]
# Intel GPU extra pinned to torch 2.10.0+xpu and torchvision 0.25.0+xpu.
# Naming caveat: "210" here stands for torch 2.10.0 (see the wheel URLs), not
# torch 2.1.0.
# Only CPython 3.10-3.13 wheels are listed. Linux entries for the PyTorch
# wheels are gated on `platform_system == 'Linux'` and
# `platform_machine == 'x86_64'` only.
intelgputorch210 = [
    "unsloth_zoo[intelgpu]",
    "unsloth[huggingfacenotorch]",

    # NOTE(review): these are pytorch_triton_xpu 3.5.0 wheels with the same
    # URLs and sha256 values as the ones pinned in `intelgputorch290`. Confirm
    # that torch 2.10.0+xpu accepts 3.5.0 and does not require a newer
    # pytorch_triton_xpu release; otherwise the resolver will report a conflict.
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=c169a1de14c19673b17c751290d467fa282fc90fa5da4314b2e5cdab1f553146 ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=013d9dd5d6479bd22983161f462e61c8dbe1d82e6730624a7a8d5945507eaa61 ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=afc8cabfbf7ed51fd278d1e0f88d6afc157b0201bad4b99d681e4d542f9e66d4 ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl#sha256=0d24c1716088f2764d0d24c64227732195b6a42706c3c5fc89eeb4904bfa0818 ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp310-cp310-win_amd64.whl#sha256=c83ab007311d9cfb6e809ee5a4587d99a9eef4be720b90da4f1aaa68b45139a0 ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp311-cp311-win_amd64.whl#sha256=debf75348da8e8c7166b4d4a9b91d1508bb8d6581e339f79f7604b2e6746bacd ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp312-cp312-win_amd64.whl#sha256=97337a47425f1963a723475bd61037460e84ba01db4f87a1d662c3718ff6c47e ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "pytorch_triton_xpu @ https://download.pytorch.org/whl/pytorch_triton_xpu-3.5.0-cp313-cp313-win_amd64.whl#sha256=2caf8138695f6abb023ecd02031a2611ba1bf8fff2f19802567cb2fadefe9e87 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torch 2.10.0+xpu: Linux wheels first, then Windows.
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp310-cp310-linux_x86_64.whl#sha256=abb1d1ec1ac672bac0ff35420c965f2df0c636ef9d94e2a830e34578489d0a57 ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp311-cp311-linux_x86_64.whl#sha256=71ad2f82da0f41eaec159f39fc85854e27c2391efa91b373e550648a6f4aaad3 ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp312-cp312-linux_x86_64.whl#sha256=b473571d478912f92881cc13f15fa18f8463fb0fb8a068c96ed47a7d45a4da0a ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp313-cp313-linux_x86_64.whl#sha256=3bc64a746ff25a93de140902c60c9e819d7413f5cea1e88d80999c27a5901e9c ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=ce50691ab3fb6301d9b7bb8b3834cf5fa7152a2b5f91fd24c5efdc601a25b780 ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=cb9d37f21cb9fb7df67d62863f021c3144e8d8832b9ea8e8523ac308bc620ea1 ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=3ad605be4728b6d3a28a44d07dd794b1a9e45551b0057815bf25eb2a6d6a56a7 ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torch @ https://download.pytorch.org/whl/xpu/torch-2.10.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=2b4b56dd6c792aef82006904fa888692e3782e4ae5da27526801bad4898f05a5 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # bitsandbytes preview wheels from the continuous-release_main GitHub
    # release; py3-none wheels, so only OS/architecture markers are needed.
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "bitsandbytes @ https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-win_amd64.whl ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",

    # torchvision 0.25.0+xpu: Linux (manylinux_2_28) wheels first, then Windows.
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp310-cp310-manylinux_2_28_x86_64.whl#sha256=7e1e7b170fcf7161c8499b67156c5a05462243626dc0974010791a0bab4378d3 ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp311-cp311-manylinux_2_28_x86_64.whl#sha256=bd6add201bd7628af70437292e1447abb368e0b5f4ff9abd334ae435efd44792 ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp312-cp312-manylinux_2_28_x86_64.whl#sha256=6ad2543496bc29e59d3dd614a94d09aa9870318aedb66045344fffddfedd2cf8 ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl#sha256=80269f37865fcd8b57f20e4786efae2200bfa2b2727926c3c7acc82f0e7d3548 ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp310-cp310-win_amd64.whl#sha256=6b9485ba85dcba4d196d6134d9c3332fb228fb2556416bf0450a64e8a472fcba ; sys_platform == 'win32' and python_version == '3.10' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp311-cp311-win_amd64.whl#sha256=36cbaedf10f6412af5c89afd9aeea474e6a56a0050348ada8fabe1ecaf6b879e ; sys_platform == 'win32' and python_version == '3.11' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp312-cp312-win_amd64.whl#sha256=738357d97468d75fe3d510ac37e65130f2787f81d9bbc1518898f7396dc3403f ; sys_platform == 'win32' and python_version == '3.12' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
    "torchvision @ https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-win_amd64.whl#sha256=1c4b44b36a557f7381e3076fb8843366742238648441d607c8d049c6da0f8886 ; sys_platform == 'win32' and python_version == '3.13' and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
intel-gpu-torch210 = [
    "unsloth[intelgputorch210]"
]
intel = [
    "unsloth[intelgputorch280]",
]
amd = [
    "unsloth[huggingfacenotorch]",
    "bitsandbytes>=0.49.1 ; ('linux' in sys_platform) and (platform_machine == 'AMD64' or platform_machine == 'x86_64' or platform_machine == 'aarch64')",
    "bitsandbytes>=0.49.1 ; (sys_platform == 'win32') and (platform_machine == 'AMD64' or platform_machine == 'x86_64')",
]
rocm702-torch280 = [
    "unsloth[amd]",

    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/triton-3.4.0%2Brocm7.0.2.gitf9e5bf54-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/triton-3.4.0%2Brocm7.0.2.gitf9e5bf54-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/triton-3.4.0%2Brocm7.0.2.gitf9e5bf54-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torch-2.8.0%2Brocm7.0.2.lw.git245bf6ed-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torch-2.8.0%2Brocm7.0.2.lw.git245bf6ed-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torch-2.8.0%2Brocm7.0.2.lw.git245bf6ed-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torchvision-0.23.0%2Brocm7.0.2.git824e8c87-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torchvision-0.23.0%2Brocm7.0.2.git824e8c87-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.0.2/torchvision-0.23.0%2Brocm7.0.2.git824e8c87-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
]
rocm72-torch291 = [
    "unsloth[amd]",

    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.5.1%2Brocm7.2.0.gita272dfa8-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.5.1%2Brocm7.2.0.gita272dfa8-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.5.1%2Brocm7.2.0.gita272dfa8-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.5.1%2Brocm7.2.0.gita272dfa8-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/windows/rocm-rel-7.2/torch-2.9.1%2Brocmsdk20260116-cp312-cp312-win_amd64.whl ; sys_platform == 'win32' and python_version == '3.12'",

    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.24.0%2Brocm7.2.0.gitb919bd0c-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.24.0%2Brocm7.2.0.gitb919bd0c-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.24.0%2Brocm7.2.0.gitb919bd0c-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.24.0%2Brocm7.2.0.gitb919bd0c-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/windows/rocm-rel-7.2/torchvision-0.24.1%2Brocmsdk20260116-cp312-cp312-win_amd64.whl ; sys_platform == 'win32' and python_version == '3.12'",
]
rocm711-torch291 = [
    "unsloth[amd]",

    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.5.1%2Brocm7.1.1.gita272dfa8-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.5.1%2Brocm7.1.1.gita272dfa8-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.5.1%2Brocm7.1.1.gita272dfa8-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.5.1%2Brocm7.1.1.gita272dfa8-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.9.1%2Brocm7.1.1.lw.git351ff442-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.9.1%2Brocm7.1.1.lw.git351ff442-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.9.1%2Brocm7.1.1.lw.git351ff442-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.9.1%2Brocm7.1.1.lw.git351ff442-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.24.0%2Brocm7.1.1.gitb919bd0c-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.24.0%2Brocm7.1.1.gitb919bd0c-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.24.0%2Brocm7.1.1.gitb919bd0c-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.24.0%2Brocm7.1.1.gitb919bd0c-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
]
rocm72-torch2100 = [
    "unsloth[amd]",

    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.6.0%2Brocm7.2.0.gitba5c1517-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.6.0%2Brocm7.2.0.gitba5c1517-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.6.0%2Brocm7.2.0.gitba5c1517-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/triton-3.6.0%2Brocm7.2.0.gitba5c1517-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.10.0%2Brocm7.2.0.lw.gitb6ee5fde-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.10.0%2Brocm7.2.0.lw.gitb6ee5fde-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.10.0%2Brocm7.2.0.lw.gitb6ee5fde-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.10.0%2Brocm7.2.0.lw.gitb6ee5fde-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.25.0%2Brocm7.2.0.git82df5f59-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.25.0%2Brocm7.2.0.git82df5f59-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.25.0%2Brocm7.2.0.git82df5f59-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torchvision-0.25.0%2Brocm7.2.0.git82df5f59-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
]
rocm711-torch2100 = [
    "unsloth[amd]",

    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.6.0%2Brocm7.1.1.gitba5c1517-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.6.0%2Brocm7.1.1.gitba5c1517-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.6.0%2Brocm7.1.1.gitba5c1517-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "triton @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/triton-3.6.0%2Brocm7.1.1.gitba5c1517-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.10.0%2Brocm7.1.1.lw.gitd9556b05-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.10.0%2Brocm7.1.1.lw.gitd9556b05-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.10.0%2Brocm7.1.1.lw.gitd9556b05-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torch @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torch-2.10.0%2Brocm7.1.1.lw.gitd9556b05-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",

    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.25.0%2Brocm7.1.1.git82df5f59-cp310-cp310-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.10' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.25.0%2Brocm7.1.1.git82df5f59-cp311-cp311-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.11' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.25.0%2Brocm7.1.1.git82df5f59-cp312-cp312-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.12' and platform_machine == 'x86_64'",
    "torchvision @ https://repo.radeon.com/rocm/manylinux/rocm-rel-7.1.1/torchvision-0.25.0%2Brocm7.1.1.git82df5f59-cp313-cp313-linux_x86_64.whl ; platform_system == 'Linux' and python_version == '3.13' and platform_machine == 'x86_64'",
]

[project.urls]
homepage = "https://unsloth.ai"
documentation = "https://unsloth.ai/docs"
repository = "https://github.com/unslothai/unsloth"

# Ruff configuration shared by the linter and the formatter.
[tool.ruff]
target-version = "py311"
# Honour the exclusions below even for files that are passed to ruff
# explicitly (for example by a pre-commit hook).
force-exclude = true
# Glob patterns for files ruff must leave untouched.
extend-exclude = [
    "*chat_templates.py",
    "*ollama_template_mappers.py",
    "*_auto_install.py",
    "*mapper.py",
]

# Lint rules: only the prefixes listed in `select` are enabled; the codes in
# `ignore` are switched off.
[tool.ruff.lint]
select = ["E9", "F63", "F7", "F82"]
ignore = [
    "E402",
    "E722",
    "F403",
    "F405",
    "F811",
    "F821",
    "F841",
    "F401",
    "E731",
    "E741",
    "F601",
    "E712",
]

[tool.ruff.format]


================================================
FILE: scripts/enforce_kwargs_spacing.py
================================================
#!/usr/bin/env python3
"""Ensure keyword arguments use spaces around '=', prune redundant pass statements."""

from __future__ import annotations

import ast
import argparse
import io
import sys
import tokenize
from collections import defaultdict
from pathlib import Path


def _is_fstring_debug_equals(tokens: list[tokenize.TokenInfo], index: int) -> bool:
    """Tell whether the '=' at ``tokens[index]`` is an f-string ``{expr=}`` marker.

    Interpreters that tokenize f-string contents report that marker as an
    ordinary '=' operator. It is recognisable by what follows it: the closing
    '}', a '!' conversion or a ':' format spec -- none of which can start the
    right-hand side of an assignment, default value or keyword argument.
    """
    cursor = index + 1
    # Line breaks and comments may sit between the '=' and the closing brace.
    while cursor < len(tokens) and tokens[cursor].type in (
        tokenize.NL,
        tokenize.COMMENT,
    ):
        cursor += 1
    return cursor < len(tokens) and tokens[cursor].string in {"}", "!", ":"}


def enforce_spacing(text: str) -> tuple[str, bool]:
    """Return ``text`` with every '=' operator token padded by spaces.

    Every standalone '=' token reported by the tokenizer (assignments,
    parameter defaults and keyword arguments alike) gets one space inserted
    on whichever side is not already followed/preceded by whitespace.
    Compound operators such as '==' or ':=' are separate tokens and are left
    alone, as is the '=' of an f-string ``{expr=}`` debug specifier, because
    whitespace around that one is echoed in the rendered string.

    Args:
        text: Python source code.

    Returns:
        ``(new_text, changed)``. When nothing needed padding, or when the
        source cannot be tokenized, the original ``text`` is returned with
        ``changed`` set to ``False``.
    """
    # Split on "\n" only, exactly like the readline callable handed to the
    # tokenizer below, so that token row numbers index this list correctly
    # (str.splitlines would also break on form feeds and Unicode separators).
    lines = io.StringIO(text).readlines()
    if not lines:
        return text, False

    # Tokenize everything up front: a failure then leaves the text untouched
    # instead of half-edited, mirroring how remove_redundant_passes treats
    # unparsable input.
    try:
        tokens = list(tokenize.generate_tokens(io.StringIO(text).readline))
    except (tokenize.TokenError, SyntaxError):
        return text, False

    # Characters inserted so far on each line; later tokens on the same line
    # are shifted right by that amount.
    offsets: dict[int, int] = defaultdict(int)
    changed = False

    for index, token in enumerate(tokens):
        if token.type != tokenize.OP or token.string != "=":
            continue
        if _is_fstring_debug_equals(tokens, index):
            continue

        line_index = token.start[0] - 1
        col = token.start[1] + offsets[line_index]

        if line_index < 0 or line_index >= len(lines):
            continue

        line = lines[line_index]
        # Guard against a reported position that does not match the text.
        if col >= len(line) or line[col] != "=":
            continue

        line_changed = False

        # Insert a space before '=' when missing and not preceded by whitespace.
        if col > 0 and line[col - 1] not in {" ", "\t"}:
            line = f"{line[:col]} {line[col:]}"
            offsets[line_index] += 1
            col += 1
            line_changed = True

        # Insert a space after '=' when missing and not followed by whitespace or newline.
        next_index = col + 1
        if next_index < len(line) and line[next_index] not in {" ", "\t", "\n", "\r"}:
            line = f"{line[:next_index]} {line[next_index:]}"
            offsets[line_index] += 1
            line_changed = True

        if line_changed:
            lines[line_index] = line
            changed = True

    if not changed:
        return text, False

    return "".join(lines), True


def _split_like_parser(text: str) -> list[str]:
    """Split ``text`` into physical lines the way the Python parser counts them.

    ``str.splitlines`` also breaks on form feeds and several Unicode
    separators, which the parser does not treat as line ends. Fragments that
    do not end in a line feed or carriage return are therefore glued to the
    fragment that follows, so AST line numbers index the result correctly.
    """
    merged: list[str] = []
    for piece in text.splitlines(keepends=True):
        if merged and not merged[-1].endswith(("\n", "\r")):
            merged[-1] += piece
        else:
            merged.append(piece)
    return merged


def remove_redundant_passes(text: str) -> tuple[str, bool]:
    """Drop ``pass`` statements that share a block with other statements.

    Every statement list in the module is inspected (``body``, ``orelse`` and
    ``finalbody`` of any node, which includes exception handlers and ``match``
    cases). A ``pass`` is removed only when its block keeps at least one
    statement afterwards and the ``pass`` sits on a line of its own; a
    trailing comment on that line is kept. A ``pass`` that shares its line
    with other code (``pass; x = 1``) is left alone, because cutting it out
    could leave invalid syntax behind.

    Args:
        text: Python source code.

    Returns:
        ``(new_text, changed)``. The original ``text`` is returned with
        ``changed`` set to ``False`` when it cannot be parsed or when there is
        nothing to remove.
    """
    try:
        tree = ast.parse(text)
    except SyntaxError:
        return text, False

    redundant: list[ast.Pass] = []
    for node in ast.walk(tree):
        for attr in ("body", "orelse", "finalbody"):
            block = getattr(node, attr, None)
            # Lambda / conditional-expression bodies are expressions, not lists.
            if not isinstance(block, list) or len(block) <= 1:
                continue
            passes = [stmt for stmt in block if isinstance(stmt, ast.Pass)]
            if len(passes) == len(block):
                # A block made only of passes must keep one to stay valid.
                del passes[0]
            redundant.extend(passes)

    if not redundant:
        return text, False

    lines = _split_like_parser(text)
    changed = False

    for node in redundant:
        index = node.lineno - 1
        if index >= len(lines):
            continue
        line = lines[index]
        col_start = node.col_offset
        col_end = node.end_col_offset
        # AST columns are UTF-8 byte offsets; make sure they really frame the
        # keyword before cutting anything out of the line.
        if col_end is None or line[col_start:col_end] != "pass":
            continue
        before = line[:col_start]
        after = line[col_end:]
        trailing = after.strip()
        if before.strip() or (trailing and not trailing.startswith("#")):
            # Other code lives on this line (e.g. "pass; x = 1"): keep it intact.
            continue
        remainder = before + after
        # Drop the line entirely unless a comment is left on it.
        lines[index] = remainder if remainder.strip() else ""
        changed = True

    if not changed:
        return text, False

    return "".join(lines), True


def process_file(path: Path) -> bool:
    """Apply both fixers to the file at ``path`` and rewrite it when needed.

    The file is read through ``tokenize.open`` and, if either
    ``enforce_spacing`` or ``remove_redundant_passes`` reports a change, it is
    written back using the encoding that ``tokenize.open`` detected.

    Args:
        path: Python source file to fix in place.

    Returns:
        ``True`` when the file was rewritten, ``False`` when it was left alone
        or could not be read (the reason is printed to stderr).
    """
    try:
        with tokenize.open(path) as handle:
            original = handle.read()
            encoding = handle.encoding
    except (OSError, SyntaxError, UnicodeDecodeError) as exc:
        # SyntaxError: bad encoding declaration detected by tokenize.open.
        # UnicodeDecodeError: undecodable bytes hit later, during read().
        print(f"Failed to read {path}: {exc}", file=sys.stderr)
        return False

    updated, changed = enforce_spacing(original)
    updated, removed = remove_redundant_passes(updated)
    if changed or removed:
        path.write_text(updated, encoding=encoding)
        return True
    return False


def main(argv: list[str]) -> int:
    """Fix every regular file named in ``argv`` and report the ones rewritten.

    Paths that resolve to this script, do not exist, or are directories are
    skipped silently. One line is printed per rewritten file. Always returns 0.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("files", nargs="+", help="Python files to fix")
    options = cli.parse_args(argv)

    this_script = Path(__file__).resolve()
    modified: list[Path] = []

    for raw_name in options.files:
        candidate = Path(raw_name)
        eligible = (
            candidate.resolve() != this_script  # never rewrite this script itself
            and candidate.exists()
            and not candidate.is_dir()
        )
        if eligible and process_file(candidate):
            modified.append(candidate)

    for candidate in modified:
        print(f"Adjusted kwarg spacing in {candidate}")
    return 0


# Script entry point: pass the command-line arguments (without the program
# name) to main() and exit with the status it returns.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))


================================================
FILE: scripts/run_ruff_format.py
================================================
#!/usr/bin/env python3
"""Run `ruff format` followed by kwarg spacing enforcement."""

from __future__ import annotations

import subprocess
import sys
from pathlib import Path

# Directory containing this script; main() looks up enforce_kwargs_spacing.py in it.
HERE = Path(__file__).resolve().parent


def main(argv: list[str]) -> int:
    """Format the existing paths in ``argv`` with ruff, then fix kwarg spacing.

    Arguments that do not name an existing path are dropped; if none remain
    the function returns 0 without running anything. A non-zero exit status
    from ``ruff format`` is returned immediately, otherwise the exit status of
    the spacing script is returned.
    """
    existing = list(filter(lambda candidate: Path(candidate).exists(), argv))
    if not existing:
        return 0

    interpreter = sys.executable

    format_status = subprocess.run(
        [interpreter, "-m", "ruff", "format", *existing]
    ).returncode
    if format_status != 0:
        return format_status

    # The spacing fixer runs second so that it has the final say on '=' padding.
    enforcer = str(HERE / "enforce_kwargs_spacing.py")
    return subprocess.run([interpreter, enforcer, *existing]).returncode


# Script entry point: exit with the status returned by main() for the
# command-line arguments (program name excluded).
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))


================================================
FILE: studio/LICENSE.AGPL-3.0
================================================
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.

  A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate.  Many developers of free software are heartened and
encouraged by the resulting cooperation.  However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.

  The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community.  It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server.  Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.

  An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals.  This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Remote Network Interaction; Use with the GNU General Public License.

  Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software.  This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time.  Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source.  For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code.  There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.


================================================
FILE: studio/Unsloth_Studio_Colab.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6b87de59",
   "metadata": {},
   "source": [
    "To run this, press \"*Runtime*\" and press \"*Run all*\" on a **free** Tesla T4 Google Colab instance!\n",
    "<div class=\"align-center\">\n",
    "<a href=\"https://unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
    "<a href=\"https://discord.gg/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Discord button.png\" width=\"145\"></a>\n",
    "<a href=\"https://unsloth.ai/docs/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a> Join Discord if you need help + ⭐ <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> ⭐\n",
    "</div>\n",
    "\n",
    "To install Unsloth Studio on your local device, follow [our guide](https://unsloth.ai/docs/new/unsloth-studio/install). Unsloth Studio is licensed [AGPL-3.0](https://github.com/unslothai/unsloth/blob/main/studio/LICENSE.AGPL-3.0).\n",
    "\n",
    "### Unsloth Studio\n",
     "\n",
        "Train and run open models with [**Unsloth Studio**](https://unsloth.ai/docs/new/unsloth-studio/start). Currently, installation may take 30+ mins so use a newer GPU.\n",
        "\n",
        "\n",
        "We are actively working on making Unsloth Studio install on Colab T4 GPUs faster.\n",
        "\n",
        "[Features](https://unsloth.ai/docs/new/unsloth-studio#features) • [Quickstart](https://unsloth.ai/docs/new/unsloth-studio/start) • [Data Recipes](https://unsloth.ai/docs/new/unsloth-studio/data-recipe) • [Studio Chat](https://unsloth.ai/docs/new/unsloth-studio/chat) • [Export](https://unsloth.ai/docs/new/unsloth-studio/export)"
      ]
  },
  {
   "cell_type": "markdown",
   "id": "e4206349",
   "metadata": {},
   "source": [
    "<p align=\"left\"><img src=\"https://github.com/unslothai/unsloth/raw/main/studio/frontend/public/studio%20github%20landscape%20colab%20display.png\" width=\"600\"></p>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "27da2957",
   "metadata": {},
   "source": [
    "### Setup: Clone repo and run setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27e68f91",
   "metadata": {},
   "outputs": [],
   "source": [
    "!git clone --depth 1 --branch main https://github.com/unslothai/unsloth.git\n",
    "%cd /content/unsloth\n",
    "\n",
    "# Run setup script\n",
    "!chmod +x studio/setup.sh\n",
    "!./studio/setup.sh"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e1771a9",
   "metadata": {},
   "source": [
    "### Start Unsloth Studio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "277e431e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '/content/unsloth/studio/backend')\n",
    "\n",
    "from colab import start\n",
    "start()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f2b0c6a1",
   "metadata": {},
   "source": [
    "And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord!\n",
    "\n",
    "Some other resources:\n",
    "1. Looking to use Unsloth locally? Read our [Installation Guide](https://unsloth.ai/docs/get-started/install) for details on installing Unsloth on Windows, Docker, AMD, Intel GPUs.\n",
    "2. Learn how to do Reinforcement Learning with our [RL Guide and notebooks](https://unsloth.ai/docs/get-started/reinforcement-learning-rl-guide).\n",
    "3. Read our guides and notebooks for [Text-to-speech (TTS)](https://unsloth.ai/docs/basics/text-to-speech-tts-fine-tuning) and [vision](https://unsloth.ai/docs/basics/vision-fine-tuning) model support.\n",
    "4. Explore our [LLM Tutorials Directory](https://unsloth.ai/docs/models/tutorials-how-to-fine-tune-and-run-llms) to find dedicated guides for each model.\n",
    "5. Need help with Inference? Read our [Inference & Deployment page](https://unsloth.ai/docs/basics/inference-and-deployment) for details on using vLLM, llama.cpp, Ollama etc.\n",
    "\n",
    "<div class=\"align-center\">\n",
    "  <a href=\"https://unsloth.ai\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
    "  <a href=\"https://discord.gg/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Discord.png\" width=\"145\"></a>\n",
    "  <a href=\"https://unsloth.ai/docs/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a>\n",
    "\n",
    "  Join Discord if you need help + ⭐️ <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> ⭐️\n",
    "\n",
    "  <b>This notebook is licensed <a href=\"https://github.com/unslothai/unsloth/blob/main/studio/LICENSE.AGPL-3.0\">AGPL-3.0</a></b>\n",
    "</div>"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "include_colab_link": true,
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: studio/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/assets/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/assets/configs/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/assets/configs/full_finetune.yaml
================================================
model: unsloth/Qwen2.5-0.5B

data:
  dataset: tatsu-lab/alpaca
  format_type: auto

training:
  training_type: full
  max_seq_length: 2048
  load_in_4bit: false
  output_dir: outputs
  num_epochs: 1
  learning_rate: 0.0002
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 0
  save_steps: 0
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: "unsloth"

lora:
  lora_r: 64
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules: ""
  vision_all_linear: false
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: unsloth-training
  enable_tensorboard: false
  tensorboard_dir: runs


================================================
FILE: studio/backend/assets/configs/inference_defaults.json
================================================
{
  "_comment": "Per-model-family inference parameter defaults. Sources: (1) Ollama params blobs, (2) Existing Unsloth Studio YAML configs. Patterns ordered longest-match-first.",
  "families": {
    "qwen3.5": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0,
      "presence_penalty": 1.5
    },
    "qwen3-coder": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen3-next": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen3-vl": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen3": {
      "temperature": 0.6,
      "top_p": 0.95,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen2.5-coder": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "qwen2.5-vl": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "qwen2.5-omni": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen2.5-math": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen2.5": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwen2-vl": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "qwen2": {
      "temperature": 0.7,
      "top_p": 0.8,
      "top_k": 20,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "qwq": {
      "temperature": 0.6,
      "top_p": 0.95,
      "top_k": 40,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "gemma-3n": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": 64,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "gemma-3": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": 64,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "medgemma": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": 64,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "gemma-2": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": 64,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "llama-4": {
      "temperature": 1.0,
      "top_p": 0.9,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "llama-3.3": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "llama-3.2": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "llama-3.1": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "llama-3": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "phi-4": {
      "temperature": 0.8,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.0,
      "repetition_penalty": 1.0
    },
    "phi-3": {
      "temperature": 0.7,
      "top_p": 0.9,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "mistral-nemo": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "mistral-small": {
      "temperature": 0.15,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "mistral-large": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "magistral": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "ministral": {
      "temperature": 0.15,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "devstral": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "pixtral": {
      "temperature": 1.5,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.1,
      "repetition_penalty": 1.0
    },
    "deepseek-r1": {
      "temperature": 0.6,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "deepseek-v3": {
      "temperature": 0.6,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "deepseek-ocr": {
      "temperature": 0.0,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "glm-5": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "glm-4": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "nemotron": {
      "temperature": 1.0,
      "top_p": 1.0,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "minimax-m2.5": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": 40,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "minimax": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": 40,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "gpt-oss": {
      "temperature": 1.0,
      "top_p": 1.0,
      "top_k": 0,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "granite-4": {
      "temperature": 0.0,
      "top_p": 1.0,
      "top_k": 0,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "kimi-k2": {
      "temperature": 0.6,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "kimi": {
      "temperature": 0.6,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "lfm2": {
      "temperature": 0.1,
      "top_p": 0.1,
      "top_k": 50,
      "min_p": 0.15,
      "repetition_penalty": 1.05
    },
    "smollm": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "olmo": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "falcon": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "ernie": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "seed": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "grok": {
      "temperature": 1.0,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    },
    "mimo": {
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": -1,
      "min_p": 0.01,
      "repetition_penalty": 1.0
    }
  },
  "patterns": [
    "qwen3.5",
    "qwen3-coder", "qwen3-next", "qwen3-vl", "qwen3",
    "qwen2.5-coder", "qwen2.5-vl", "qwen2.5-omni", "qwen2.5-math", "qwen2.5",
    "qwen2-vl", "qwen2",
    "qwq",
    "gemma-3n", "gemma-3", "medgemma", "gemma-2",
    "llama-4", "llama-3.3", "llama-3.2", "llama-3.1", "llama-3",
    "phi-4", "phi-3",
    "mistral-nemo", "mistral-small", "mistral-large", "magistral", "ministral",
    "devstral", "pixtral",
    "deepseek-r1", "deepseek-v3", "deepseek-ocr",
    "glm-5", "glm-4",
    "nemotron",
    "minimax-m2.5", "minimax",
    "gpt-oss", "granite-4",
    "kimi-k2", "kimi",
    "lfm2", "smollm", "olmo", "falcon", "ernie", "seed", "grok", "mimo"
  ]
}


================================================
FILE: studio/backend/assets/configs/lora_text.yaml
================================================
model: unsloth/Qwen2.5-0.5B

data:
  dataset: tatsu-lab/alpaca
  format_type: auto

training:
  training_type: lora
  max_seq_length: 2048
  load_in_4bit: true
  output_dir: outputs
  num_epochs: 1
  learning_rate: 0.0002
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 0
  save_steps: 0
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: "unsloth"

lora:
  lora_r: 64
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules: "q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj"
  vision_all_linear: false
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: unsloth-training
  enable_tensorboard: false
  tensorboard_dir: runs


================================================
FILE: studio/backend/assets/configs/model_defaults/default.yaml
================================================
# Default model training parameters
# Used for models without specific configurations

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 5e-5
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_ratio: 0.1
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true


logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.7
  top_p: 0.95
  top_k: -1
  min_p: 0.01


================================================
FILE: studio/backend/assets/configs/model_defaults/embedding/unsloth_Qwen3-Embedding-0.6B.yaml
================================================
# Model defaults for unsloth/Qwen3-Embedding-0.6B
# Based on Qwen3_Embedding_(0_6B).py embedding notebook
# Also applies to: unsloth/Qwen3-Embedding-4B

training:
  max_seq_length: 512
  # num_epochs: 2
  num_epochs: 0
  learning_rate: 3e-5
  batch_size: 256
  gradient_accumulation_steps: 1
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: false
  optim: "adamw_8bit"
  lr_scheduler_type: "constant_with_warmup"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "embedding-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 50


================================================
FILE: studio/backend/assets/configs/model_defaults/embedding/unsloth_all-MiniLM-L6-v2.yaml
================================================
# Model defaults for unsloth/all-MiniLM-L6-v2
# Based on All_MiniLM_L6_v2.py embedding notebook

training:
  max_seq_length: 512
  # num_epochs: 2
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 256
  gradient_accumulation_steps: 1
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: false
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 64
  lora_alpha: 128
  lora_dropout: 0.0
  target_modules:
    - "value"
    - "key"
    - "dense"
    - "query"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "embedding-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 50


================================================
FILE: studio/backend/assets/configs/model_defaults/embedding/unsloth_bge-m3.yaml
================================================
# Model defaults for unsloth/bge-m3
# Based on BGE_M3.py embedding notebook

training:
  max_seq_length: 512
  # num_epochs: 2
  num_epochs: 0
  learning_rate: 3e-5
  batch_size: 256
  gradient_accumulation_steps: 1
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: false
  optim: "adamw_8bit"
  lr_scheduler_type: "constant_with_warmup"

lora:
  lora_r: 32
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "key"
    - "query"
    - "dense"
    - "value"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "embedding-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 50


================================================
FILE: studio/backend/assets/configs/model_defaults/embedding/unsloth_embeddinggemma-300m.yaml
================================================
# Model defaults for unsloth/embeddinggemma-300m
# Based on EmbeddingGemma_(300M).py embedding notebook

training:
  max_seq_length: 1024
  # num_epochs: 1
  num_epochs: 0
  learning_rate: 2e-5
  batch_size: 64
  gradient_accumulation_steps: 2
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "embedding-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 5


================================================
FILE: studio/backend/assets/configs/model_defaults/embedding/unsloth_gte-modernbert-base.yaml
================================================
# Model defaults for unsloth/gte-modernbert-base
# Based on ModernBert.py embedding notebook

training:
  max_seq_length: 512
  # num_epochs: 2
  num_epochs: 0
  learning_rate: 3e-5
  batch_size: 256
  gradient_accumulation_steps: 1
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "constant_with_warmup"

lora:
  lora_r: 64
  lora_alpha: 128
  lora_dropout: 0.0
  target_modules:
    - "Wi"
    - "Wo"
    - "Wqkv"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "embedding-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 50


================================================
FILE: studio/backend/assets/configs/model_defaults/ernie/unsloth_ERNIE-4.5-21B-A3B-PT.yaml
================================================
# Model defaults for unsloth/ERNIE-4.5-21B-A3B-PT
# Based on ERNIE_4_5_21B_A3B_PT-Conversational.ipynb
# Also applies to: unsloth/ERNIE-4.5-21B-A3B-PT

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 4
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/ernie/unsloth_ERNIE-4.5-VL-28B-A3B-PT.yaml
================================================
# Model defaults for unsloth/ERNIE-4.5-VL-28B-A3B-PT
# Based on ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb
# Also applies to: unsloth/ERNIE-4.5-VL-28B-A3B-PT
# added inference parameters from unsloth notebook

training:
  trust_remote_code: true
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: true
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/falcon/tiiuae_Falcon-H1-0.5B-Instruct.yaml
================================================
# Model defaults for tiiuae/Falcon-H1-0.5B-Instruct
# Based on Falcon_H1_(0.5B)-Alpaca.ipynb
# Also applies to: unsloth/Falcon-H1-0.5B-Instruct

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 8
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: false
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.1
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_codegemma-7b-bnb-4bit.yaml
================================================
# Model defaults for unsloth/codegemma-7b-bnb-4bit
# Based on CodeGemma_(7B)-Conversational.ipynb
# Also applies to: unsloth/codegemma-7b, google/codegemma-7b
# added inference parameters from Ollama

training:
  trust_remote_code: false
  max_seq_length: 4096
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0
  top_p: 0.9


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_functiongemma-270m-it.yaml
================================================
# Model defaults for unsloth/functiongemma-270m-it
# Based on FunctionGemma_(270M).ipynb
# Also applies to: unsloth/functiongemma-270m-it-unsloth-bnb-4bit, google/functiongemma-270m-it
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 4096
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 4
  gradient_accumulation_steps: 2
  warmup_steps: 10
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 128
  lora_alpha: 256
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-2-27b-bnb-4bit.yaml
================================================
# Model defaults for unsloth/gemma-2-27b-bnb-4bit
# Based on Gemma2_(9B)-Alpaca.ipynb (same defaults for larger models)

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-2-2b.yaml
================================================
# Model defaults for unsloth/gemma-2-2b
# Based on Gemma2_(2B)-Alpaca.ipynb
# Also applies to: unsloth/gemma-2-2b-bnb-4bit, google/gemma-2-2b

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-3-270m-it.yaml
================================================
# Model defaults for unsloth/gemma-3-270m-it
# Based on Gemma3_(270M).ipynb
# Also applies to: unsloth/gemma-3-270m-it-unsloth-bnb-4bit, google/gemma-3-270m-it, unsloth/gemma-3-270m-it-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 5e-5
  batch_size: 4
  gradient_accumulation_steps: 1
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 128
  lora_alpha: 128
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-3-27b-it.yaml
================================================
# Model defaults for unsloth/gemma-3-27b-it
# Based on Gemma3_(27B)_A100-Conversational.ipynb
# Also applies to: unsloth/gemma-3-27b-it-unsloth-bnb-4bit, google/gemma-3-27b-it, unsloth/gemma-3-27b-it-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 8
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-3-4b-it.yaml
================================================
# Model defaults for unsloth/gemma-3-4b-it
# Based on Gemma3_(4B).ipynb
# Also applies to: unsloth/gemma-3-4b-it-unsloth-bnb-4bit, google/gemma-3-4b-it, unsloth/gemma-3-4b-it-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 8
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-3-4b-pt.yaml
================================================
# Model defaults for unsloth/gemma-3-4b-pt
# Based on Gemma3_(4B)-Vision.ipynb
# Also applies to: unsloth/gemma-3-4b-pt-unsloth-bnb-4bit, google/gemma-3-4b-pt, unsloth/gemma-3-4b-pt-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 2
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: true
  optim: "adamw_torch_fused"
  lr_scheduler_type: "cosine"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-3n-E4B-it.yaml
================================================
# Model defaults for unsloth/gemma-3n-E4B-it
# Based on Gemma3N_(4B)-Conversational.ipynb
# Also applies to: unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit, google/gemma-3n-E4B-it
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 1024
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 8
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

audio_input: true

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gemma/unsloth_gemma-3n-E4B.yaml
================================================
# Model defaults for unsloth/gemma-3n-E4B
# Based on Gemma3N_(4B)-Vision.ipynb
# Also applies to: unsloth/gemma-3n-E4B-unsloth-bnb-4bit, google/gemma-3n-E4B
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 2
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_ratio: 0.03
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: true
  optim: "adamw_torch_fused"
  lr_scheduler_type: "cosine"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

audio_input: true

inference:
  trust_remote_code: false
  temperature: 1.0
  top_k: 64
  top_p: 0.95
  min_p: 0.0


================================================
FILE: studio/backend/assets/configs/model_defaults/gpt-oss/unsloth_gpt-oss-120b.yaml
================================================
# Model defaults for unsloth/gpt-oss-120b
# Based on gpt-oss-(120B)_A100-Fine-tuning.ipynb
# Also applies to: openai/gpt-oss-120b, unsloth/gpt-oss-120b-unsloth-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 4096
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 4
  gradient_accumulation_steps: 1
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_p: 1.0
  top_k: 0


================================================
FILE: studio/backend/assets/configs/model_defaults/gpt-oss/unsloth_gpt-oss-20b.yaml
================================================
# Model defaults for unsloth/gpt-oss-20b
# Based on gpt-oss-(20B)-Fine-tuning.ipynb
# Also applies to: openai/gpt-oss-20b, unsloth/gpt-oss-20b-unsloth-bnb-4bit, unsloth/gpt-oss-20b-BF16
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 1024
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.0
  top_p: 1.0
  top_k: 0


================================================
FILE: studio/backend/assets/configs/model_defaults/granite/unsloth_granite-4.0-350m-unsloth-bnb-4bit.yaml
================================================
# Model defaults for unsloth/granite-4.0-350m-unsloth-bnb-4bit
# Based on Granite4.0_350M.ipynb
# Also applies to: unsloth/granite-4.0-350m, ibm-granite/granite-4.0-350m, unsloth/granite-4.0-350m-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
    - "shared_mlp.input_linear"
    - "shared_mlp.output_linear"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.0
  top_p: 1.0
  top_k: 0


================================================
FILE: studio/backend/assets/configs/model_defaults/granite/unsloth_granite-4.0-h-micro.yaml
================================================
# Model defaults for unsloth/granite-4.0-h-micro
# Based on Granite4.0.ipynb
# Also applies to: ibm-granite/granite-4.0-h-micro, unsloth/granite-4.0-h-micro-bnb-4bit, unsloth/granite-4.0-h-micro-unsloth-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
    - "shared_mlp.input_linear"
    - "shared_mlp.output_linear"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.0
  top_p: 1.0
  top_k: 0


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_Llama-3.2-11B-Vision-Instruct.yaml
================================================
# Model defaults for unsloth/Llama-3.2-11B-Vision-Instruct
# Based on Llama3.2_(11B)-Vision.ipynb
# Also applies to: unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit, meta-llama/Llama-3.2-11B-Vision-Instruct, unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_Llama-3.2-1B-Instruct.yaml
================================================
# Model defaults for unsloth/Llama-3.2-1B-Instruct
# Based on Llama3.2_(1B)-RAFT.ipynb
# Also applies to: unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit, meta-llama/Llama-3.2-1B-Instruct, unsloth/Llama-3.2-1B-Instruct-bnb-4bit, RedHatAI/Llama-3.2-1B-Instruct-FP8, unsloth/Llama-3.2-1B-Instruct-FP8-Block, unsloth/Llama-3.2-1B-Instruct-FP8-Dynamic

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 5
  num_epochs: 0
  learning_rate: 2e-5
  batch_size: 1
  gradient_accumulation_steps: 8
  warmup_steps: 0
  max_steps: 30
  save_steps: 30
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: true
  optim: "adamw_torch"
  lr_scheduler_type: "cosine"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_Llama-3.2-3B-Instruct.yaml
================================================
# Model defaults for unsloth/Llama-3.2-3B-Instruct
# Based on Llama3.2_(1B_and_3B)-Conversational.ipynb
# Also applies to: unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit, meta-llama/Llama-3.2-3B-Instruct, unsloth/Llama-3.2-3B-Instruct-bnb-4bit, RedHatAI/Llama-3.2-3B-Instruct-FP8, unsloth/Llama-3.2-3B-Instruct-FP8-Block, unsloth/Llama-3.2-3B-Instruct-FP8-Dynamic
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_Llama-3.3-70B-Instruct.yaml
================================================
# Model defaults for unsloth/Llama-3.3-70B-Instruct
# Based on Llama3.3_(70B)_A100-Conversational.ipynb
# Also applies to: unsloth/Llama-3.3-70B-Instruct-unsloth-bnb-4bit, meta-llama/Llama-3.3-70B-Instruct, unsloth/Llama-3.3-70B-Instruct-bnb-4bit, RedHatAI/Llama-3.3-70B-Instruct-FP8, unsloth/Llama-3.3-70B-Instruct-FP8-Block, unsloth/Llama-3.3-70B-Instruct-FP8-Dynamic
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_Meta-Llama-3.1-70B-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Meta-Llama-3.1-70B-bnb-4bit
# Based on Llama3.1_(8B)-Alpaca.ipynb
# Also applies to: unsloth/Meta-Llama-3.1-8B-bnb-4bit, unsloth/Meta-Llama-3.1-8B-unsloth-bnb-4bit, meta-llama/Meta-Llama-3.1-8B, unsloth/Meta-Llama-3.1-8B, unsloth/Meta-Llama-3.1-70B, meta-llama/Meta-Llama-3.1-70B, unsloth/Meta-Llama-3.1-405B-bnb-4bit, meta-llama/Meta-Llama-3.1-405B

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_Meta-Llama-3.1-8B-Instruct-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit
# Based on Llama3.1_(8B)-Inference.ipynb
# Also applies to: unsloth/Meta-Llama-3.1-8B-Instruct-unsloth-bnb-4bit, meta-llama/Meta-Llama-3.1-8B-Instruct, unsloth/Meta-Llama-3.1-8B-Instruct, RedHatAI/Llama-3.1-8B-Instruct-FP8, unsloth/Llama-3.1-8B-Instruct-FP8-Block, unsloth/Llama-3.1-8B-Instruct-FP8-Dynamic

training:
  trust_remote_code: false
  max_seq_length: 8192
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_llama-3-8b-Instruct-bnb-4bit.yaml
================================================
# Model defaults for unsloth/llama-3-8b-Instruct-bnb-4bit
# Based on Llama3_(8B)-Conversational.ipynb
# Also applies to: unsloth/llama-3-8b-Instruct, meta-llama/Meta-Llama-3-8B-Instruct

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/llama/unsloth_llama-3-8b-bnb-4bit.yaml
================================================
# Model defaults for unsloth/llama-3-8b-bnb-4bit
# Based on Llama3_(8B)-Alpaca.ipynb
# Also applies to: unsloth/llama-3-8b, meta-llama/Meta-Llama-3-8B

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/llasa/unsloth_Llasa-3B.yaml
================================================
# Model defaults for unsloth/Llasa-3B
# Based on Llasa_TTS_(3B).ipynb and Llasa_TTS_(1B).ipynb
# Also applies to: HKUSTAudio/Llasa-1B
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 5e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 128
  lora_alpha: 128
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "v_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.2
  top_p: 1.0  # nucleus-sampling threshold is a probability mass and cannot exceed 1.0


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_Magistral-Small-2509-unsloth-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Magistral-Small-2509-unsloth-bnb-4bit
# Based on Magistral_(24B)-Reasoning-Conversational.ipynb
# Also applies to: unsloth/Magistral-Small-2509, mistralai/Magistral-Small-2509, unsloth/Magistral-Small-2509-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.7
  min_p: 0.01
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_Ministral-3-3B-Instruct-2512.yaml
================================================
# Model defaults for unsloth/Ministral-3-3B-Instruct-2512
# Based on Ministral_3_VL_(3B)_Vision.ipynb
# Also applies to: unsloth/Ministral-3-3B-Instruct-2512
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 4
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.15
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_Mistral-Nemo-Base-2407-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Mistral-Nemo-Base-2407-bnb-4bit
# Based on Mistral_Nemo_(12B)-Alpaca.ipynb
# Also applies to: unsloth/Mistral-Nemo-Base-2407, mistralai/Mistral-Nemo-Base-2407, unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit, unsloth/Mistral-Nemo-Instruct-2407, mistralai/Mistral-Nemo-Instruct-2407

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_Mistral-Small-Instruct-2409.yaml
================================================
# Model defaults for unsloth/Mistral-Small-Instruct-2409
# Based on Mistral_Small_(22B)-Alpaca.ipynb
# Also applies to: unsloth/Mistral-Small-Instruct-2409-bnb-4bit, mistralai/Mistral-Small-Instruct-2409

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_Pixtral-12B-2409.yaml
================================================
# Model defaults for unsloth/Pixtral-12B-2409
# Based on Pixtral_(12B)-Vision.ipynb
# Also applies to: unsloth/Pixtral-12B-2409-unsloth-bnb-4bit, mistralai/Pixtral-12B-2409, unsloth/Pixtral-12B-2409-bnb-4bit
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "paged_adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 8
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: false
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_mistral-7b-instruct-v0.3-bnb-4bit.yaml
================================================
# Model defaults for unsloth/mistral-7b-instruct-v0.3-bnb-4bit
# Based on Mistral_v0.3_(7B)-Conversational.ipynb
# Also applies to: unsloth/mistral-7b-instruct-v0.3, mistralai/Mistral-7B-Instruct-v0.3

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/mistral/unsloth_mistral-7b-v0.3-bnb-4bit.yaml
================================================
# Model defaults for unsloth/mistral-7b-v0.3-bnb-4bit
# Based on Mistral_v0.3_(7B)-Alpaca.ipynb
# Also applies to: unsloth/mistral-7b-v0.3, mistralai/Mistral-7B-v0.3

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/other/OuteAI_Llama-OuteTTS-1.0-1B.yaml
================================================
# Model defaults for OuteAI/Llama-OuteTTS-1.0-1B
# Based on Oute_TTS_(1B).ipynb
# Also applies to: OuteAI/Llama-OuteTTS-1.0-1B
# added inference parameters from unsloth notebook

audio_type: dac

training:
  trust_remote_code: false
  eval_steps: 0
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 128
  lora_alpha: 128
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "v_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.4
  top_k: 40
  top_p: 0.9
  min_p: 0.05


================================================
FILE: studio/backend/assets/configs/model_defaults/other/Spark-TTS-0.5B_LLM.yaml
================================================
# Model defaults for Spark-TTS-0.5B/LLM
# Based on Spark_TTS_(0_5B).ipynb
# Also applies to: Spark-TTS-0.5B/LLM
# added inference parameters from unsloth notebook

audio_type: bicodec

training:
  trust_remote_code: false
  eval_steps: 0
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 128
  lora_alpha: 128
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.8
  top_k: 50
  top_p: 1.0


================================================
FILE: studio/backend/assets/configs/model_defaults/other/sesame_csm-1b.yaml
================================================
# Model defaults for sesame/csm-1b
# Based on Sesame_CSM_(1B)-TTS.ipynb
# Also applies to: sesame/csm-1b

audio_type: csm

training:
  trust_remote_code: false
  eval_steps: 0
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_GLM-4.7-Flash.yaml
================================================
# Model defaults for unsloth/GLM-4.7-Flash
# Based on GLM_Flash_A100(80GB).py
# Also applies to: unsloth/GLM-4.7-Flash-unsloth-bnb-4bit, unsloth/GLM-4.7-Flash-bnb-4bit, THUDM/GLM-4.7-Flash

training:
  trust_remote_code: true
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 4
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 60
  save_steps: 60
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
    - "out_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: true
  temperature: 0.7
  top_p: 0.8
  top_k: 20


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_LFM2-1.2B.yaml
================================================
# Model defaults for unsloth/LFM2-1.2B
# Based on Liquid_LFM2_(1.2B)-Conversational.ipynb
# Also applies to: unsloth/LFM2-1.2B
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.3
  min_p: 0.15


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_Nemotron-3-Nano-30B-A3B.yaml
================================================
# Model defaults for unsloth/Nemotron-3-Nano-30B-A3B
# Based on Nemotron-3-Nano-30B-A3B_A100.ipynb
# Also applies to: unsloth/Nemotron-3-Nano-30B-A3B
# added inference parameters from unsloth guides

training:
  trust_remote_code: true
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 4
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 8
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
    - "in_proj"
    - "out_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: true
  temperature: 1.0
  top_p: 1.0


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_PaddleOCR-VL.yaml
================================================
# Model defaults for unsloth/PaddleOCR-VL
# Based on Paddle_OCR_(1B)_Vision.ipynb
# Also applies to: unsloth/PaddleOCR-VL
# added inference parameters from unsloth notebook

training:
  trust_remote_code: true
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 5e-5
  batch_size: 4
  gradient_accumulation_steps: 2
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 64
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: true
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_answerdotai_ModernBERT-large.yaml
================================================
# Model defaults for answerdotai/ModernBERT-large
# Based on bert_classification.ipynb

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 1
  num_epochs: 0
  learning_rate: 5e-5
  batch_size: 32
  gradient_accumulation_steps: 1
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_orpheus-3b-0.1-ft.yaml
================================================
# Model defaults for unsloth/orpheus-3b-0.1-ft
# Based on Orpheus_(3B)-TTS.ipynb
# Also applies to: unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit, canopylabs/orpheus-3b-0.1-ft, unsloth/orpheus-3b-0.1-ft-bnb-4bit
# added inference parameters from unsloth notebook

audio_type: snac

training:
  trust_remote_code: false
  eval_steps: 0
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 64
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_tinyllama-bnb-4bit.yaml
================================================
# Model defaults for unsloth/tinyllama-bnb-4bit
# Based on TinyLlama_(1.1B)-Alpaca.ipynb
# Also applies to: unsloth/tinyllama, TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T

training:
  trust_remote_code: false
  max_seq_length: 4096
  # num_epochs: 1
  num_epochs: 0
  learning_rate: 2e-5
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_ratio: 0.1
  max_steps: 30
  save_steps: 30
  weight_decay: 0.1
  random_seed: 3407
  packing: true
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/other/unsloth_whisper-large-v3.yaml
================================================
# Model defaults for unsloth/whisper-large-v3
# Based on Whisper.ipynb
# Also applies to: unsloth/whisper-large-v3, openai/whisper-large-v3

audio_type: whisper
audio_input: true

training:
  trust_remote_code: false
  eval_steps: 5
  max_seq_length: 448
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 1e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 64
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "v_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/phi/unsloth_Phi-3-medium-4k-instruct.yaml
================================================
# Model defaults for unsloth/Phi-3-medium-4k-instruct
# Based on Phi_3_Medium-Conversational.ipynb
# Also applies to: unsloth/Phi-3-medium-4k-instruct-bnb-4bit, microsoft/Phi-3-medium-4k-instruct

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/phi/unsloth_Phi-3.5-mini-instruct.yaml
================================================
# Model defaults for unsloth/Phi-3.5-mini-instruct
# Based on Phi_3.5_Mini-Conversational.ipynb
# Also applies to: unsloth/Phi-3.5-mini-instruct-bnb-4bit, microsoft/Phi-3.5-mini-instruct

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/phi/unsloth_Phi-4.yaml
================================================
# Model defaults for unsloth/Phi-4
# Based on Phi_4-Conversational.ipynb
# Also applies to: unsloth/phi-4-unsloth-bnb-4bit, microsoft/phi-4, unsloth/phi-4-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.8
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/imdatta0_tiny_qwen3_moe_2.8B_0.7B.yaml
================================================
# Model defaults for imdatta0/tiny_qwen3_moe_2.8B_0.7B
# Based on TinyQwen3_MoE.py
# Dummy model with the Qwen3-MoE architecture, sized to fit on a T4 GPU
# MoE model - includes gate_up_proj for MoE layers

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 1
  warmup_steps: 5
  max_steps: 50
  save_steps: 50
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
    - "gate_up_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_k: 20
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2-7B.yaml
================================================
# Model defaults for unsloth/Qwen2-7B
# Based on Qwen2_(7B)-Alpaca.ipynb
# Also applies to: unsloth/Qwen2-7B-bnb-4bit, Qwen/Qwen2-7B

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2-VL-7B-Instruct.yaml
================================================
# Model defaults for unsloth/Qwen2-VL-7B-Instruct
# Based on Qwen2_VL_(7B)-Vision.ipynb
# Also applies to: unsloth/Qwen2-VL-7B-Instruct-unsloth-bnb-4bit, Qwen/Qwen2-VL-7B-Instruct, unsloth/Qwen2-VL-7B-Instruct-bnb-4bit
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2.5-1.5B-Instruct.yaml
================================================
# Model defaults for unsloth/Qwen2.5-1.5B-Instruct
# Based on nemo_gym_sudoku.ipynb
# Also applies to: unsloth/Qwen2.5-1.5B-Instruct-unsloth-bnb-4bit, Qwen/Qwen2.5-1.5B-Instruct, unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit

training:
  trust_remote_code: false
  max_seq_length: 4096
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 1e-5
  batch_size: 1
  gradient_accumulation_steps: 64
  warmup_ratio: 0.1
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 42
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 4
  lora_alpha: 8
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2.5-7B.yaml
================================================
# Model defaults for unsloth/Qwen2.5-7B
# Based on Qwen2.5_(7B)-Alpaca.ipynb
# Also applies to: unsloth/Qwen2.5-7B-unsloth-bnb-4bit, Qwen/Qwen2.5-7B, unsloth/Qwen2.5-7B-bnb-4bit

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2.5-Coder-1.5B-Instruct.yaml
================================================
# Model defaults for unsloth/Qwen2.5-Coder-1.5B-Instruct
# Based on Qwen2.5_Coder_(1.5B)-Tool_Calling.ipynb
# Also applies to: unsloth/Qwen2.5-Coder-1.5B-Instruct-bnb-4bit, Qwen/Qwen2.5-Coder-1.5B-Instruct

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2.5-Coder-14B-Instruct.yaml
================================================
# Model defaults for unsloth/Qwen2.5-Coder-14B-Instruct
# Based on Qwen2.5_Coder_(14B)-Conversational.ipynb
# Also applies to: unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit, Qwen/Qwen2.5-Coder-14B-Instruct
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "paged_adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2.5-Coder-7B-Instruct-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit
# Based on CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb
# Also applies to: unsloth/Qwen2.5-Coder-7B-Instruct, Qwen/Qwen2.5-Coder-7B-Instruct

training:
  trust_remote_code: false
  max_seq_length: 32768
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen2.5-VL-7B-Instruct-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit
# Based on Qwen2.5_VL_(7B)-Vision.ipynb
# Also applies to: unsloth/Qwen2.5-VL-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit
# added inference parameters from unsloth notebook

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 1.5
  min_p: 0.1


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-0.6B.yaml
================================================
# Model defaults for unsloth/Qwen3-0.6B
# Based on Qwen3_(0_6B)-Phone_Deployment.ipynb
# Also applies to: unsloth/Qwen3-0.6B-unsloth-bnb-4bit, Qwen/Qwen3-0.6B, unsloth/Qwen3-0.6B-bnb-4bit, Qwen/Qwen3-0.6B-FP8, unsloth/Qwen3-0.6B-FP8
# added inference parameters from Ollama

training:
  trust_remote_code: false
  max_seq_length: 1024
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 5e-5
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_k: 20
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-14B-Base-unsloth-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Qwen3-14B-Base-unsloth-bnb-4bit
# Based on Qwen3_(14B)-Alpaca.ipynb
# Also applies to: unsloth/Qwen3-14B-Base, Qwen/Qwen3-14B-Base, unsloth/Qwen3-14B-Base-bnb-4bit
# added inference parameters from Ollama

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_k: 20
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-14B.yaml
================================================
# Model defaults for unsloth/Qwen3-14B
# Based on Qwen3_(14B).ipynb
# Also applies to: unsloth/Qwen3-14B-unsloth-bnb-4bit, Qwen/Qwen3-14B, unsloth/Qwen3-14B-bnb-4bit, Qwen/Qwen3-14B-FP8, unsloth/Qwen3-14B-FP8
# added inference parameters from Ollama

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_k: 20
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-30B-A3B-Instruct-2507.yaml
================================================
# Model defaults for unsloth/Qwen3-30B-A3B-Instruct-2507
# Based on Qwen3_MoE.py
# Also applies to: Qwen/Qwen3-30B-A3B-Instruct-2507, unsloth/Qwen3-30B-A3B-Instruct-2507-bnb-4bit
# MoE model - includes gate_up_proj for MoE layers

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 1
  gradient_accumulation_steps: 1
  warmup_steps: 5
  max_steps: 50
  save_steps: 50
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 64
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
    - "gate_up_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  # Instruct-2507 is a non-thinking release: use the non-thinking sampling
  # defaults (matching the Qwen3-4B-Instruct-2507 config) instead of the
  # thinking-mode values (temperature 0.6 / top_p 0.95).
  temperature: 0.7
  top_p: 0.80
  top_k: 20
  min_p: 0.00


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-32B.yaml
================================================
# Model defaults for unsloth/Qwen3-32B
# Based on Qwen3_(32B)_A100-Reasoning-Conversational.ipynb
# Also applies to: unsloth/Qwen3-32B-unsloth-bnb-4bit, Qwen/Qwen3-32B, unsloth/Qwen3-32B-bnb-4bit, Qwen/Qwen3-32B-FP8, unsloth/Qwen3-32B-FP8
# added inference parameters from Ollama

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_k: 20
  top_p: 0.95


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-4B-Instruct-2507.yaml
================================================
# Model defaults for unsloth/Qwen3-4B-Instruct-2507
# Based on Qwen3_(4B)-Instruct.ipynb
# Also applies to: unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit, Qwen/Qwen3-4B-Instruct-2507, unsloth/Qwen3-4B-Instruct-2507-bnb-4bit, Qwen/Qwen3-4B-Instruct-2507-FP8, unsloth/Qwen3-4B-Instruct-2507-FP8
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.7
  top_p: 0.80
  top_k: 20
  min_p: 0.00


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-4B-Thinking-2507.yaml
================================================
# Model defaults for unsloth/Qwen3-4B-Thinking-2507
# Based on Qwen3_(4B)-Thinking.ipynb
# Also applies to: unsloth/Qwen3-4B-Thinking-2507-unsloth-bnb-4bit, Qwen/Qwen3-4B-Thinking-2507, unsloth/Qwen3-4B-Thinking-2507-bnb-4bit, Qwen/Qwen3-4B-Thinking-2507-FP8, unsloth/Qwen3-4B-Thinking-2507-FP8
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 32
  lora_alpha: 32
  lora_dropout: 0.0
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  use_rslora: false
  use_loftq: false

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.6
  top_p: 0.95
  top_k: 20
  min_p: 0.00


================================================
FILE: studio/backend/assets/configs/model_defaults/qwen/unsloth_Qwen3-VL-8B-Instruct-unsloth-bnb-4bit.yaml
================================================
# Model defaults for unsloth/Qwen3-VL-8B-Instruct-unsloth-bnb-4bit
# Based on Qwen3_VL_(8B)-Vision.ipynb
# Also applies to: Qwen/Qwen3-VL-8B-Instruct-FP8, unsloth/Qwen3-VL-8B-Instruct-FP8, unsloth/Qwen3-VL-8B-Instruct, Qwen/Qwen3-VL-8B-Instruct, unsloth/Qwen3-VL-8B-Instruct-bnb-4bit
# added inference parameters from unsloth guides

training:
  trust_remote_code: false
  max_seq_length: 2048
  # num_epochs: 4
  num_epochs: 0
  learning_rate: 2e-4
  batch_size: 2
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 30
  save_steps: 30
  weight_decay: 0.001
  random_seed: 3407
  packing: false
  train_on_completions: true
  gradient_checkpointing: "unsloth"
  optim: "adamw_8bit"
  lr_scheduler_type: "linear"

lora:
  lora_r: 16
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules:
    - "all-linear"
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: "llm-finetuning"
  enable_tensorboard: false
  tensorboard_dir: "runs"
  log_frequency: 10

inference:
  trust_remote_code: false
  temperature: 0.7
  top_p: 0.8
  top_k: 20


================================================
FILE: studio/backend/assets/configs/vision_lora.yaml
================================================
model: unsloth/Qwen2-VL-2B-Instruct-bnb-4bit

data:
  dataset: philschmid/amazon-product-descriptions-vlm
  format_type: auto

training:
  training_type: lora
  max_seq_length: 2048
  load_in_4bit: true
  output_dir: outputs
  num_epochs: 1
  learning_rate: 0.0002
  batch_size: 1
  gradient_accumulation_steps: 4
  warmup_steps: 5
  max_steps: 0
  save_steps: 0
  weight_decay: 0.01
  random_seed: 3407
  packing: false
  train_on_completions: false
  gradient_checkpointing: "unsloth"

lora:
  lora_r: 64
  lora_alpha: 16
  lora_dropout: 0.0
  target_modules: ""   # vision uses vision_all_linear by default
  vision_all_linear: true
  use_rslora: false
  use_loftq: false
  finetune_vision_layers: true
  finetune_language_layers: true
  finetune_attention_modules: true
  finetune_mlp_modules: true

logging:
  enable_wandb: false
  wandb_project: unsloth-training
  enable_tensorboard: false
  tensorboard_dir: runs


================================================
FILE: studio/backend/auth/.gitkeep
================================================


================================================
FILE: studio/backend/auth/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Authentication module for JWT-based auth with SQLite storage.
"""

from .authentication import (
    create_access_token,
    create_refresh_token,
    refresh_access_token,
    get_current_subject,
    get_current_subject_allow_password_change,
    reload_secret,
)
from .storage import (
    DEFAULT_ADMIN_USERNAME,
    clear_bootstrap_password,
    generate_bootstrap_password,
    get_bootstrap_password,
    is_initialized,
    create_initial_user,
    ensure_default_admin,
    get_jwt_secret,
    get_user_and_secret,
    load_jwt_secret,
    requires_password_change,
    save_refresh_token,
    update_password,
    verify_refresh_token,
    revoke_user_refresh_tokens,
)
from .hashing import hash_password, verify_password

__all__ = [
    "create_access_token",
    "create_refresh_token",
    "refresh_access_token",
    "get_current_subject",
    "get_current_subject_allow_password_change",
    "reload_secret",
    "DEFAULT_ADMIN_USERNAME",
    "clear_bootstrap_password",
    "generate_bootstrap_password",
    "get_bootstrap_password",
    "is_initialized",
    "create_initial_user",
    "ensure_default_admin",
    "get_jwt_secret",
    "get_user_and_secret",
    "load_jwt_secret",
    "requires_password_change",
    "save_refresh_token",
    "update_password",
    "verify_refresh_token",
    "revoke_user_refresh_tokens",
    "hash_password",
    "verify_password",
]


================================================
FILE: studio/backend/auth/authentication.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import secrets
from datetime import datetime, timedelta, timezone
from typing import Optional, Tuple

from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
import jwt

from .storage import (
    get_jwt_secret,
    get_user_and_secret,
    load_jwt_secret,
    save_refresh_token,
    verify_refresh_token,
)

ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 60
REFRESH_TOKEN_EXPIRE_DAYS = 7

security = HTTPBearer()  # Reads Authorization: Bearer <token>


def _get_secret_for_subject(subject: str) -> str:
    """Return the stored JWT signing secret for ``subject``.

    Raises:
        HTTPException: 401 when ``get_jwt_secret`` returns ``None`` for the
            subject.
    """
    signing_secret = get_jwt_secret(subject)
    if signing_secret is not None:
        return signing_secret
    raise HTTPException(
        status_code = status.HTTP_401_UNAUTHORIZED,
        detail = "Invalid or expired token",
    )


def _decode_subject_without_verification(token: str) -> Optional[str]:
    """Read the ``sub`` claim from a token without verifying it.

    Signature and expiry checks are both disabled, so the returned value is
    untrusted. Returns ``None`` when the token cannot be decoded or when the
    ``sub`` claim is not a string.
    """
    unverified_options = {"verify_signature": False, "verify_exp": False}
    try:
        claims = jwt.decode(token, options = unverified_options)
    except jwt.InvalidTokenError:
        return None

    candidate = claims.get("sub")
    if isinstance(candidate, str):
        return candidate
    return None


def create_access_token(
    subject: str,
    expires_delta: Optional[timedelta] = None,
) -> str:
    """
    Build and sign an access JWT whose ``sub`` claim is ``subject``.

    The token expires after ``expires_delta``; a missing (or falsy) value
    falls back to ``ACCESS_TOKEN_EXPIRE_MINUTES``. It is signed with the
    subject's own secret, looked up via ``_get_secret_for_subject``, which
    raises a 401 ``HTTPException`` when no secret is stored.
    """
    lifetime = expires_delta or timedelta(minutes = ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = {
        "sub": subject,
        "exp": datetime.now(timezone.utc) + lifetime,
    }
    signing_secret = _get_secret_for_subject(subject)
    return jwt.encode(claims, signing_secret, algorithm = ALGORITHM)


def create_refresh_token(subject: str) -> str:
    """
    Mint an opaque refresh token for ``subject`` and persist it.

    The token is a random URL-safe string (not a JWT). It is handed to
    ``save_refresh_token`` together with an ISO-8601 UTC expiry set
    ``REFRESH_TOKEN_EXPIRE_DAYS`` from now, and the raw token is returned.
    """
    opaque_token = secrets.token_urlsafe(48)
    lifetime = timedelta(days = REFRESH_TOKEN_EXPIRE_DAYS)
    expiry = datetime.now(timezone.utc) + lifetime
    save_refresh_token(opaque_token, subject, expiry.isoformat())
    return opaque_token


def refresh_access_token(refresh_token: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Exchange a refresh token for a freshly signed access token.

    The refresh token is not consumed; it remains usable until it expires.

    Returns:
        ``(access_token, username)`` when the refresh token is valid, or
        ``(None, None)`` when ``verify_refresh_token`` rejects it.
    """
    subject = verify_refresh_token(refresh_token)
    if subject is not None:
        return create_access_token(subject = subject), subject
    return None, None


def reload_secret() -> None:
    """
    Legacy hook kept for callers that still expect an auth-storage init step.

    It only calls ``load_jwt_secret`` and discards the result; signing secrets
    are read from SQLite whenever they are needed.
    """
    load_jwt_secret()
    return None


async def get_current_subject(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> str:
    """Authenticate the bearer token; reject users who still must change their password."""
    subject = await _get_current_subject(credentials, allow_password_change = False)
    return subject


async def get_current_subject_allow_password_change(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> str:
    """Authenticate the bearer token without enforcing the pending password change."""
    subject = await _get_current_subject(credentials, allow_password_change = True)
    return subject


async def _get_current_subject(
    credentials: HTTPAuthorizationCredentials,
    *,
    allow_password_change: bool,
) -> str:
    """
    Shared implementation behind the ``get_current_subject*`` dependencies.

    The token's ``sub`` claim is first read without verification, only to
    find which user's secret to verify against. The token is then fully
    verified with that user's stored secret.

    Raises:
        HTTPException: 401 for an unreadable subject, an unknown user, a
            failed verification, or a subject mismatch; 403 when the user
            must change their password and ``allow_password_change`` is
            False.

    Protected routes use the public wrappers, e.g.:

        @router.get("/secure")
        async def secure_endpoint(current_subject: str = Depends(get_current_subject)):
            ...
    """
    token = credentials.credentials

    claimed_subject = _decode_subject_without_verification(token)
    if claimed_subject is None:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Invalid token payload",
        )

    user_record = get_user_and_secret(claimed_subject)
    if user_record is None:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Invalid or expired token",
        )

    # Only the secret and the password-change flag are needed here.
    _, _, jwt_secret, must_change_password = user_record

    try:
        verified_claims = jwt.decode(token, jwt_secret, algorithms = [ALGORITHM])
    except jwt.InvalidTokenError:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Invalid or expired token",
        )

    if verified_claims.get("sub") != claimed_subject:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Invalid token payload",
        )

    if must_change_password and not allow_password_change:
        raise HTTPException(
            status_code = status.HTTP_403_FORBIDDEN,
            detail = "Password change required",
        )

    return claimed_subject


================================================
FILE: studio/backend/auth/hashing.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Password hashing utilities using PBKDF2.
"""

import hashlib
import hmac
import secrets
from typing import Tuple


def hash_password(password: str, salt: str | None = None) -> Tuple[str, str]:
    """
    Hash a password using PBKDF2-HMAC-SHA256.

    Returns (salt, hex_hash) tuple.
    """
    if salt is None:
        salt = secrets.token_hex(16)
    dk = hashlib.pbkdf2_hmac(
        "sha256",
        password.encode("utf-8"),
        salt.encode("utf-8"),
        100_000,  # 100k iterations
    )
    return salt, dk.hex()


def verify_password(password: str, salt: str, hashed: str) -> bool:
    """
    Check ``password`` against a stored ``salt`` and hex digest ``hashed``.

    The candidate digest is recomputed with PBKDF2-HMAC-SHA256 (100,000
    iterations) and compared using ``hmac.compare_digest`` so the comparison
    takes constant time.
    """
    candidate = hashlib.pbkdf2_hmac(
        "sha256",
        password.encode("utf-8"),
        salt.encode("utf-8"),
        100_000,
    ).hex()
    return hmac.compare_digest(candidate, hashed)


================================================
FILE: studio/backend/auth/storage.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
SQLite storage for authentication data (user credentials + JWT secret).
"""

import hashlib
import secrets
import sqlite3
from datetime import datetime, timezone
from typing import Optional, Tuple

from utils.paths import auth_db_path, ensure_dir

DB_PATH = auth_db_path()
DEFAULT_ADMIN_USERNAME = "unsloth"

# Plaintext bootstrap password file — lives beside auth.db, deleted on
# first password change so the credential never lingers on disk.
_BOOTSTRAP_PW_PATH = DB_PATH.parent / ".bootstrap_password"

# In-process cache so we don't re-read the file on every HTML serve.
_bootstrap_password: Optional[str] = None


def generate_bootstrap_password() -> str:
    """Generate a 4-word diceware passphrase and persist it to disk.

    The passphrase is written to ``_BOOTSTRAP_PW_PATH`` so that it
    survives server restarts (the DB only stores the *hash*).  On
    subsequent calls / restarts, the persisted value is returned.

    The file holds a plaintext credential, so it is restricted to
    owner-only access (mode 0o600) before the passphrase is written.

    Returns:
        The cached, persisted, or newly generated passphrase.
    """
    global _bootstrap_password

    # 1. Already cached in this process?
    if _bootstrap_password is not None:
        return _bootstrap_password

    # 2. Already persisted from a previous run?  Read into a local first so
    #    an empty file never leaves "" in the cache.
    if _BOOTSTRAP_PW_PATH.is_file():
        persisted = _BOOTSTRAP_PW_PATH.read_text().strip()
        if persisted:
            _bootstrap_password = persisted
            return _bootstrap_password

    # 3. First-ever startup — generate a fresh passphrase.
    import diceware

    passphrase = diceware.get_passphrase(
        options = diceware.handle_options(args = ["-n", "4", "-d", "", "-c"])
    )

    # Persist so the *same* passphrase is used if the server restarts
    # before the user changes the password.
    ensure_dir(_BOOTSTRAP_PW_PATH.parent)
    try:
        # Create (or tighten) the file as owner-only *before* the secret is
        # written, so it is never readable by other local users.
        _BOOTSTRAP_PW_PATH.touch(mode = 0o600, exist_ok = True)
        _BOOTSTRAP_PW_PATH.chmod(0o600)
    except OSError:
        # Best effort: some filesystems/platforms do not support POSIX
        # modes. A genuine write problem still surfaces from write_text.
        pass
    _BOOTSTRAP_PW_PATH.write_text(passphrase)

    _bootstrap_password = passphrase
    return _bootstrap_password


def get_bootstrap_password() -> Optional[str]:
    """Return the in-process cached bootstrap password (``None`` when nothing is cached).

    This never touches the disk; the cache is filled by
    ``generate_bootstrap_password`` and reset by ``clear_bootstrap_password``.
    """
    return _bootstrap_password


def clear_bootstrap_password() -> None:
    """Drop the cached bootstrap password and remove its on-disk file, if present."""
    global _bootstrap_password
    _bootstrap_password = None

    if not _BOOTSTRAP_PW_PATH.is_file():
        return
    _BOOTSTRAP_PW_PATH.unlink(missing_ok = True)


def _hash_token(token: str) -> str:
    """Return the hex SHA-256 digest of the UTF-8 encoded ``token``."""
    digest = hashlib.sha256()
    digest.update(token.encode("utf-8"))
    return digest.hexdigest()


def _ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the auth tables if missing, apply the column migration, and commit."""
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS auth_user (
            id INTEGER PRIMARY KEY,
            username TEXT UNIQUE NOT NULL,
            password_salt TEXT NOT NULL,
            password_hash TEXT NOT NULL,
            jwt_secret TEXT NOT NULL,
            must_change_password INTEGER NOT NULL DEFAULT 0
        );
        """
    )
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS refresh_tokens (
            id INTEGER PRIMARY KEY,
            token_hash TEXT NOT NULL,
            username TEXT NOT NULL,
            expires_at TEXT NOT NULL
        );
        """
    )
    # Databases created before `must_change_password` existed get the column
    # added in place.
    columns = {row["name"] for row in conn.execute("PRAGMA table_info(auth_user)")}
    if "must_change_password" not in columns:
        conn.execute(
            "ALTER TABLE auth_user ADD COLUMN must_change_password INTEGER NOT NULL DEFAULT 0"
        )
    conn.commit()


def get_connection() -> sqlite3.Connection:
    """Get a connection to the auth database, creating tables if needed.

    The returned connection uses ``sqlite3.Row`` as its row factory and must
    be closed by the caller. If schema setup fails, the connection is closed
    here before the error propagates, so it is never leaked.
    """
    ensure_dir(DB_PATH.parent)
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        _ensure_schema(conn)
    except BaseException:
        # Nobody else holds a reference yet; release the handle, then re-raise.
        conn.close()
        raise
    return conn


def is_initialized() -> bool:
    """Check if auth is ready for login (at least one user exists in DB).

    The connection is closed even when the query raises, matching the other
    accessors in this module.
    """
    conn = get_connection()
    try:
        row = conn.execute("SELECT COUNT(*) AS c FROM auth_user").fetchone()
        return bool(row["c"])
    finally:
        conn.close()


def create_initial_user(
    username: str,
    password: str,
    jwt_secret: str,
    *,
    must_change_password: bool = False,
) -> None:
    """
    Insert a new row into ``auth_user`` for the initial admin account.

    The password is salted and hashed with ``hash_password`` before storage;
    ``must_change_password`` is stored as an integer flag.

    Raises sqlite3.IntegrityError if username already exists.
    """
    from .hashing import hash_password

    salt, pwd_hash = hash_password(password)
    db = get_connection()
    try:
        row_values = (username, salt, pwd_hash, jwt_secret, int(must_change_password))
        db.execute(
            """
            INSERT INTO auth_user (
                username,
                password_salt,
                password_hash,
                jwt_secret,
                must_change_password
            )
            VALUES (?, ?, ?, ?, ?)
            """,
            row_values,
        )
        db.commit()
    finally:
        db.close()


def delete_user(username: str) -> None:
    """
    Remove the ``auth_user`` row for ``username`` (a no-op when it does not exist).

    Used for rollback when user creation fails partway through bootstrap.
    """
    db = get_connection()
    try:
        # The connection context manager commits when the block succeeds.
        with db:
            db.execute("DELETE FROM auth_user WHERE username = ?", (username,))
    finally:
        db.close()


def get_user_and_secret(username: str) -> Optional[Tuple[str, str, str, bool]]:
    """
    Look up the stored credentials and signing secret for ``username``.

    Returns (password_salt, password_hash, jwt_secret, must_change_password),
    with the flag converted to ``bool``, or None if user not found.
    """
    db = get_connection()
    try:
        row = db.execute(
            """
            SELECT password_salt, password_hash, jwt_secret, must_change_password
            FROM auth_user
            WHERE username = ?
            """,
            (username,),
        ).fetchone()
        if row:
            return (
                row["password_salt"],
                row["password_hash"],
                row["jwt_secret"],
                bool(row["must_change_password"]),
            )
        return None
    finally:
        db.close()


def get_jwt_secret(username: str) -> Optional[str]:
    """Fetch the JWT signing secret stored for ``username``; ``None`` if the user is unknown."""
    db = get_connection()
    try:
        row = db.execute(
            "SELECT jwt_secret FROM auth_user WHERE username = ?",
            (username,),
        ).fetchone()
        if not row:
            return None
        return row["jwt_secret"]
    finally:
        db.close()


def requires_password_change(username: str) -> bool:
    """Report whether ``username`` is flagged ``must_change_password``.

    Returns False when the user does not exist.
    """
    db = get_connection()
    try:
        row = db.execute(
            "SELECT must_change_password FROM auth_user WHERE username = ?",
            (username,),
        ).fetchone()
        if not row:
            return False
        return bool(row["must_change_password"])
    finally:
        db.close()


def load_jwt_secret() -> str:
    """
    Return the ``jwt_secret`` of the first ``auth_user`` row the query yields.

    Raises RuntimeError if no auth user has been created yet.
    """
    db = get_connection()
    try:
        row = db.execute("SELECT jwt_secret FROM auth_user LIMIT 1").fetchone()
        if row:
            return row["jwt_secret"]
        raise RuntimeError(
            "Auth is not initialized. Wait for the seeded admin bootstrap to complete."
        )
    finally:
        db.close()


def ensure_default_admin() -> bool:
    """Seed the default admin account on first startup.

    Uses a randomly generated diceware passphrase as the bootstrap password.
    Returns True when the default admin was created in this call.

    When the admin already exists, no new passphrase is generated or written
    to disk. Otherwise every restart after a password change would re-create
    a stale ``.bootstrap_password`` file and hash a password that is then
    discarded. If the existing admin still has to change the seeded password,
    the persisted passphrase is loaded into the in-process cache so
    ``get_bootstrap_password`` keeps working across restarts.
    """
    if get_jwt_secret(DEFAULT_ADMIN_USERNAME) is not None:
        if requires_password_change(DEFAULT_ADMIN_USERNAME):
            # Warm the cache from the persisted file for the first-login flow.
            generate_bootstrap_password()
        return False

    bootstrap_pw = generate_bootstrap_password()
    try:
        create_initial_user(
            username = DEFAULT_ADMIN_USERNAME,
            password = bootstrap_pw,
            jwt_secret = secrets.token_urlsafe(64),
            must_change_password = True,
        )
        return True
    except sqlite3.IntegrityError:
        # Another caller created the admin between the existence check and
        # the insert; treat it as "already seeded".
        return False


def update_password(username: str, new_password: str) -> bool:
    """Store a new password hash, clear ``must_change_password`` and rotate the JWT secret.

    When a row was updated, the persisted bootstrap password is cleared as
    well. Rows in ``refresh_tokens`` are not touched by this function.

    Returns:
        True if a user row was updated, False if ``username`` does not exist.
    """
    from .hashing import hash_password

    salt, pwd_hash = hash_password(new_password)
    rotated_secret = secrets.token_urlsafe(64)
    db = get_connection()
    try:
        result = db.execute(
            """
            UPDATE auth_user
            SET password_salt = ?, password_hash = ?, jwt_secret = ?, must_change_password = 0
            WHERE username = ?
            """,
            (salt, pwd_hash, rotated_secret, username),
        )
        db.commit()
        if result.rowcount > 0:
            clear_bootstrap_password()
        updated = result.rowcount > 0
        return updated
    finally:
        db.close()


def save_refresh_token(token: str, username: str, expires_at: str) -> None:
    """
    Persist a refresh token for ``username``.

    Only the SHA-256 hash of ``token`` is stored, alongside the username and
    the ``expires_at`` string supplied by the caller.
    """
    hashed = _hash_token(token)
    db = get_connection()
    try:
        db.execute(
            """
            INSERT INTO refresh_tokens (token_hash, username, expires_at)
            VALUES (?, ?, ?)
            """,
            (hashed, username, expires_at),
        )
        db.commit()
    finally:
        db.close()


def verify_refresh_token(token: str) -> Optional[str]:
    """
    Resolve a refresh token to the username it was issued for.

    Expired rows are purged first. The token is then looked up by its hash,
    and its expiry is checked again. A valid token is NOT consumed: it keeps
    working until it expires.

    Returns the username, or None when the token is unknown or expired.
    """
    hashed = _hash_token(token)
    db = get_connection()
    try:
        # Opportunistic cleanup of every expired token.
        now_iso = datetime.now(timezone.utc).isoformat()
        db.execute("DELETE FROM refresh_tokens WHERE expires_at < ?", (now_iso,))
        db.commit()

        row = db.execute(
            """
            SELECT id, username, expires_at FROM refresh_tokens
            WHERE token_hash = ?
            """,
            (hashed,),
        ).fetchone()
        if row is None:
            return None

        expiry = datetime.fromisoformat(row["expires_at"])
        if datetime.now(timezone.utc) <= expiry:
            return row["username"]

        # The token expired between the cleanup above and this check.
        db.execute("DELETE FROM refresh_tokens WHERE id = ?", (row["id"],))
        db.commit()
        return None
    finally:
        db.close()


def revoke_user_refresh_tokens(username: str) -> None:
    """Delete every stored refresh token belonging to ``username`` (e.g. on logout)."""
    db = get_connection()
    try:
        # The connection context manager commits when the block succeeds.
        with db:
            db.execute("DELETE FROM refresh_tokens WHERE username = ?", (username,))
    finally:
        db.close()


================================================
FILE: studio/backend/colab.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Colab-specific helpers for running Unsloth Studio.
Uses Colab's built-in proxy - no external tunneling needed!
"""

from pathlib import Path
import sys


def _bootstrap_studio_venv() -> None:
    """Make the Studio venv's packages importable from the running interpreter.

    On Colab, notebook cells run outside the venv subshell. Rather than
    installing the full stack into system Python, every
    ``python*/site-packages`` directory under ``~/.unsloth/studio/.venv/lib``
    is prepended to ``sys.path`` (unless already listed), so venv copies of
    packages take priority over system ones. A warning is emitted, and
    nothing else happens, when that ``lib`` directory is missing.
    """
    venv_lib = Path.home().joinpath(".unsloth", "studio", ".venv", "lib")

    if venv_lib.exists():
        for site_packages in venv_lib.glob("python*/site-packages"):
            entry = str(site_packages)
            if entry not in sys.path:
                sys.path.insert(0, entry)
        return

    import warnings

    warnings.warn(
        f"Studio venv not found at {venv_lib.parent} -- run 'unsloth studio setup' first",
        stacklevel = 2,
    )


_bootstrap_studio_venv()

# Add backend to path early so local modules like loggers can be imported
backend_path = str(Path(__file__).parent)
if backend_path not in sys.path:
    sys.path.insert(0, backend_path)

from loggers import get_logger

logger = get_logger(__name__)


def get_colab_url(port: int = 8888) -> str:
    """
    Ask Colab's kernel proxy for the public URL that forwards to ``port``.

    Falls back to ``http://localhost:<port>`` when the proxy returns nothing
    or when anything goes wrong (for example, ``google.colab`` cannot be
    imported); the failure is logged at info level.
    """
    localhost_url = f"http://localhost:{port}"
    try:
        from google.colab.output import eval_js

        # Use Colab's proxy mechanism
        proxied = eval_js(f"google.colab.kernel.proxyPort({port})", timeout_sec = 5)
    except Exception as e:
        logger.info(f"Note: Could not get Colab URL ({e})")
        return localhost_url
    return proxied or localhost_url


def _shorten_proxy_url(url: str, port: int) -> str:
    """Truncate ``url`` just after the first ``-`` that follows ``"<port>-"``.

    The full ``url`` is returned unchanged when it does not contain
    ``"<port>-"`` or has no further ``-`` after it.
    """
    marker = f"{port}-"
    marker_at = url.find(marker)
    if marker_at == -1:
        return url
    cut_at = url.find("-", marker_at + len(marker))
    if cut_at == -1:
        return url
    return url[: cut_at + 1] + "..."


def show_link(port: int = 8888):
    """Display a styled clickable link to the UI.

    The link targets the URL returned by ``get_colab_url(port)``. A shortened
    form of that URL is shown underneath as plain text; URLs that cannot be
    shortened are shown in full.
    """
    from IPython.display import display, HTML

    # Get real Colab proxy URL
    url = get_colab_url(port)

    short_url = _shorten_proxy_url(url, port)
    html = f"""
    <div style="display: inline-block; padding: 20px; background: #ffffff; border: 2px solid #000000;
                border-radius: 12px; margin: 10px 0; font-family: system-ui, -apple-system, sans-serif;">
        <h2 style="color: #000000; margin: 0 0 12px 0; font-size: 26px; font-weight: 800;
                   display: flex; align-items: center; gap: 12px;">
            <img src="https://github.com/unslothai/unsloth/raw/main/studio/frontend/public/unsloth-gem.png"
                 height="48" style="display:block;">
            Unsloth Studio is Ready!
        </h2>
        <a href="{url}" target="_blank"
           style="display: inline-flex; align-items: center; gap: 10px; padding: 14px 28px;
                  background: #000000; color: white; text-decoration: none; border-radius: 8px;
                  font-weight: 800; font-size: 16px;">
            <svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="white"><polygon points="5,3 19,12 5,21"/></svg>
            Open Unsloth Studio
        </a>
        <p style="color: #333333; margin: 16px 0 0 0; font-size: 13px; font-family: monospace;">
            {short_url}
        </p>
    </div>
    """
    display(HTML(html))


def start(port: int = 8888):
    """
    Start Unsloth Studio server in Colab and display the URL.

    The built frontend is expected at ``frontend/dist`` next to this file's
    parent directory. When it is missing, a message is logged and the
    function returns without starting the server.

    Usage:
        from colab import start
        start()
    """
    logger.info("🦥 Starting Unsloth Studio...")

    logger.info("   Loading backend...")
    from run import run_server

    # Auto-detect frontend path (two levels up from this file, then frontend/dist)
    studio_root = Path(__file__).parent.parent
    frontend_path = studio_root / "frontend" / "dist"

    if not frontend_path.exists():
        logger.info("❌ Frontend not built! Please run the setup cell first.")
        return

    logger.info("   Starting server...")
    # Start server silently
    run_server(host = "0.0.0.0", port = port, frontend_path = frontend_path, silent = True)

    logger.info("   Server started!")

    # Show the clickable link with real URL
    show_link(port)


if __name__ == "__main__":
    start()


================================================
FILE: studio/backend/core/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Unified core module for Unsloth backend

Imports are LAZY (via __getattr__) so that training subprocesses can
import core.training.worker without pulling in heavy ML dependencies
like unsloth, transformers, or torch before the version activation
code has a chance to run.
"""

import sys
from pathlib import Path

# Ensure the backend directory is on sys.path so that bare "from utils.*"
# imports used throughout the backend work when core is imported as a package
# (e.g. from the CLI: "from studio.backend.core import ModelConfig").
_backend_dir = str(Path(__file__).resolve().parent.parent)
if _backend_dir not in sys.path:
    sys.path.insert(0, _backend_dir)

__all__ = [
    # Inference
    "InferenceBackend",
    "get_inference_backend",
    # Training
    "get_training_backend",
    "TrainingBackend",
    "TrainingProgress",
    # Config
    "ModelConfig",
    "is_vision_model",
    "scan_trained_loras",
    "load_model_defaults",
    "get_base_model_from_lora",
    # Utils
    "format_and_template_dataset",
    "normalize_path",
    "is_local_path",
    "is_model_cached",
    "without_hf_auth",
    "format_error_message",
    "get_gpu_memory_info",
    "log_gpu_memory",
    "get_device",
    "is_apple_silicon",
    "clear_gpu_cache",
    "DeviceType",
]


def __getattr__(name):
    """Resolve the package's lazily exported names on first access.

    Each group of names is imported from its source module only when one of
    them is requested. The whole group is then cached in the module globals,
    so later lookups do not reach this hook again. Unknown names raise
    ``AttributeError``.
    """
    namespace = globals()

    # Inference
    if name in ("InferenceBackend", "get_inference_backend"):
        from .inference import InferenceBackend, get_inference_backend

        namespace.update(
            InferenceBackend = InferenceBackend,
            get_inference_backend = get_inference_backend,
        )
        return namespace[name]

    # Training
    if name in ("TrainingBackend", "get_training_backend", "TrainingProgress"):
        from .training import TrainingBackend, get_training_backend, TrainingProgress

        namespace.update(
            TrainingBackend = TrainingBackend,
            get_training_backend = get_training_backend,
            TrainingProgress = TrainingProgress,
        )
        return namespace[name]

    # Config (from utils.models)
    if name in (
        "is_vision_model",
        "ModelConfig",
        "scan_trained_loras",
        "load_model_defaults",
        "get_base_model_from_lora",
    ):
        from utils.models import (
            is_vision_model,
            ModelConfig,
            scan_trained_loras,
            load_model_defaults,
            get_base_model_from_lora,
        )

        namespace.update(
            is_vision_model = is_vision_model,
            ModelConfig = ModelConfig,
            scan_trained_loras = scan_trained_loras,
            load_model_defaults = load_model_defaults,
            get_base_model_from_lora = get_base_model_from_lora,
        )
        return namespace[name]

    # Paths
    if name in ("normalize_path", "is_local_path", "is_model_cached"):
        from utils.paths import normalize_path, is_local_path, is_model_cached

        namespace.update(
            normalize_path = normalize_path,
            is_local_path = is_local_path,
            is_model_cached = is_model_cached,
        )
        return namespace[name]

    # Utils
    if name in ("without_hf_auth", "format_error_message"):
        from utils.utils import without_hf_auth, format_error_message

        namespace.update(
            without_hf_auth = without_hf_auth,
            format_error_message = format_error_message,
        )
        return namespace[name]

    # Hardware
    if name in (
        "get_device",
        "is_apple_silicon",
        "clear_gpu_cache",
        "get_gpu_memory_info",
        "log_gpu_memory",
        "DeviceType",
    ):
        from utils.hardware import (
            get_device,
            is_apple_silicon,
            clear_gpu_cache,
            get_gpu_memory_info,
            log_gpu_memory,
            DeviceType,
        )

        namespace.update(
            get_device = get_device,
            is_apple_silicon = is_apple_silicon,
            clear_gpu_cache = clear_gpu_cache,
            get_gpu_memory_info = get_gpu_memory_info,
            log_gpu_memory = log_gpu_memory,
            DeviceType = DeviceType,
        )
        return namespace[name]

    # Datasets
    if name == "format_and_template_dataset":
        from utils.datasets import format_and_template_dataset

        namespace["format_and_template_dataset"] = format_and_template_dataset
        return format_and_template_dataset

    raise AttributeError(f"module 'core' has no attribute {name!r}")


================================================
FILE: studio/backend/core/data_recipe/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Data Recipe core (DataDesigner wrapper + job runner).
"""

from .jobs import JobManager, get_job_manager

__all__ = ["JobManager", "get_job_manager"]


================================================
FILE: studio/backend/core/data_recipe/huggingface.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import json
from pathlib import Path

from utils.paths import recipe_datasets_root, resolve_dataset_path

_DATA_DESIGNER_FOOTER = (
    '<sub style="white-space: nowrap;">Made with ❤️ using 🎨 '
    '<a href="https://github.com/NVIDIA-NeMo/DataDesigner">NeMo Data Designer</a></sub>'
)
_UNSLOTH_STUDIO_FOOTER = (
    '<sub style="white-space: nowrap;">Made with ❤️ using 🦥 ' "Unsloth Studio</sub>"
)


class RecipeDatasetPublishError(ValueError):
    """Signals that publishing a recipe dataset to Hugging Face failed.

    Subclasses ``ValueError``, so existing ``except ValueError`` handlers
    catch it too.
    """


def _resolve_recipe_artifact_path(artifact_path: str) -> Path:
    """Resolve ``artifact_path`` to an existing dataset folder inside recipe storage.

    The path is resolved with ``resolve_dataset_path``, made absolute, and
    required to lie under ``recipe_datasets_root()``.

    Raises:
        RecipeDatasetPublishError: when the resolved path is outside the
            storage root, does not exist, or is not a directory.
    """
    storage_root = recipe_datasets_root().expanduser().resolve()
    artifact_dir = (
        resolve_dataset_path(artifact_path).expanduser().resolve(strict = False)
    )

    try:
        artifact_dir.relative_to(storage_root)
    except ValueError as exc:
        raise RecipeDatasetPublishError(
            "This execution artifact is outside the Recipe Studio dataset storage."
        ) from exc

    if not artifact_dir.exists():
        raise RecipeDatasetPublishError("Execution artifacts are no longer available.")
    if artifact_dir.is_dir():
        return artifact_dir
    raise RecipeDatasetPublishError(
        "Execution artifact path is not a dataset folder."
    )


def publish_recipe_dataset(
    *,
    artifact_path: str,
    repo_id: str,
    description: str,
    hf_token: str | None = None,
    private: bool = False,
) -> str:
    """Publish a recipe execution artifact as a Hugging Face dataset repo.

    Steps, in order: resolve and validate the artifact folder, create (or
    reuse) the dataset repo, build and push the dataset card, then upload
    the main dataset files, images, processor outputs and config files.

    Args:
        artifact_path: Location of the execution artifact; it must resolve
            to a folder inside the recipe dataset storage.
        repo_id: Target Hugging Face dataset repository id.
        description: Description passed to the generated dataset card.
        hf_token: Token handed to the Hub client and to the card push.
        private: Passed through when creating/getting the repo.

    Returns:
        The ``https://huggingface.co/datasets/<repo_id>`` URL.

    Raises:
        RecipeDatasetPublishError: if the artifact path is invalid, the
            Data Designer Hugging Face integration cannot be imported, or
            the client raises ``HuggingFaceHubClientUploadError``. Other
            exceptions (e.g. from reading the metadata JSON) are not
            converted and propagate unchanged.
    """
    dataset_path = _resolve_recipe_artifact_path(artifact_path)

    # Imported lazily so a missing optional integration becomes a
    # RecipeDatasetPublishError instead of an import-time failure.
    try:
        from data_designer.engine.storage.artifact_storage import (
            FINAL_DATASET_FOLDER_NAME,
            METADATA_FILENAME,
            PROCESSORS_OUTPUTS_FOLDER_NAME,
            SDG_CONFIG_FILENAME,
        )
        from data_designer.integrations.huggingface.client import (
            HuggingFaceHubClient,
            HuggingFaceHubClientUploadError,
        )
        from data_designer.integrations.huggingface.dataset_card import (
            DataDesignerDatasetCard,
        )
    except ImportError as exc:
        raise RecipeDatasetPublishError(
            "NeMo Data Designer Hugging Face integration is not installed."
        ) from exc

    try:
        # Validate inputs and make sure the target repo exists before any upload.
        client = HuggingFaceHubClient(token = hf_token)
        client._validate_repo_id(repo_id = repo_id)
        client._validate_dataset_path(base_dataset_path = dataset_path)
        client._create_or_get_repo(repo_id = repo_id, private = private)

        metadata_path = dataset_path / METADATA_FILENAME
        builder_config_path = dataset_path / SDG_CONFIG_FILENAME

        with metadata_path.open(encoding = "utf-8") as fh:
            metadata = json.load(fh)

        # The builder config is optional; the card is built without it when absent.
        builder_config = None
        if builder_config_path.exists():
            with builder_config_path.open(encoding = "utf-8") as fh:
                builder_config = json.load(fh)

        card = DataDesignerDatasetCard.from_metadata(
            metadata = metadata,
            builder_config = builder_config,
            repo_id = repo_id,
            description = description,
            tags = None,
        )
        # Swap the Data Designer footer for the Unsloth Studio one.
        card.text = card.text.replace(_DATA_DESIGNER_FOOTER, _UNSLOTH_STUDIO_FOOTER)
        # Data Designer currently drops the explicit token when pushing the
        # dataset card. Push it ourselves so auth stays request-local.
        card.push_to_hub(repo_id, token = hf_token, repo_type = "dataset")

        client._upload_main_dataset_files(
            repo_id = repo_id,
            parquet_folder = dataset_path / FINAL_DATASET_FOLDER_NAME,
        )
        client._upload_images_folder(
            repo_id = repo_id,
            images_folder = dataset_path / "images",
        )
        client._upload_processor_files(
            repo_id = repo_id,
            processors_folder = dataset_path / PROCESSORS_OUTPUTS_FOLDER_NAME,
        )
        client._upload_config_files(
            repo_id = repo_id,
            metadata_path = metadata_path,
            builder_config_path = builder_config_path,
        )

        return f"https://huggingface.co/datasets/{repo_id}"
    except HuggingFaceHubClientUploadError as exc:
        # Surface client upload failures as the module's own error type.
        raise RecipeDatasetPublishError(str(exc)) from exc


================================================
FILE: studio/backend/core/data_recipe/jobs/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from .manager import JobManager, get_job_manager

__all__ = ["JobManager", "get_job_manager"]


================================================
FILE: studio/backend/core/data_recipe/jobs/constants.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

# Stage identifiers assigned by the log parser (parse.py) when it recognises a
# data-designer log line.
STAGE_CREATE = "create"
STAGE_PREVIEW = "preview"
STAGE_DAG = "dag"
STAGE_HEALTHCHECK = "healthcheck"
STAGE_SAMPLING = "sampling"
STAGE_COLUMN_CONFIG = "column_config"
STAGE_GENERATING = "generating"
STAGE_BATCH = "batch"
STAGE_PROFILING = "profiling"

# Stages that end a "Model usage summary" block: when the parser reports one of
# these, apply_update() clears the job's in-usage-summary flag.
# Note that STAGE_COLUMN_CONFIG and STAGE_BATCH are not part of this set.
USAGE_RESET_STAGES = {
    STAGE_CREATE,
    STAGE_PREVIEW,
    STAGE_DAG,
    STAGE_HEALTHCHECK,
    STAGE_SAMPLING,
    STAGE_GENERATING,
    STAGE_PROFILING,
}

# Job lifecycle event types emitted by the worker subprocess and the manager.
EVENT_JOB_ENQUEUED = "job.enqueued"
EVENT_JOB_STARTED = "job.started"
EVENT_JOB_CANCELLING = "job.cancelling"
EVENT_JOB_CANCELLED = "job.cancelled"
EVENT_JOB_COMPLETED = "job.completed"
EVENT_JOB_ERROR = "job.error"


================================================
FILE: studio/backend/core/data_recipe/jobs/manager.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import asyncio
import json
import queue
import threading
import time
import uuid
from pathlib import Path
from collections import deque
from dataclasses import dataclass
from typing import Any

import multiprocessing as mp

from ..jsonable import to_preview_jsonable
from .constants import (
    EVENT_JOB_CANCELLING,
    EVENT_JOB_CANCELLED,
    EVENT_JOB_COMPLETED,
    EVENT_JOB_ENQUEUED,
    EVENT_JOB_ERROR,
    EVENT_JOB_STARTED,
)
from .parse import apply_update, coerce_event, parse_log_message
from .types import Job
from .worker import run_job_process


_CTX = mp.get_context("spawn")


@dataclass
class Subscription:
    """One SSE client's view: a replay snapshot plus a live event queue."""

    replay: list[dict]
    _q: queue.Queue
    _next_id: int = 0

    async def next_event(self, *, timeout_sec: float) -> dict | None:
        """Wait for the next live event without blocking the event loop.

        Returns ``None`` when nothing arrives within ``timeout_sec`` so the
        caller can check for client disconnects between waits.
        """
        blocking_get = self._q.get
        try:
            item = await asyncio.to_thread(blocking_get, True, timeout_sec)
        except queue.Empty:
            item = None
        return item

    def format_sse(self, event: dict) -> bytes:
        """Serialize ``event`` as one SSE frame (``id`` / ``event`` / ``data``)."""
        sse_id = event.get("seq")
        if sse_id is None:
            # Events without a "seq" key get an id from a local counter.
            self._next_id = self._next_id + 1
            sse_id = self._next_id
        payload = json.dumps(event, separators = (",", ":"), ensure_ascii = False)
        sse_name = event.get("type") or "message"
        frame = "".join(
            (f"id: {sse_id}\n", f"event: {sse_name}\n", f"data: {payload}\n\n")
        )
        return frame.encode("utf-8")


class JobManager:
    def __init__(self) -> None:
        """Single-job runner (in-mem). Simple on purpose, not a whole platform."""
        self._lock = threading.Lock()
        self._job: Job | None = None
        self._proc: mp.Process | None = None
        self._mp_q: Any | None = None
        self._events: deque[dict] = deque(maxlen = 5000)
        self._subs: list[queue.Queue] = []
        self._pump_thread: threading.Thread | None = None
        self._seq: int = 0

    def start(self, *, recipe: dict, run: dict) -> str:
        """Spawn the worker subprocess for a new job and return its job id.

        Only one job runs at a time: raises ``RuntimeError("job already
        running")`` while the previous subprocess is still alive.
        """
        # Count columns whose column_type starts with "llm"; the count is stored
        # on the job as progress_columns_total (never less than 1).
        columns = recipe.get("columns") or []
        llm_column_count = 0
        if isinstance(columns, list):
            llm_column_count = sum(
                1
                for column in columns
                if isinstance(column, dict)
                and str(column.get("column_type") or "")
                .strip()
                .lower()
                .startswith("llm")
            )
        llm_column_count = max(llm_column_count, 1)

        with self._lock:
            previous = self._proc
            if previous is not None and previous.is_alive():
                raise RuntimeError("job already running")

            # Fresh job state; the replay buffer and sequence restart per job.
            job_id = uuid.uuid4().hex
            self._job = Job(job_id = job_id, status = "pending", started_at = time.time())
            self._job.progress_columns_total = llm_column_count
            self._events.clear()
            self._seq = 0

            # Copy before tagging so the caller's dict is left untouched.
            run_payload = dict(run)
            run_payload["_job_id"] = job_id

            event_queue = _CTX.Queue()
            worker = _CTX.Process(
                target = run_job_process,
                kwargs = {
                    "event_queue": event_queue,
                    "recipe": recipe,
                    "run": run_payload,
                },
                daemon = True,
            )
            worker.start()

            self._mp_q = event_queue
            self._proc = worker
            self._pump_thread = threading.Thread(target = self._pump_loop, daemon = True)
            self._pump_thread.start()

            enqueued = {"type": EVENT_JOB_ENQUEUED, "ts": time.time(), "job_id": job_id}
            self._emit(enqueued)
            return job_id

    def cancel(self, job_id: str) -> bool:
        """Hard stop. We terminate the subprocess. Quick + reliable."""
        with self._lock:
            if self._job is None or self._job.job_id != job_id:
                return False
            if self._proc is None or not self._proc.is_alive():
                return True
            self._job.status = "cancelling"
            self._emit(
                {"type": EVENT_JOB_CANCELLING, "ts": time.time(), "job_id": job_id}
            )
            try:
                self._proc.terminate()
            except (AttributeError, OSError):
                pass
            return True

    def get_status(self, job_id: str) -> dict | None:
        """UI friendly snapshot that we need. Alternative to sse kinda of and structured"""
        with self._lock:
            if self._job is None or self._job.job_id != job_id:
                return None
            job = self._job
            return {
                "job_id": job.job_id,
                "status": job.status,
                "stage": job.stage,
                "current_column": job.current_column,
                "completed_columns": list(job.completed_columns),
                "batch": {"idx": job.batch.idx, "total": job.batch.total},
                "progress": {
                    "done": job.progress.done,
                    "total": job.progress.total,
                    "percent": job.progress.percent,
                    "eta_sec": job.progress.eta_sec,
                    "rate": job.progress.rate,
                    "ok": job.progress.ok,
                    "failed": job.progress.failed,
                },
                "column_progress": {
                    "done": job.column_progress.done,
                    "total": job.column_progress.total,
                    "percent": job.column_progress.percent,
                    "eta_sec": job.column_progress.eta_sec,
                    "rate": job.column_progress.rate,
                    "ok": job.column_progress.ok,
                    "failed": job.column_progress.failed,
                },
                "model_usage": {
                    name: {
                        "model": usage.model,
                        "tokens": {
                            "input": usage.input_tokens,
                            "output": usage.output_tokens,
                            "total": usage.total_tokens,
                            "tps": usage.tps,
                        },
                        "requests": {
                            "success": usage.requests_success,
                            "failed": usage.requests_failed,
                            "total": usage.requests_total,
                            "rpm": usage.rpm,
                        },
                    }
                    for name, usage in job.model_usage.items()
                },
                "rows": job.rows,
                "cols": job.cols,
                "error": job.error,
                "has_analysis": job.analysis is not None,
                "dataset_rows": None if job.dataset is None else len(job.dataset),
                "artifact_path": job.artifact_path,
                "execution_type": job.execution_type,
                "started_at": job.started_at,
                "finished_at": job.finished_at,
            }

    def get_current_status(self) -> dict | None:
        """Single-job convenience (last/current)."""
        job_id = self.get_current_job_id()
        if job_id is None:
            return None
        return self.get_status(job_id)

    def get_current_job_id(self) -> str | None:
        """Return current job_id (or None)."""
        with self._lock:
            return None if self._job is None else self._job.job_id

    def get_analysis(self, job_id: str) -> dict | None:
        """Final profiling output (only after job completes)."""
        with self._lock:
            if self._job is None or self._job.job_id != job_id:
                return None
            return self._job.analysis

    def get_dataset(
        self,
        job_id: str,
        *,
        limit: int,
        offset: int = 0,
    ) -> dict[str, Any] | None:
        """Load dataset page (offset + limit) and include total rows."""
        with self._lock:
            if self._job is None or self._job.job_id != job_id:
                return None
            in_memory_dataset = self._job.dataset
            artifact_path = self._job.artifact_path
            job_status = self._job.status

        if in_memory_dataset is not None:
            total = len(in_memory_dataset)
            rows = in_memory_dataset[offset : offset + limit]
            return {"dataset": rows, "total": total}
        if not artifact_path:
            if job_status in {"completed", "error", "cancelled"}:
                return {"error": "artifact path missing"}
            return None

        try:
            base_dataset_path = Path(artifact_path)
            parquet_dir = base_dataset_path / "parquet-files"
            if not parquet_dir.exists():
                return {"error": f"dataset path missing: {parquet_dir}"}

            return self._load_dataset_page(
                parquet_dir = parquet_dir, limit = limit, offset = offset
            )
        except Exception as exc:
            return {"error": f"dataset load failed: {exc}"}

    @staticmethod
    def _load_dataset_page(
        *,
        parquet_dir: Path,
        limit: int,
        offset: int,
    ) -> dict[str, Any]:
        """Load one dataset page, trying DuckDB first and data-designer second."""
        page = JobManager._load_dataset_page_with_duckdb(
            parquet_dir = parquet_dir,
            limit = limit,
            offset = offset,
        )
        if page is None:
            # The DuckDB loader returned None, so use the data-designer reader.
            page = JobManager._load_dataset_page_with_data_designer(
                parquet_dir = parquet_dir,
                limit = limit,
                offset = offset,
            )
        return page

    @staticmethod
    def _load_dataset_page_with_duckdb(
        *,
        parquet_dir: Path,
        limit: int,
        offset: int,
    ) -> dict[str, Any] | None:
        parquet_glob = str((parquet_dir / "*.parquet").resolve())
        try:
            import duckdb  # type: ignore
        except Exception:
            return None

        try:
            conn = duckdb.connect(":memory:")
            try:
                total_row = conn.execute(
                    "SELECT COUNT(*) FROM read_parquet(?)",
                    [parquet_glob],
                ).fetchone()
                total = int(total_row[0] if total_row else 0)
                dataframe = conn.execute(
                    (
                        "SELECT *, row_number() OVER (PARTITION BY filename) AS __row_num__ "
                        "FROM read_parquet(?, filename=true) "
                        "ORDER BY filename, __row_num__ "
                        "LIMIT ? OFFSET ?"
                    ),
                    [parquet_glob, int(limit), int(offset)],
                ).fetchdf()
            finally:
                conn.close()
        except (RuntimeError, ValueError, duckdb.Error):
            return None

        for helper_col in ("filename", "__row_num__"):
            if helper_col in dataframe.columns:
                dataframe = dataframe.drop(columns = [helper_col])

        rows = dataframe.to_dict(orient = "records")
        return {"dataset": to_preview_jsonable(rows), "total": total}

    @staticmethod
    def _load_dataset_page_with_data_designer(
        *,
        parquet_dir: Path,
        limit: int,
        offset: int,
    ) -> dict[str, Any]:
        """Read the whole parquet dataset via data-designer and slice one page."""
        from data_designer.config.utils.io_helpers import read_parquet_dataset

        frame = read_parquet_dataset(parquet_dir)
        page = frame.iloc[offset : offset + limit]
        return {
            "dataset": to_preview_jsonable(page.to_dict(orient = "records")),
            "total": int(len(frame.index)),
        }

    def subscribe(
        self, job_id: str, *, after_seq: int | None = None
    ) -> Subscription | None:
        """SSE subscribe: get replay buffer + live events stream."""
        with self._lock:
            if self._job is None or self._job.job_id != job_id:
                return None
            q: queue.Queue = queue.Queue(maxsize = 2000)
            self._subs.append(q)
            if after_seq is None:
                replay = list(self._events)
            else:
                replay = [e for e in self._events if int(e.get("seq") or 0) > after_seq]
            return Subscription(replay = replay, _q = q)

    def unsubscribe(self, sub: Subscription) -> None:
        """Drop SSE subscriber (client disconnected)."""
        with self._lock:
            self._subs = [q for q in self._subs if q is not sub._q]

    def _emit(self, event: dict) -> None:
        """Broadcast event to replay buffer + all subscribers."""
        self._seq += 1
        event["seq"] = self._seq
        self._events.append(event)
        stale: list[queue.Queue] = []
        for q in self._subs:
            try:
                q.put_nowait(event)
            except queue.Full:
                stale.append(q)
        if stale:
            self._subs = [q for q in self._subs if q not in stale]

    def _snapshot(self) -> tuple[Job, mp.Process, Any] | None:
        """Grab pointers for the pump loop (avoid holding lock too long)."""
        with self._lock:
            if self._job is None or self._proc is None or self._mp_q is None:
                return None
            return self._job, self._proc, self._mp_q

    @staticmethod
    def _read_queue_with_timeout(q: Any, *, timeout_sec: float) -> dict | None:
        """Read at most one event from the mp queue, waiting up to ``timeout_sec``.

        Returns ``None`` on timeout and also when the read raises
        ``EOFError``, ``OSError`` or ``ValueError``.
        """
        try:
            raw = q.get(timeout = timeout_sec)
        except (queue.Empty, EOFError, OSError, ValueError):
            return None
        return coerce_event(raw)

    @staticmethod
    def _drain_queue(q: Any) -> list[dict]:
        """Collect every event that can be read from the mp queue without waiting.

        Stops at the first ``queue.Empty`` (or ``EOFError`` / ``OSError`` /
        ``ValueError``) and returns what was collected so far.
        """
        drained: list[dict] = []
        while True:
            try:
                raw = q.get_nowait()
            except (queue.Empty, EOFError, OSError, ValueError):
                break
            drained.append(coerce_event(raw))
        return drained

    def _pump_loop(self) -> None:
        """Background thread: consumes worker events + updates job snapshot."""
        while True:
            snap = self._snapshot()
            if snap is None:
                return
            job, proc, mp_q = snap

            event = self._read_queue_with_timeout(mp_q, timeout_sec = 0.25)
            if event is not None:
                self._handle_event(job, event)
                continue

            if proc.is_alive():
                continue

            for e in self._drain_queue(mp_q):
                self._handle_event(job, e)

            with self._lock:
                if self._job and self._job.status in {
                    "pending",
                    "active",
                    "cancelling",
                }:
                    if self._job.status == "cancelling":
                        self._job.status = "cancelled"
                    else:
                        self._job.status = "error"
                        self._job.error = self._job.error or "process exited"
                    self._job.finished_at = time.time()
                    event_type = (
                        EVENT_JOB_CANCELLED
                        if self._job.status == "cancelled"
                        else EVENT_JOB_ERROR
                    )
                    self._emit(
                        {
                            "type": event_type,
                            "ts": time.time(),
                            "job_id": self._job.job_id,
                        }
                    )
            return

    def _handle_event(self, job: Job, event: dict) -> None:
        """Apply event -> job state + forward to SSE."""
        et = event.get("type")
        msg = event.get("message") if et == "log" else None

        with self._lock:
            if self._job is None or self._job.job_id != job.job_id:
                return
            if et == EVENT_JOB_STARTED:
                self._job.status = "active"
            if et == EVENT_JOB_COMPLETED:
                self._job.status = "completed"
                self._job.finished_at = time.time()
                self._job.analysis = event.get("analysis")
                self._job.artifact_path = event.get("artifact_path")
                self._job.execution_type = event.get("execution_type")
                self._job.dataset = event.get("dataset")
                self._job.processor_artifacts = event.get("processor_artifacts")
                if self._job.progress.total and self._job.progress.total > 0:
                    self._job.progress.done = self._job.progress.total
                    self._job.progress.percent = 100.0
            if et == EVENT_JOB_ERROR:
                self._job.status = "error"
                self._job.finished_at = time.time()
                self._job.error = event.get("error") or "error"

            if msg:
                upd = parse_log_message(msg)
                if upd:
                    apply_update(self._job, upd)

        self._emit(event)


# Module-wide JobManager instance, created on first use by get_job_manager().
_JOB_MANAGER: JobManager | None = None


def get_job_manager() -> JobManager:
    """Return the shared JobManager, creating it on first call."""
    global _JOB_MANAGER
    manager = _JOB_MANAGER
    if manager is None:
        manager = JobManager()
        _JOB_MANAGER = manager
    return manager


================================================
FILE: studio/backend/core/data_recipe/jobs/parse.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any

from .constants import (
    STAGE_BATCH,
    STAGE_COLUMN_CONFIG,
    STAGE_CREATE,
    STAGE_DAG,
    STAGE_GENERATING,
    STAGE_HEALTHCHECK,
    STAGE_PREVIEW,
    STAGE_PROFILING,
    STAGE_SAMPLING,
    USAGE_RESET_STAGES,
)
from .types import Job, ModelUsage, Progress


@dataclass(frozen = True)
class ParsedUpdate:
    """Immutable set of values extracted from a single log message.

    Every field defaults to ``None``. ``parse_log_message`` fills in only the
    fields the matched pattern provides, and ``apply_update`` leaves the job
    untouched for fields that are ``None``.
    """

    # Stage implied by the log line (one of the STAGE_* constants).
    stage: str | None = None
    # Column name captured from a column-config or "Processing ... column" line.
    current_column: str | None = None
    # Counters parsed from a "progress: ..." line.
    progress: Progress | None = None
    # Record / column counts from the "Preparing samplers ..." line.
    rows: int | None = None
    cols: int | None = None
    # Values from a "Processing batch X of Y" line.
    batch_idx: int | None = None
    batch_total: int | None = None
    # Fields of the "Model usage summary" block (model / tokens / requests lines).
    usage_model: str | None = None
    usage_input_tokens: int | None = None
    usage_output_tokens: int | None = None
    usage_total_tokens: int | None = None
    usage_tps: float | None = None
    usage_requests_success: int | None = None
    usage_requests_failed: int | None = None
    usage_requests_total: int | None = None
    usage_rpm: float | None = None
    # True when the line is the "Model usage summary" header itself.
    usage_section_start: bool | None = None


# Unfortunate, but currently the only option: a best-effort parser that turns
# data-designer log messages into structured status for the UI.

# "Preparing samplers to generate <rows> records across <cols> columns"
_RE_SAMPLERS = re.compile(
    r"Preparing samplers to generate (?P<rows>\d+) records across (?P<cols>\d+) columns"
)
# "... model config for column '<col>'"
_RE_COLCFG = re.compile(r"model config for column '(?P<col>[^']+)'")
# "Processing <anything> column '<col>'"
_RE_PROCESSING_COL = re.compile(r"Processing .* column '(?P<col>[^']+)'")
# "progress: <done>/<total> (<pct>%) complete, <ok> ok, <failed> failed,
#  <rate> rec/s, eta <eta>s" -- note that <pct> only matches whole numbers.
_RE_PROGRESS = re.compile(
    r"progress: (?P<done>\d+)/(?P<total>\d+) \((?P<pct>\d+)%\) complete, "
    r"(?P<ok>\d+) ok, (?P<failed>\d+) failed, (?P<rate>[0-9.]+) rec/s, eta (?P<eta>[0-9.]+)s"
)
# "Processing batch <idx> of <total>"
_RE_BATCH = re.compile(r"Processing batch (?P<idx>\d+) of (?P<total>\d+)")
# "model: <name>" up to end of message (usage summary block).
_RE_USAGE_MODEL = re.compile(r"model:\s*(?P<model>.+)$")
# "tokens: input=<n>, output=<n>, total=<n>, tps=<x>"
_RE_USAGE_TOKENS = re.compile(
    r"tokens:\s*input=(?P<input>\d+),\s*output=(?P<output>\d+),\s*total=(?P<total>\d+),\s*tps=(?P<tps>[0-9.]+)"
)
# "requests: success=<n>, failed=<n>, total=<n>, rpm=<x>"
_RE_USAGE_REQUESTS = re.compile(
    r"requests:\s*success=(?P<success>\d+),\s*failed=(?P<failed>\d+),\s*total=(?P<total>\d+),\s*rpm=(?P<rpm>[0-9.]+)"
)


def parse_log_message(msg: str) -> ParsedUpdate | None:
    """Map one log message onto a ``ParsedUpdate``.

    Patterns are tried in a fixed order and the first match wins. Returns
    ``None`` when the message matches none of them.
    """
    match = _RE_SAMPLERS.search(msg)
    if match is not None:
        return ParsedUpdate(
            stage = STAGE_SAMPLING,
            rows = int(match["rows"]),
            cols = int(match["cols"]),
        )

    # Fixed phrases that only signal a stage change.
    stage_markers = (
        ("Sorting column configs into a Directed Acyclic Graph", STAGE_DAG),
        ("Running health checks for models", STAGE_HEALTHCHECK),
        ("Preview generation in progress", STAGE_PREVIEW),
        ("Creating Data Designer dataset", STAGE_CREATE),
        ("Measuring dataset column statistics", STAGE_PROFILING),
    )
    for marker, stage in stage_markers:
        if marker in msg:
            return ParsedUpdate(stage = stage)

    match = _RE_COLCFG.search(msg)
    if match is not None:
        return ParsedUpdate(
            stage = STAGE_COLUMN_CONFIG, current_column = match["col"]
        )

    match = _RE_PROCESSING_COL.search(msg)
    if match is not None:
        return ParsedUpdate(stage = STAGE_GENERATING, current_column = match["col"])

    match = _RE_PROGRESS.search(msg)
    if match is not None:
        counters = Progress(
            done = int(match["done"]),
            total = int(match["total"]),
            percent = float(match["pct"]),
            ok = int(match["ok"]),
            failed = int(match["failed"]),
            rate = float(match["rate"]),
            eta_sec = float(match["eta"]),
        )
        return ParsedUpdate(stage = STAGE_GENERATING, progress = counters)

    match = _RE_BATCH.search(msg)
    if match is not None:
        return ParsedUpdate(
            stage = STAGE_BATCH,
            batch_idx = int(match["idx"]),
            batch_total = int(match["total"]),
        )

    if "Model usage summary" in msg:
        return ParsedUpdate(usage_section_start = True)

    # The model line only counts when it carries the "|-- model:" prefix.
    match = _RE_USAGE_MODEL.search(msg)
    if match is not None and "|-- model:" in msg:
        return ParsedUpdate(usage_model = str(match["model"]).strip())

    match = _RE_USAGE_TOKENS.search(msg)
    if match is not None:
        return ParsedUpdate(
            usage_input_tokens = int(match["input"]),
            usage_output_tokens = int(match["output"]),
            usage_total_tokens = int(match["total"]),
            usage_tps = float(match["tps"]),
        )

    match = _RE_USAGE_REQUESTS.search(msg)
    if match is not None:
        return ParsedUpdate(
            usage_requests_success = int(match["success"]),
            usage_requests_failed = int(match["failed"]),
            usage_requests_total = int(match["total"]),
            usage_rpm = float(match["rpm"]),
        )

    return None


def apply_update(job: Job, update: ParsedUpdate) -> None:
    """Merge a ``ParsedUpdate`` into ``job`` in place.

    Fields that are ``None`` on ``update`` leave the job untouched. Usage
    fields are only applied while the job is inside a "Model usage summary"
    block and a current usage model is known.
    """
    stage = update.stage
    column = update.current_column
    progress = update.progress

    if stage is not None:
        job.stage = stage

    if column is not None:
        job.current_column = column
        unseen = column not in job._seen_generation_columns
        if stage == STAGE_GENERATING and unseen:
            job._seen_generation_columns.append(column)

    if update.rows is not None:
        job.rows = update.rows
    if update.cols is not None:
        job.cols = update.cols

    if progress is not None:
        job.column_progress = progress
        active_column = job.current_column
        column_finished = (
            progress.done is not None
            and progress.total is not None
            and progress.total > 0
            and progress.done >= progress.total
        )
        if (
            active_column
            and column_finished
            and active_column not in job.completed_columns
        ):
            job.completed_columns.append(active_column)
        job.progress = _compute_overall_progress(job, progress)

    if update.batch_idx is not None:
        job.batch.idx = update.batch_idx
    if update.batch_total is not None:
        job.batch.total = update.batch_total

    if stage in USAGE_RESET_STAGES:
        # usage summary is a short block so we reset once we move into the next stage.
        job._in_usage_summary = False

    section_start = update.usage_section_start
    if section_start is not None:
        job._in_usage_summary = section_start
        if section_start:
            # A new summary block starts without a selected model.
            job._current_usage_model = None

    if not job._in_usage_summary:
        return

    if update.usage_model is not None:
        # Strip whitespace and surrounding quotes from the logged model name.
        model_name = update.usage_model.strip().strip("'").strip('"')
        job._current_usage_model = model_name
        if model_name not in job.model_usage:
            job.model_usage[model_name] = ModelUsage(model = model_name)

    if job._current_usage_model is None:
        return

    usage = job.model_usage.get(job._current_usage_model)
    if usage is None:
        return

    # (field on the update, field on the usage record)
    usage_fields = (
        ("usage_input_tokens", "input_tokens"),
        ("usage_output_tokens", "output_tokens"),
        ("usage_total_tokens", "total_tokens"),
        ("usage_tps", "tps"),
        ("usage_requests_success", "requests_success"),
        ("usage_requests_failed", "requests_failed"),
        ("usage_requests_total", "requests_total"),
        ("usage_rpm", "rpm"),
    )
    for source_name, target_name in usage_fields:
        value = getattr(update, source_name)
        if value is not None:
            setattr(usage, target_name, value)


def _compute_overall_progress(job: Job, column_progress: Progress) -> Progress:
    """Fold per-column progress into an overall progress value for the job.

    Without a known row count the column progress is returned unchanged.
    Otherwise the per-column "done" counts recorded on the job are summed and
    divided by the expected number of columns. The resulting ``done`` and
    ``percent`` never go below the job's previous values, and ``done`` is
    capped at the row count. ETA, rate, ok and failed are taken from the
    column progress as-is.
    """
    if not job.rows:
        return column_progress

    total_rows = max(1, int(job.rows))
    raw_done = int(column_progress.done) if column_progress.done is not None else 0
    current_done = max(0, min(raw_done, total_rows))
    total_columns = max(1, int(job.progress_columns_total or 1))

    # Remember the latest count for the column currently being generated.
    if job.current_column:
        job._column_done[job.current_column] = current_done

    if job._column_done:
        clamped_sum = 0
        for value in job._column_done.values():
            clamped_sum += max(0, min(value, total_rows))
        done = int(clamped_sum / total_columns)
    else:
        done = current_done

    # Never move backwards, never exceed the row count.
    done = max(done, int(job.progress.done or 0))
    done = min(done, total_rows)

    # total_rows is at least 1 here, so the division is always defined.
    percent = (done / total_rows) * 100
    previous_percent = float(job.progress.percent or 0.0)
    if percent < previous_percent:
        percent = previous_percent

    return Progress(
        done = done,
        total = total_rows,
        percent = percent,
        eta_sec = column_progress.eta_sec,
        rate = column_progress.rate,
        ok = column_progress.ok,
        failed = column_progress.failed,
    )


def coerce_event(obj: Any) -> dict:
    """Return ``obj`` if it is a dict, otherwise wrap ``str(obj)`` as a log event."""
    if isinstance(obj, dict):
        return obj
    return {"type": "log", "message": str(obj)}


================================================
FILE: studio/backend/core/data_recipe/jobs/types.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Literal


# Allowed values for Job.status. "created" is the dataclass default; the job
# manager assigns "pending", "active", "cancelling", "cancelled", "error" and
# "completed".
JobStatus = Literal[
    "created",
    "pending",
    "active",
    "cancelling",
    "cancelled",
    "error",
    "completed",
]


@dataclass
class Progress:
    """Progress counters; every field is ``None`` until a value is known.

    The log parser fills these from lines of the form
    ``progress: <done>/<total> (<pct>%) complete, <ok> ok, <failed> failed,
    <rate> rec/s, eta <eta>s``.
    """

    done: int | None = None
    total: int | None = None
    percent: float | None = None
    # Value following "eta" in the progress line (logged with an "s" suffix).
    eta_sec: float | None = None
    # Value preceding "rec/s" in the progress line.
    rate: float | None = None
    ok: int | None = None
    failed: int | None = None


@dataclass
class BatchProgress:
    """Batch position parsed from "Processing batch <idx> of <total>" log lines."""

    idx: int | None = None
    total: int | None = None


@dataclass
class ModelUsage:
    """Per-model usage figures taken from the "Model usage summary" log block."""

    # Model name as logged, with surrounding quotes stripped by the parser.
    model: str
    # From the "tokens: input=..., output=..., total=..., tps=..." line.
    input_tokens: int | None = None
    output_tokens: int | None = None
    total_tokens: int | None = None
    tps: float | None = None
    # From the "requests: success=..., failed=..., total=..., rpm=..." line.
    requests_success: int | None = None
    requests_failed: int | None = None
    requests_total: int | None = None
    rpm: float | None = None


@dataclass
class Job:
    """In-memory state of one data-recipe job, as tracked by the job manager."""

    job_id: str
    status: JobStatus = "created"
    # Last stage reported by the log parser (one of the STAGE_* constants).
    stage: str | None = None
    # Column named by the most recent column-related log line.
    current_column: str | None = None
    # Overall job progress (derived from the per-column progress).
    progress: Progress = field(default_factory = Progress)
    # Latest progress exactly as parsed from a "progress: ..." log line.
    column_progress: Progress = field(default_factory = Progress)
    batch: BatchProgress = field(default_factory = BatchProgress)
    # Record / column counts from the "Preparing samplers ..." log line.
    rows: int | None = None
    cols: int | None = None
    error: str | None = None
    # time.time() values set by the job manager.
    started_at: float | None = None
    finished_at: float | None = None

    # The next five fields are copied from the job.completed event payload.
    analysis: dict[str, Any] | None = None
    artifact_path: str | None = None
    execution_type: str | None = None
    dataset: list[dict[str, Any]] | None = None
    processor_artifacts: dict[str, Any] | None = None
    # Usage records keyed by model name.
    model_usage: dict[str, ModelUsage] = field(default_factory = dict)
    # Number of LLM columns in the recipe (at least 1); set when the job starts
    # and used as the divisor for overall progress.
    progress_columns_total: int | None = None
    # Columns whose progress reached done >= total.
    completed_columns: list[str] = field(default_factory = list)
    # Parser bookkeeping (private): the model the usage lines currently apply to,
    # whether a usage summary block is open, the columns seen in the generating
    # stage, and the latest "done" count per column.
    _current_usage_model: str | None = None
    _in_usage_summary: bool = False
    _seen_generation_columns: list[str] = field(default_factory = list)
    _column_done: dict[str, int] = field(default_factory = dict)


================================================
FILE: studio/backend/core/data_recipe/jobs/worker.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import json
import structlog
import loggers
import logging
import re
import shutil
import time
import traceback
import unicodedata
from pathlib import Path
from typing import Any

from ..jsonable import to_jsonable, to_preview_jsonable
from .constants import EVENT_JOB_COMPLETED, EVENT_JOB_ERROR, EVENT_JOB_STARTED
from ..service import build_config_builder, create_data_designer
from utils.paths import ensure_dir, recipe_datasets_root

# Root directory for recipe dataset artifacts; resolved once when this module
# is imported.
_ARTIFACT_ROOT = recipe_datasets_root()


class _QueueLogHandler(logging.Handler):
    """Logging handler that forwards each record to an event queue as a "log" event."""

    def __init__(self, event_queue):
        super().__init__()
        self._q = event_queue

    def emit(self, record: logging.LogRecord) -> None:
        """Put a dict describing ``record`` on the queue; enqueue errors are ignored."""
        try:
            self._q.put(
                {
                    "type": "log",
                    "ts": record.created,
                    "level": record.levelname,
                    "logger": record.name,
                    "message": record.getMessage(),
                }
            )
        except (OSError, RuntimeError, ValueError):
            # Deliberately swallowed: a failed enqueue just drops this log line.
            return


def _slugify_run_name(value: str) -> str:
    """Turn a free-form run name into a lowercase ASCII slug.

    Non-ASCII characters are dropped after NFKD normalization, runs of
    non-alphanumerics collapse to a single "-", and the result is limited to
    80 characters with no leading or trailing "-". Returns "" when nothing
    usable remains.
    """
    decomposed = unicodedata.normalize("NFKD", value)
    ascii_text = decomposed.encode("ascii", "ignore").decode("ascii")
    dashed = re.sub(r"[^a-zA-Z0-9]+", "-", ascii_text)
    slug = dashed.strip("-").lower()
    # Truncation can expose a trailing "-", so strip once more afterwards.
    return slug[:80].strip("-") if slug else ""


def _build_dataset_name(
    *, run_name: str | None, job_id: str, artifact_root: Path
) -> str:
    """Pick a dataset directory name that does not yet exist in ``artifact_root``.

    The name is ``recipe_<slug>`` when ``run_name`` slugifies to something
    non-empty, otherwise ``recipe_<job_id>``. If that path already exists,
    ``_2``, ``_3``, ... are appended until a free name is found.
    """
    slug = _slugify_run_name(run_name or "")
    stem = f"recipe_{slug}" if slug else f"recipe_{job_id}"

    name = stem
    counter = 1
    while (artifact_root / name).exists():
        counter += 1
        name = f"{stem}_{counter}"
    return name


def run_job_process(
    *,
    event_queue,
    recipe: dict[str, Any],
    run: dict[str, Any],
) -> None:
    """
    Subprocess entrypoint.
    Sends events to `event_queue`.

    Emits ``EVENT_JOB_STARTED`` first, then either ``EVENT_JOB_COMPLETED``
    (with the analysis and, depending on the execution type, the preview
    dataset or the artifact path) or ``EVENT_JOB_ERROR`` (with the error text
    and a truncated traceback). Log records from the ``data_designer`` logger
    are forwarded to the queue as well.

    Keys read from ``run``: ``rows`` (default 1000), ``_job_id``, ``run_name``,
    ``merge_batches``, ``run_config`` and ``execution_type`` (``"preview"`` or
    anything else, which is treated as a full run).

    Exceptions raised inside the ``try`` block are reported through the queue
    rather than re-raised; failures in the logging setup before it propagate.
    """
    import os

    os.environ["PYTHONWARNINGS"] = (
        "ignore"  # Suppress warnings at C-level before imports
    )

    import warnings
    from loggers.config import LogConfig

    # Silence Python-level warnings too, but only in production
    # (ENVIRONMENT_TYPE defaults to "production" when unset).
    if os.getenv("ENVIRONMENT_TYPE", "production") == "production":
        warnings.filterwarnings("ignore")

    LogConfig.setup_logging(
        service_name = "unsloth-studio-data-worker",
        env = os.getenv("ENVIRONMENT_TYPE", "production"),
    )

    event_queue.put({"type": EVENT_JOB_STARTED, "ts": time.time()})

    try:
        from data_designer.config.run_config import RunConfig

        # Missing or falsy "rows" falls back to 1000 records.
        rows = int(run.get("rows") or 1000)
        job_id = str(run.get("_job_id") or "").strip()
        if not job_id:
            # No job id supplied: use the current Unix time (seconds) instead.
            job_id = f"{int(time.time())}"
        run_name_raw = run.get("run_name")
        run_name = run_name_raw if isinstance(run_name_raw, str) else None
        dataset_name = _build_dataset_name(
            run_name = run_name,
            job_id = job_id,
            artifact_root = _ARTIFACT_ROOT,
        )
        merge_batches = bool(run.get("merge_batches"))
        ensure_dir(_ARTIFACT_ROOT)
        run_config_raw = run.get("run_config") or {}

        builder = build_config_builder(recipe)
        designer = create_data_designer(recipe, artifact_path = str(_ARTIFACT_ROOT))

        # DataDesigner configures root logging in DataDesigner.__init__.
        # Attach queue logger directly to `data_designer` so parser events survive root resets.
        handler = _QueueLogHandler(event_queue)
        handler.setLevel(logging.INFO)
        data_designer_logger = logging.getLogger("data_designer")
        data_designer_logger.addHandler(handler)
        data_designer_logger.setLevel(logging.INFO)
        data_designer_logger.propagate = True

        # Only override the designer's run config when the caller supplied one.
        if run_config_raw:
            designer.set_run_config(RunConfig.model_validate(run_config_raw))

        execution_type = str(run.get("execution_type") or "full").strip().lower()
        if execution_type == "preview":
            # Preview: results are returned inline in the completion event and
            # no artifact path is reported.
            results = designer.preview(builder, num_records = rows)
            analysis = (
                None
                if results.analysis is None
                else to_jsonable(results.analysis.model_dump(mode = "json"))
            )
            dataset = (
                []
                if results.dataset is None
                else to_preview_jsonable(results.dataset.to_dict(orient = "records"))
            )
            processor_artifacts = (
                None
                if results.processor_artifacts is None
                else to_jsonable(results.processor_artifacts)
            )
            event_queue.put(
                {
                    "type": EVENT_JOB_COMPLETED,
                    "ts": time.time(),
                    "analysis": analysis,
                    "dataset": dataset,
                    "processor_artifacts": processor_artifacts,
                    "artifact_path": None,
                    "execution_type": execution_type,
                }
            )
        else:
            # Full run: the dataset is created under `dataset_name` and only
            # its location is reported back.
            results = designer.create(
                builder, num_records = rows, dataset_name = dataset_name
            )
            analysis = to_jsonable(results.load_analysis().model_dump(mode = "json"))
            if merge_batches:
                # Optionally collapse the per-batch parquet files into one.
                _merge_batches_to_single_parquet(
                    results.artifact_storage.base_dataset_path
                )
            artifact_path = str(results.artifact_storage.base_dataset_path)
            event_queue.put(
                {
                    "type": EVENT_JOB_COMPLETED,
                    "ts": time.time(),
                    "analysis": analysis,
                    "artifact_path": artifact_path,
                    "execution_type": execution_type,
                }
            )
    except Exception as exc:
        # Process boundary: report the failure through the queue instead of
        # raising; the traceback is capped at 20 frames.
        event_queue.put(
            {
                "type": EVENT_JOB_ERROR,
                "ts": time.time(),
                "error": str(exc),
                "stack": traceback.format_exc(limit = 20),
            }
        )


def _merge_batches_to_single_parquet(base_dataset_path: Path) -> None:
    """Collapse ``<base>/parquet-files/*.parquet`` into ``batch_00000.parquet``.

    Does nothing when there is at most one parquet file, or when the
    ``data_designer`` reader helper cannot be imported. After a successful
    merge the dataset's ``metadata.json`` is updated to describe the single
    file.

    The merged file is first written next to the batch directory and only
    swapped in once the write has succeeded, so a failed write leaves the
    original batch files untouched instead of deleting the dataset.
    """
    parquet_dir = base_dataset_path / "parquet-files"
    parquet_files = sorted(parquet_dir.glob("*.parquet"))
    if len(parquet_files) <= 1:
        return

    try:
        from data_designer.config.utils.io_helpers import read_parquet_dataset
    except ImportError:
        return

    dataframe = read_parquet_dataset(parquet_dir)

    # Stage outside parquet_dir so the directory can be wiped afterwards; the
    # staging file sits in the same parent, which keeps the final rename on
    # one filesystem.
    staged_file = base_dataset_path / "batch_00000.parquet.merging"
    try:
        dataframe.to_parquet(staged_file, index = False)
    except BaseException:
        # Do not leave a partial staging file behind; the batches are intact.
        staged_file.unlink(missing_ok = True)
        raise

    shutil.rmtree(parquet_dir)
    parquet_dir.mkdir(parents = True, exist_ok = True)
    merged_file = parquet_dir / "batch_00000.parquet"
    staged_file.replace(merged_file)
    _rewrite_merged_metadata(
        base_dataset_path = base_dataset_path,
        parquet_file = merged_file,
    )


def _rewrite_merged_metadata(*, base_dataset_path: Path, parquet_file: Path) -> None:
    """Point ``metadata.json`` at the single merged parquet file.

    Sets ``file_paths["parquet-files"]`` to the path of ``parquet_file``
    relative to ``base_dataset_path`` and both batch counters to 1. Other
    keys are preserved. A missing, unreadable, non-JSON or non-object
    metadata file is left alone, and a failed write is ignored.
    """
    manifest_path = base_dataset_path / "metadata.json"
    if not manifest_path.exists():
        return

    try:
        raw_text = manifest_path.read_text(encoding = "utf-8")
        manifest = json.loads(raw_text)
    except (OSError, TypeError, ValueError):
        return
    if not isinstance(manifest, dict):
        return

    merged_relative = str(parquet_file.relative_to(base_dataset_path))
    existing_paths = manifest.get("file_paths")
    paths_by_kind = existing_paths if isinstance(existing_paths, dict) else {}
    paths_by_kind["parquet-files"] = [merged_relative]
    manifest.update(
        file_paths = paths_by_kind,
        total_num_batches = 1,
        num_completed_batches = 1,
    )

    try:
        manifest_path.write_text(
            json.dumps(manifest, indent = 2, sort_keys = True),
            encoding = "utf-8",
        )
    except OSError:
        return


================================================
FILE: studio/backend/core/data_recipe/jsonable.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import base64
import io
from pathlib import Path
from typing import Any


def _pil_to_preview_payload(image: Any) -> dict[str, Any]:
    """Encode ``image`` as a base64 JPEG preview payload.

    The image is converted to RGB and saved as JPEG at quality 85. The
    reported ``width``/``height`` are read from the image that was passed in.
    """
    jpeg_stream = io.BytesIO()
    rgb_image = image.convert("RGB")
    rgb_image.save(jpeg_stream, format = "JPEG", quality = 85)
    encoded = base64.b64encode(jpeg_stream.getvalue()).decode("ascii")
    return {
        "type": "image",
        "mime": "image/jpeg",
        "width": image.width,
        "height": image.height,
        "data": encoded,
    }


def _open_pil_image_from_bytes(raw_bytes: bytes):
    """Decode ``raw_bytes`` with Pillow and return a copy of the opened image.

    The copy is taken inside the ``with`` block, so the returned image stays
    usable after the opened image has been closed.
    """
    from PIL import Image  # type: ignore

    stream = io.BytesIO(raw_bytes)
    with Image.open(stream) as opened:
        detached = opened.copy()
    return detached


def _to_pil_from_hf_image_dict(value: Any) -> Any | None:
    if not isinstance(value, dict):
        return None

    raw_bytes = value.get("bytes")
    if isinstance(raw_bytes, (bytes, bytearray)) and len(raw_bytes) > 0:
        try:
            return _open_pil_image_from_bytes(bytes(raw_bytes))
        except (OSError, ValueError):
            pass
    if (
        isinstance(raw_bytes, list)
        and len(raw_bytes) > 0
        and all(isinstance(item, int) and 0 <= item <= 255 for item in raw_bytes)
    ):
        try:
            return _open_pil_image_from_bytes(bytes(raw_bytes))
        except (OSError, ValueError):
            pass

    path_value = value.get("path")
    if isinstance(path_value, str) and path_value.strip():
        try:
            from PIL import Image  # type: ignore

            with Image.open(Path(path_value)) as image:
                return image.copy()
        except (OSError, ValueError, TypeError):
            return None

    return None


def to_jsonable(value: Any) -> Any:
    """Convert numpy/pandas-ish values into plain JSON-safe values.

    numpy arrays and scalars become lists and Python scalars (when numpy is
    importable), dict keys are stringified, lists/tuples/sets become lists,
    and any object with a callable ``isoformat`` is replaced by its result.
    Everything else is returned unchanged.
    """
    try:
        import numpy as np  # type: ignore
    except ImportError:  # pragma: no cover
        np = None  # type: ignore

    if np is not None and isinstance(value, np.ndarray):
        return value.tolist()
    if np is not None and isinstance(value, np.generic):
        return value.item()

    if isinstance(value, dict):
        return {str(key): to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple, set)):
        return list(map(to_jsonable, value))

    iso_formatter = getattr(value, "isoformat", None)
    if callable(iso_formatter):
        try:
            return iso_formatter()
        except (TypeError, ValueError):
            # E.g. the class itself was passed in: keep the value as-is.
            return value

    return value


def _to_preview_image_payload(value: Any) -> dict[str, Any] | None:
    """Return an image preview payload for ``value``, or ``None`` if it is not an image.

    Accepts a PIL image directly, or anything ``_to_pil_from_hf_image_dict``
    can turn into one. Returns ``None`` when Pillow is not installed.
    """
    try:
        from PIL.Image import Image as PILImage  # type: ignore
    except ImportError:  # pragma: no cover
        return None

    if isinstance(value, PILImage):
        pil_image = value
    else:
        pil_image = _to_pil_from_hf_image_dict(value)

    if pil_image is None:
        return None
    return _pil_to_preview_payload(pil_image)


def to_preview_jsonable(value: Any) -> Any:
    """Convert values into JSON-safe preview values, including PIL images.

    Images (and image-like dicts) become preview payloads. Otherwise the value
    goes through ``to_jsonable`` and is then handled by type: containers are
    converted recursively, raw bytes are base64-encoded, JSON scalars pass
    through, and anything else is stringified.
    """
    payload = _to_preview_image_payload(value)
    if payload is not None:
        return payload

    plain = to_jsonable(value)
    if isinstance(plain, dict):
        return {str(key): to_preview_jsonable(item) for key, item in plain.items()}
    if isinstance(plain, (list, tuple, set)):
        return [to_preview_jsonable(item) for item in plain]
    if isinstance(plain, (bytes, bytearray)):
        return base64.b64encode(bytes(plain)).decode("ascii")
    if plain is None or isinstance(plain, (str, int, float, bool)):
        return plain
    return str(plain)


================================================
FILE: studio/backend/core/data_recipe/local_callable_validators.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import json
import os
import structlog
import subprocess
from copy import deepcopy
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Any

from loggers import get_logger
from utils.paths import ensure_dir, oxc_validator_tmp_root

logger = get_logger(__name__)

# Prefix that identifies an OXC validator in a column's `validation_function`
# string; options may follow as "<marker>:<lang>:<mode>[:<code_shape>]".
OXC_VALIDATION_FN_MARKER = "unsloth_oxc_validator"

# Recipe language name -> "lang" value sent to the Node runner.
_OXC_LANG_TO_NODE_LANG = {
    "javascript": "js",
    "typescript": "ts",
    "jsx": "jsx",
    "tsx": "tsx",
}
# Accepted validation modes and code shapes; anything else falls back to
# "syntax" / "auto" in the parsing and builder functions below.
_OXC_VALIDATION_MODES = {"syntax", "lint", "syntax+lint"}
_OXC_CODE_SHAPES = {"auto", "module", "snippet"}

# Directory holding the Node-side validator (next to this file) and its entry script.
_OXC_TOOL_DIR = Path(__file__).resolve().parent / "oxc-validator"
_OXC_RUNNER_PATH = _OXC_TOOL_DIR / "validate.mjs"


@dataclass(frozen = True)
class OxcLocalCallableValidatorSpec:
    """Parsed description of one OXC validation column taken out of a recipe.

    Produced by ``_parse_oxc_spec`` and consumed by
    ``register_oxc_local_callable_validators``, which turns it into a
    ``ValidationColumnConfig`` on the builder.
    """

    # Forwarded unchanged to ValidationColumnConfig.
    name: str
    drop: bool
    target_columns: list[str]
    batch_size: int
    # Options used to build the validation callable.
    code_lang: str
    validation_mode: str
    code_shape: str


def split_oxc_local_callable_validators(
    recipe_core: dict[str, Any],
) -> tuple[dict[str, Any], list[OxcLocalCallableValidatorSpec]]:
    """Separate OXC validator columns from the rest of a recipe.

    Returns ``(recipe, specs)``. When ``recipe_core["columns"]`` is not a list
    the input object itself is returned with no specs. Otherwise the result is
    a deep copy whose ``columns`` no longer contain the entries recognised as
    OXC validators; those are returned as parsed specs, in order. The input
    is never mutated.
    """
    if not isinstance(recipe_core.get("columns"), list):
        return recipe_core, []

    recipe_copy = deepcopy(recipe_core)
    copied_columns = recipe_copy.get("columns")
    if not isinstance(copied_columns, list):
        return recipe_copy, []

    remaining: list[Any] = []
    extracted: list[OxcLocalCallableValidatorSpec] = []
    for entry in copied_columns:
        # Only dict columns can describe an OXC validator.
        spec = _parse_oxc_spec(column = entry) if isinstance(entry, dict) else None
        if spec is None:
            remaining.append(entry)
        else:
            extracted.append(spec)

    recipe_copy["columns"] = remaining
    return recipe_copy, extracted


def register_oxc_local_callable_validators(
    *,
    builder,
    specs: list[OxcLocalCallableValidatorSpec],
) -> None:
    """Add one local-callable validation column to ``builder`` per spec.

    Does nothing (and imports nothing from ``data_designer``) when ``specs``
    is empty.
    """
    if not specs:
        return

    from data_designer.config.column_configs import ValidationColumnConfig
    from data_designer.config.validator_params import (
        LocalCallableValidatorParams,
        ValidatorType,
    )

    for validator_spec in specs:
        oxc_callable = _build_oxc_validation_function(
            validator_spec.code_lang,
            validator_spec.validation_mode,
            validator_spec.code_shape,
        )
        callable_params = LocalCallableValidatorParams(
            validation_function = oxc_callable,
        )
        column_config = ValidationColumnConfig(
            name = validator_spec.name,
            drop = validator_spec.drop,
            target_columns = validator_spec.target_columns,
            validator_type = ValidatorType.LOCAL_CALLABLE,
            validator_params = callable_params,
            batch_size = validator_spec.batch_size,
        )
        builder.add_column(column_config)


def _parse_oxc_spec(
    *,
    column: dict[str, Any],
) -> OxcLocalCallableValidatorSpec | None:
    """Parse ``column`` into an OXC validator spec, or return ``None``.

    A column qualifies when it is a ``validation`` column with validator type
    ``local_callable``, its ``validator_params["validation_function"]`` string
    starts with ``OXC_VALIDATION_FN_MARKER``, and it has a non-blank ``name``
    plus at least one non-blank string in ``target_columns``.
    """

    def _text(key: str) -> str:
        # Missing/None values count as an empty string.
        return str(column.get(key) or "").strip()

    if _text("column_type") != "validation":
        return None
    if _text("validator_type") != "local_callable":
        return None

    params = column.get("validator_params")
    if not isinstance(params, dict):
        return None

    marker_raw = params.get("validation_function")
    fn_name = marker_raw.strip() if isinstance(marker_raw, str) else ""
    if not fn_name.startswith(OXC_VALIDATION_FN_MARKER):
        return None

    name = _text("name")
    if not name:
        return None

    targets: list[str] = []
    raw_targets = column.get("target_columns")
    if isinstance(raw_targets, list):
        for candidate in raw_targets:
            if isinstance(candidate, str) and candidate.strip():
                targets.append(candidate.strip())
    if not targets:
        return None

    code_lang, validation_mode, code_shape = _parse_oxc_validation_marker(fn_name)
    return OxcLocalCallableValidatorSpec(
        name = name,
        # Only a literal True enables dropping; truthy non-bools do not.
        drop = column.get("drop") is True,
        target_columns = targets,
        batch_size = _parse_batch_size(column.get("batch_size")),
        code_lang = code_lang,
        validation_mode = validation_mode,
        code_shape = code_shape,
    )


def _parse_batch_size(value: Any) -> int:
    """Coerce ``value`` to a batch size of at least 1, defaulting to 10.

    Anything ``int()`` cannot convert falls back to the default. That includes
    ``None``, non-numeric strings, NaN and infinite floats (``int(inf)``
    raises ``OverflowError``). Values below 1 fall back as well.
    """
    default_batch_size = 10
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default_batch_size
    return parsed if parsed >= 1 else default_batch_size


def _parse_oxc_validation_marker(fn_name: str) -> tuple[str, str, str]:
    """Extract ``(code_lang, validation_mode, code_shape)`` from a marker string.

    The expected form is ``<marker>:<lang>:<mode>[:<code_shape>]``. Blank
    segments are ignored. If the ``<marker>:`` prefix is missing or fewer than
    two segments remain, all three values take their defaults; otherwise each
    unrecognised segment individually falls back to ``"javascript"``,
    ``"syntax"`` or ``"auto"``.
    """
    defaults = ("javascript", "syntax", "auto")
    prefix = f"{OXC_VALIDATION_FN_MARKER}:"
    if not fn_name.startswith(prefix):
        return defaults

    stripped = (segment.strip() for segment in fn_name[len(prefix) :].split(":"))
    tokens = [token for token in stripped if token]
    if len(tokens) < 2:
        return defaults

    lang_token, mode_token, *extra = tokens
    code_lang = lang_token if lang_token in _OXC_LANG_TO_NODE_LANG else "javascript"
    mode = mode_token if mode_token in _OXC_VALIDATION_MODES else "syntax"
    code_shape = extra[0] if extra and extra[0] in _OXC_CODE_SHAPES else "auto"
    return code_lang, mode, code_shape


@lru_cache(maxsize = 8)
def _build_oxc_validation_function(lang: str, validation_mode: str, code_shape: str):
    """Build (and cache) a DataFrame validator bound to the given OXC options.

    Unknown ``lang``, ``validation_mode`` or ``code_shape`` values fall back
    to ``"js"``, ``"syntax"`` and ``"auto"``. The returned callable takes a
    DataFrame, validates the values of its first column through
    ``_run_oxc_batch`` and returns a DataFrame of per-row results. Its
    ``__name__`` encodes the normalised options.
    """
    node_lang = _OXC_LANG_TO_NODE_LANG.get(lang, "js")
    mode = "syntax" if validation_mode not in _OXC_VALIDATION_MODES else validation_mode
    normalized_code_shape = "auto" if code_shape not in _OXC_CODE_SHAPES else code_shape

    def _validator(df):
        import pandas as pd  # imported lazily for local callable runtime

        row_count = int(len(df.index))
        if row_count == 0:
            return pd.DataFrame({"is_valid": []})

        # Only the first column is validated; None cells become empty code.
        code_column = "" if len(df.columns) == 0 else str(df.columns[0])
        if code_column:
            code_values = [
                "" if cell is None else str(cell) for cell in df[code_column].tolist()
            ]
        else:
            code_values = [""] * row_count

        results = _run_oxc_batch(
            node_lang = node_lang,
            validation_mode = mode,
            code_shape = normalized_code_shape,
            code_values = code_values,
        )
        if len(results) == row_count:
            return pd.DataFrame(results)
        # The runner must return exactly one result per input row.
        return pd.DataFrame(
            _fallback_results(
                row_count,
                "OXC validator returned mismatched result size.",
            )
        )

    _validator.__name__ = f"{OXC_VALIDATION_FN_MARKER}_{node_lang}_{mode.replace('+', '_')}_{normalized_code_shape}"
    return _validator


def _normalize_oxc_result_item(item: dict[str, Any]) -> dict[str, Any]:
    """Coerce one raw runner result into the fixed result schema with safe defaults."""
    is_valid_raw = item.get("is_valid")
    error_count_raw = item.get("error_count")
    severity_raw = item.get("severity")
    code_raw = item.get("code")
    labels_raw = item.get("labels")
    codeframe_raw = item.get("codeframe")
    warning_count_raw = item.get("warning_count")
    return {
        # Anything that is not a real boolean counts as invalid.
        "is_valid": is_valid_raw if isinstance(is_valid_raw, bool) else False,
        "error_count": int(error_count_raw)
        if isinstance(error_count_raw, int)
        else 0,
        "error_message": str(item.get("error_message") or ""),
        "severity": severity_raw if isinstance(severity_raw, str) else None,
        "code": code_raw if isinstance(code_raw, str) else None,
        "labels": labels_raw if isinstance(labels_raw, list) else [],
        "codeframe": codeframe_raw if isinstance(codeframe_raw, str) else None,
        "warning_count": int(warning_count_raw)
        if isinstance(warning_count_raw, int)
        else 0,
    }


def _run_oxc_batch(
    *,
    node_lang: str,
    validation_mode: str,
    code_shape: str,
    code_values: list[str],
) -> list[dict[str, Any]]:
    """Validate ``code_values`` by running the Node OXC runner once.

    The batch is sent to ``node validate.mjs`` as a JSON payload on stdin and
    a JSON array is expected on stdout. Returns one normalised result dict per
    array entry. Every failure mode (runner script missing, launch failure,
    non-zero exit, unparsable or non-array output) yields one "invalid"
    fallback result per input instead of raising.
    """
    if not _OXC_RUNNER_PATH.exists():
        return _fallback_results(
            len(code_values),
            f"OXC runner missing at {_OXC_RUNNER_PATH}",
        )

    payload = {
        "lang": node_lang,
        "mode": validation_mode,
        "code_shape": code_shape,
        "codes": code_values,
    }
    try:
        # Point every temp-dir variable at our own directory for the child.
        tmp_dir = ensure_dir(oxc_validator_tmp_root())
        env = dict(os.environ)
        tmp_dir_str = str(tmp_dir)
        env["TMPDIR"] = tmp_dir_str
        env["TMP"] = tmp_dir_str
        env["TEMP"] = tmp_dir_str
        proc = subprocess.run(
            ["node", str(_OXC_RUNNER_PATH)],
            cwd = str(_OXC_TOOL_DIR),
            input = json.dumps(payload),
            text = True,
            # Node writes UTF-8. Without an explicit encoding the pipes use
            # the locale codec, which garbles or rejects non-ASCII diagnostics
            # on non-UTF-8 locales (e.g. Windows code pages).
            encoding = "utf-8",
            errors = "replace",
            capture_output = True,
            check = False,
            env = env,
        )
    except (OSError, ValueError) as exc:
        logger.warning("OXC subprocess launch failed: %s", exc)
        return _fallback_results(len(code_values), f"OXC launch failed: {exc}")

    if proc.returncode != 0:
        message = (proc.stderr or proc.stdout or "unknown error").strip()
        if len(message) > 300:
            message = f"{message[:300]}..."
        return _fallback_results(len(code_values), f"OXC failed: {message}")

    try:
        raw = json.loads(proc.stdout)
    except json.JSONDecodeError:
        return _fallback_results(len(code_values), "OXC output parse failed.")

    if not isinstance(raw, list):
        return _fallback_results(len(code_values), "OXC output must be an array.")

    out: list[dict[str, Any]] = []
    for item in raw:
        if isinstance(item, dict):
            out.append(_normalize_oxc_result_item(item))
        else:
            # Same shape as every other failure result.
            out.extend(_fallback_results(1, "Invalid OXC result entry."))
    return out


def _fallback_results(row_count: int, message: str) -> list[dict[str, Any]]:
    """Return ``row_count`` independent "invalid" results carrying ``message``.

    Each row gets its own dict (and its own ``labels`` list), reporting one
    error, no warnings and no diagnostic details.
    """
    rows: list[dict[str, Any]] = []
    for _ in range(row_count):
        rows.append(
            {
                "is_valid": False,
                "error_count": 1,
                "error_message": message,
                "severity": None,
                "code": None,
                "labels": [],
                "codeframe": None,
                "warning_count": 0,
            }
        )
    return rows


================================================
FILE: studio/backend/core/data_recipe/oxc-validator/package.json
================================================
{
  "name": "unsloth-oxc-validator-runtime",
  "private": true,
  "version": "0.0.1",
  "type": "module",
  "dependencies": {
    "oxc-parser": "^0.116.0",
    "oxlint": "^1.51.0"
  }
}


================================================
FILE: studio/backend/core/data_recipe/oxc-validator/validate.mjs
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { spawnSync } from "node:child_process";
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { basename, dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import { parseSync } from "oxc-parser";

// Normalised language id -> file extension used for the snippet file names.
const LANG_TO_EXT = {
  js: "js",
  jsx: "jsx",
  ts: "ts",
  tsx: "tsx",
};

// Accepted request options; unknown values fall back in mapMode / mapCodeShape.
const VALIDATION_MODES = new Set(["syntax", "lint", "syntax+lint"]);
const CODE_SHAPES = new Set(["auto", "module", "snippet"]);
// "snippet" code is wrapped in an arrow-function IIFE followed by `export {}`.
// The prefix length is the offset later subtracted from diagnostic positions.
const SNIPPET_PREFIX = "(() => {\n";
const SNIPPET_SUFFIX = "\n})();\nexport {};\n";
// Rules passed to oxlint with `-A`.
const OXLINT_SUPPRESSED_RULES = ["no-unused-vars", "no-new-array"];
// Directory containing this script; used to locate node_modules/.bin/oxlint
// and as the working directory for oxlint.
const TOOL_DIR = dirname(fileURLToPath(import.meta.url));

/**
 * Normalise a language name to one of "js", "ts", "jsx" or "tsx".
 * Matching ignores case and surrounding whitespace; anything unrecognised
 * (including empty or missing input) maps to "js".
 */
function mapLang(value) {
  const key = String(value || "").trim().toLowerCase();
  switch (key) {
    case "typescript":
    case "ts":
      return "ts";
    case "jsx":
      return "jsx";
    case "tsx":
      return "tsx";
    case "javascript":
    case "js":
    default:
      return "js";
  }
}

/**
 * Normalise a validation mode (case- and whitespace-insensitive).
 * Returns the mode when it is one of VALIDATION_MODES, otherwise "syntax".
 */
function mapMode(value) {
  const candidate = String(value || "").trim().toLowerCase();
  return VALIDATION_MODES.has(candidate) ? candidate : "syntax";
}

/**
 * Normalise a code shape (case- and whitespace-insensitive).
 * Returns the shape when it is one of CODE_SHAPES, otherwise "auto".
 */
function mapCodeShape(value) {
  const candidate = String(value || "").trim().toLowerCase();
  return CODE_SHAPES.has(candidate) ? candidate : "auto";
}

/**
 * Recover the numeric index from a path whose file name looks like
 * `snippet_<digits>.<ext>`. Returns null for non-string input or any other
 * file name.
 */
function parseFileIndex(filePath) {
  if (typeof filePath !== "string") {
    return null;
  }
  const captured = /^snippet_(\d+)\./.exec(basename(filePath));
  if (captured === null) {
    return null;
  }
  const index = Number.parseInt(captured[1], 10);
  return Number.isFinite(index) ? index : null;
}

/**
 * Coerce a code value to a string: strings pass through, null/undefined
 * become "", everything else goes through String().
 */
function toCodeString(code) {
  if (typeof code === "string") {
    return code;
  }
  return String(code ?? "");
}

/**
 * Build the { index, lang, code, offset } entry that gets parsed and linted.
 * For the "snippet" shape the code is wrapped in SNIPPET_PREFIX/SUFFIX and
 * `offset` is the prefix length; for any other shape the code is used as-is
 * with offset 0.
 */
function makeValidationEntry({ code, index, lang, codeShape }) {
  const source = toCodeString(code);
  const wrapped = codeShape === "snippet";
  return {
    index,
    lang,
    code: wrapped ? `${SNIPPET_PREFIX}${source}${SNIPPET_SUFFIX}` : source,
    offset: wrapped ? SNIPPET_PREFIX.length : 0,
  };
}

/**
 * Subtract `offset` from an integer position. Returns null when `value` is
 * not an integer or the shifted position would be negative.
 */
function shiftOffset(value, offset) {
  if (Number.isInteger(value)) {
    const moved = value - offset;
    if (moved >= 0) {
      return moved;
    }
  }
  return null;
}

/**
 * Return a copy of `diagnostic` whose label start/end positions are shifted
 * back by `offset`. Non-object diagnostics and offsets <= 0 return the input
 * unchanged; a diagnostic without a labels array gets `labels: []`.
 */
function remapDiagnosticOffsets(diagnostic, offset) {
  const isObject = Boolean(diagnostic) && typeof diagnostic === "object";
  if (!isObject || offset <= 0) {
    return diagnostic;
  }

  const relocate = (label) => ({
    ...label,
    start: shiftOffset(label.start, offset),
    end: shiftOffset(label.end, offset),
  });
  const labels = Array.isArray(diagnostic.labels)
    ? diagnostic.labels.map(relocate)
    : [];

  return { ...diagnostic, labels };
}

/**
 * Convert a parser error (string, object, or anything else) into the common
 * { code, message, severity, labels, codeframe } shape. Missing or wrongly
 * typed fields become null, labels become { message, start, end } objects,
 * and an empty message becomes "Unknown parser error".
 */
function normalizeParserError(error) {
  const withMessageOnly = (message) => ({
    code: null,
    message,
    severity: null,
    labels: [],
    codeframe: null,
  });

  if (typeof error === "string") {
    return withMessageOnly(error.trim() || "Unknown parser error");
  }
  if (!error || typeof error !== "object") {
    return withMessageOnly("Unknown parser error");
  }

  const textOrNull = (candidate) =>
    typeof candidate === "string" ? candidate : null;
  const intOrNull = (candidate) =>
    Number.isInteger(candidate) ? candidate : null;
  const toLabel = (label) => {
    // Non-object labels still produce an entry, with every field null.
    const fields = label && typeof label === "object" ? label : {};
    return {
      message: textOrNull(fields.message),
      start: intOrNull(fields.start),
      end: intOrNull(fields.end),
    };
  };

  const rawMessage = String(error.message || error.reason || "").trim();
  return {
    code: textOrNull(error.code),
    message: rawMessage || "Unknown parser error",
    severity: textOrNull(error.severity),
    labels: Array.isArray(error.labels) ? error.labels.map(toLabel) : [],
    codeframe: textOrNull(error.codeframe),
  };
}

/**
 * Convert one oxlint diagnostic into the common
 * { code, message, severity, labels, codeframe } shape, or return null when
 * it is not an object or has no message. Severity is "error" only when the
 * input says so (case-insensitive), otherwise "warning". Label spans
 * (offset/length) become start/end positions. A string code is prefixed to
 * the message.
 */
function normalizeLintDiagnostic(diagnostic) {
  if (!diagnostic || typeof diagnostic !== "object") {
    return null;
  }

  const text = String(diagnostic.message || "").trim();
  if (text === "") {
    return null;
  }

  const objectOrNull = (candidate) =>
    candidate && typeof candidate === "object" ? candidate : null;
  const toLabel = (rawLabel) => {
    const labelObject = objectOrNull(rawLabel);
    const span = objectOrNull(labelObject?.span);
    const start = Number.isInteger(span?.offset) ? span.offset : null;
    const length = Number.isInteger(span?.length) ? span.length : null;
    return {
      message:
        typeof labelObject?.label === "string" ? labelObject.label : null,
      start,
      // An end position needs both a start and a length.
      end: start === null || length === null ? null : start + length,
    };
  };

  const labels = Array.isArray(diagnostic.labels)
    ? Array.from(diagnostic.labels, (rawLabel) => toLabel(rawLabel))
    : [];
  const severityText = String(diagnostic.severity || "").trim().toLowerCase();
  const code = typeof diagnostic.code === "string" ? diagnostic.code : null;

  return {
    code,
    message: code ? `${code}: ${text}` : text,
    severity: severityText === "error" ? "error" : "warning",
    labels,
    codeframe: null,
  };
}

/**
 * Build one result record in the snake_case shape written to stdout.
 * Every field is type-checked: counts that are not integers become 0, text
 * fields that are not strings become null (or "" for the message), and
 * `labels` that is not an array becomes [].
 */
function makeResult({
  isValid,
  errorCount,
  warningCount = 0,
  message = "",
  severity = null,
  code = null,
  labels = [],
  codeframe = null,
}) {
  const countOrZero = (candidate) =>
    Number.isInteger(candidate) ? candidate : 0;
  const textOrNull = (candidate) =>
    typeof candidate === "string" ? candidate : null;

  const result = {};
  result.is_valid = Boolean(isValid);
  result.error_count = countOrZero(errorCount);
  result.warning_count = countOrZero(warningCount);
  result.error_message = String(message || "");
  result.severity = textOrNull(severity);
  result.code = textOrNull(code);
  result.labels = Array.isArray(labels) ? labels : [];
  result.codeframe = textOrNull(codeframe);
  return result;
}

/**
 * Summarise normalised parser errors as one result: valid when there are no
 * errors, the message joins the first three error messages with " | ", and
 * severity/code/labels/codeframe come from the first error.
 */
function syntaxResultFromErrors(errors) {
  const lead = errors[0] ?? null;
  const details = lead
    ? {
        severity: lead.severity,
        code: lead.code,
        labels: lead.labels,
        codeframe: lead.codeframe,
      }
    : { severity: null, code: null, labels: [], codeframe: null };
  const headline = errors
    .slice(0, 3)
    .map((error) => error.message)
    .join(" | ");

  return makeResult({
    isValid: errors.length === 0,
    errorCount: errors.length,
    warningCount: 0,
    message: headline,
    ...details,
  });
}

/**
 * Parse one entry with oxc-parser (module source type, semantic errors
 * enabled) and return its errors, normalised and with label positions
 * shifted back by the entry's offset. A thrown exception is reported as a
 * single normalised error instead of propagating.
 */
function runSyntaxParse(entry) {
  const extension = LANG_TO_EXT[entry.lang] ?? "js";
  const relocate = (error) => remapDiagnosticOffsets(error, entry.offset);

  try {
    const parsed = parseSync(`snippet_${entry.index}.${extension}`, entry.code, {
      lang: entry.lang,
      sourceType: "module",
      showSemanticErrors: true,
    });
    if (!Array.isArray(parsed?.errors)) {
      return [];
    }
    return parsed.errors
      .map(normalizeParserError)
      .filter(Boolean)
      .map(relocate);
  } catch (error) {
    return [relocate(normalizeParserError(error))];
  }
}

/**
 * Return whichever error list is shorter, preferring the first list on a tie.
 */
function pickPreferredErrorList(firstErrors, secondErrors) {
  return secondErrors.length < firstErrors.length ? secondErrors : firstErrors;
}

/**
 * Syntax-check one piece of code and return { result, lintEntry }.
 *
 * With an explicit code shape the code is parsed once in that shape. With
 * "auto" it is parsed as a module first and, if that fails, as a wrapped
 * snippet; the first clean parse wins. If both fail, the attempt with fewer
 * errors is reported (the module attempt on a tie). `lintEntry` is the entry
 * of the attempt that was reported.
 */
function validateSyntaxOne({ code, lang, index, codeShape }) {
  const attempt = (shape) => {
    const entry = makeValidationEntry({ code, index, lang, codeShape: shape });
    return { entry, errors: runSyntaxParse(entry) };
  };
  const report = ({ entry, errors }) => ({
    result: syntaxResultFromErrors(errors),
    lintEntry: entry,
  });

  if (codeShape !== "auto") {
    return report(attempt(codeShape));
  }

  const asModule = attempt("module");
  if (asModule.errors.length === 0) {
    return report(asModule);
  }

  const asSnippet = attempt("snippet");
  if (asSnippet.errors.length === 0) {
    return report(asSnippet);
  }

  const chosenErrors = pickPreferredErrorList(asModule.errors, asSnippet.errors);
  return report(chosenErrors === asSnippet.errors ? asSnippet : asModule);
}

/**
 * Choose the entry to hand to the linter. An explicit code shape is used
 * as-is. With "auto", the module form is used when it parses cleanly,
 * otherwise the snippet form when that parses cleanly, otherwise the module
 * form.
 */
function resolveLintEntry({ code, lang, index, codeShape }) {
  const build = (shape) =>
    makeValidationEntry({ code, index, lang, codeShape: shape });
  const parsesCleanly = (entry) => runSyntaxParse(entry).length === 0;

  if (codeShape !== "auto") {
    return build(codeShape);
  }

  const moduleEntry = build("module");
  if (parsesCleanly(moduleEntry)) {
    return moduleEntry;
  }

  const snippetEntry = build("snippet");
  return parsesCleanly(snippetEntry) ? snippetEntry : moduleEntry;
}

/**
 * Build a Map from entry index to an identical failing result (one error,
 * severity "error") carrying `message`, for when the lint run itself failed.
 */
function fallbackLintResults(entries, message) {
  const resultsByIndex = new Map();
  for (const entry of entries) {
    resultsByIndex.set(
      entry.index,
      makeResult({
        isValid: false,
        errorCount: 1,
        warningCount: 0,
        message,
        severity: "error",
      }),
    );
  }
  return resultsByIndex;
}

/**
 * Lint a batch of validation entries with a single oxlint invocation.
 *
 * Each entry's code is written to `snippet_<index>.<ext>` inside a fresh
 * temporary directory, oxlint is run once over that directory with JSON
 * output, and the reported diagnostics are grouped back per entry index.
 *
 * @param {Array<object>} entries entries exposing `index`, `lang`, `code`
 *   and optionally `offset` (those are the only fields read here).
 * @returns {Map<number, object>} map from `entry.index` to a `makeResult`
 *   value. When oxlint cannot be run, prints nothing on stdout, or prints
 *   invalid JSON, every entry maps to the same single-error fallback result.
 *
 * The temporary directory is always removed in the `finally` block. Note
 * that `mkdtempSync` runs before the `try`, so a failure to create the
 * directory propagates to the caller instead of producing fallback results.
 */
function runLintBatch(entries) {
  if (entries.length === 0) {
    return new Map();
  }

  const entryByIndex = new Map(entries.map((entry) => [entry.index, entry]));
  const tempDir = mkdtempSync(join(tmpdir(), "oxlint-"));
  try {
    // One file per entry; the index in the file name is what ties a
    // diagnostic back to its entry later on.
    for (const entry of entries) {
      const ext = LANG_TO_EXT[entry.lang] ?? "js";
      const filePath = join(tempDir, `snippet_${entry.index}.${ext}`);
      writeFileSync(filePath, entry.code, "utf8");
    }

    const oxlintBin = join(TOOL_DIR, "node_modules", ".bin", "oxlint");
    // Every suppressed rule becomes an "-A <rule>" argument pair.
    const oxlintArgs = [
      ...OXLINT_SUPPRESSED_RULES.flatMap((rule) => ["-A", rule]),
      "--format",
      "json",
      tempDir,
    ];
    const exec = spawnSync(oxlintBin, oxlintArgs, {
      encoding: "utf8",
      cwd: TOOL_DIR,
    });
    // Only spawn-level failures are treated as fatal here; the exit status
    // is not inspected.
    if (exec.error) {
      return fallbackLintResults(
        entries,
        `oxlint execution failed: ${exec.error.message}`,
      );
    }
    const stdout = String(exec.stdout || "").trim();
    if (!stdout) {
      // No JSON at all: surface stderr if there is any.
      const stderr = String(exec.stderr || "").trim();
      return fallbackLintResults(
        entries,
        stderr || "oxlint returned empty output",
      );
    }

    let parsed;
    try {
      parsed = JSON.parse(stdout);
    } catch {
      return fallbackLintResults(entries, "oxlint JSON parse failed");
    }

    const rawDiagnostics = Array.isArray(parsed?.diagnostics)
      ? parsed.diagnostics
      : [];
    const byIndex = new Map();

    // Group normalized diagnostics by the entry index recovered from the
    // reported file name; diagnostics that cannot be attributed are dropped.
    for (const diag of rawDiagnostics) {
      const filenameRaw =
        typeof diag?.filename === "string" ? diag.filename : "";
      const filename = filenameRaw.startsWith("file://")
        ? filenameRaw.replace("file://", "")
        : filenameRaw;
      const index = parseFileIndex(filename);
      if (index === null) {
        continue;
      }
      const normalized = normalizeLintDiagnostic(diag);
      if (!normalized) {
        continue;
      }
      const entry = entryByIndex.get(index);
      // NOTE(review): entry.offset is presumably the length of any wrapper
      // text added around the user's code — confirm in makeValidationEntry.
      const remapped = remapDiagnosticOffsets(normalized, entry?.offset ?? 0);
      const list = byIndex.get(index) ?? [];
      list.push(remapped);
      byIndex.set(index, list);
    }

    const results = new Map();
    for (const entry of entries) {
      const diagnostics = byIndex.get(entry.index) ?? [];
      // Anything that is not severity "error" is counted as a warning.
      const errorDiagnostics = diagnostics.filter(
        (diag) => diag.severity === "error",
      );
      const warningDiagnostics = diagnostics.filter(
        (diag) => diag.severity !== "error",
      );
      // The first error (or, failing that, the first warning) supplies the
      // severity/code/labels/codeframe of the summary result.
      const top = errorDiagnostics[0] ?? warningDiagnostics[0] ?? null;
      const messageSource =
        errorDiagnostics.length > 0 ? errorDiagnostics : warningDiagnostics;
      results.set(
        entry.index,
        makeResult({
          isValid: errorDiagnostics.length === 0,
          errorCount: errorDiagnostics.length,
          warningCount: warningDiagnostics.length,
          // At most three messages are joined into the summary text.
          message: messageSource
            .slice(0, 3)
            .map((diag) => diag.message)
            .join(" | "),
          severity: top ? top.severity : null,
          code: top ? top.code : null,
          labels: top ? top.labels : [],
          codeframe: top ? top.codeframe : null,
        }),
      );
    }
    return results;
  } catch (error) {
    return fallbackLintResults(entries, `oxlint execution failed: ${error}`);
  } finally {
    rmSync(tempDir, { recursive: true, force: true });
  }
}

/**
 * Read all of standard input as UTF-8 text.
 *
 * @returns {Promise<string>} resolves with the concatenated input once the
 *   stream ends; rejects if the stream emits an error.
 */
function readStdin() {
  const { stdin } = process;
  return new Promise((resolve, reject) => {
    const pieces = [];
    stdin.setEncoding("utf8");
    stdin.on("data", (chunk) => {
      pieces.push(chunk);
    });
    stdin.on("end", () => {
      resolve(pieces.join(""));
    });
    stdin.on("error", (error) => {
      reject(error);
    });
  });
}

/**
 * Validate every code sample and return one result per sample, in order.
 *
 * - `"syntax"`: syntax check only.
 * - `"lint"`: lint only, using the entry chosen by `resolveLintEntry`.
 * - any other mode: syntax check first, then lint only the samples whose
 *   syntax result is valid; invalid samples keep their syntax result.
 *
 * A sample the linter reported nothing for gets a clean (valid, zero
 * errors, zero warnings) result.
 */
function runValidation({ codes, lang, mode, codeShape }) {
  // Created lazily so a fresh object is only built when it is needed.
  const cleanResult = () =>
    makeResult({
      isValid: true,
      errorCount: 0,
      warningCount: 0,
    });

  switch (mode) {
    case "syntax":
      return codes.map(
        (code, index) =>
          validateSyntaxOne({ code, lang, index, codeShape }).result,
      );

    case "lint": {
      const entries = codes.map((code, index) =>
        resolveLintEntry({ code, lang, index, codeShape }),
      );
      const lintByIndex = runLintBatch(entries);
      return entries.map(
        (entry) => lintByIndex.get(entry.index) ?? cleanResult(),
      );
    }

    default: {
      const syntaxRuns = codes.map((code, index) =>
        validateSyntaxOne({ code, lang, index, codeShape }),
      );
      const passedSyntax = (run) => run.result.is_valid === true;
      const lintByIndex = runLintBatch(
        syntaxRuns.filter(passedSyntax).map((run) => run.lintEntry),
      );
      return syntaxRuns.map((run) =>
        passedSyntax(run)
          ? (lintByIndex.get(run.lintEntry.index) ?? cleanResult())
          : run.result,
      );
    }
  }
}

/**
 * CLI entry point: read a JSON request from stdin and write a JSON array of
 * validation results to stdout.
 *
 * The request may carry `lang`, `mode`, `code_shape` and `codes`; a missing
 * or non-array `codes` is treated as an empty list. A payload that is not
 * valid JSON produces a single-element array with an "Invalid JSON payload"
 * error result.
 */
async function main() {
  const rawInput = await readStdin();

  let payload;
  try {
    payload = JSON.parse(rawInput || "{}");
  } catch {
    const invalidPayload = makeResult({
      isValid: false,
      errorCount: 1,
      warningCount: 0,
      message: "Invalid JSON payload",
      severity: "error",
    });
    process.stdout.write(JSON.stringify([invalidPayload]));
    return;
  }

  const request = {
    lang: mapLang(payload?.lang),
    mode: mapMode(payload?.mode),
    codeShape: mapCodeShape(payload?.code_shape),
    codes: Array.isArray(payload?.codes) ? payload.codes : [],
  };
  const results = runValidation(request);
  process.stdout.write(JSON.stringify(results));
}

// Any unhandled failure is reported on stderr (stack trace when available)
// and the process exits with status 1.
main().catch((failure) => {
  const detail = failure?.stack || failure;
  process.stderr.write(String(detail));
  process.exit(1);
});


================================================
FILE: studio/backend/core/data_recipe/service.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import base64
import io
import os
from pathlib import Path
from typing import Any

from .jsonable import to_jsonable
from .local_callable_validators import (
    register_oxc_local_callable_validators,
    split_oxc_local_callable_validators,
)

_IMAGE_CONTEXT_PATCHED = False


def _encode_bytes_to_base64(value: bytes | bytearray) -> str:
    return base64.b64encode(bytes(value)).decode("utf-8")


def _load_image_file_to_base64(
    path_value: str, *, base_path: str | None = None
) -> str | None:
    try:
        path = Path(path_value)
        candidates: list[Path] = []
        if path.is_absolute():
            candidates.append(path)
        else:
            if base_path:
                candidates.append(Path(base_path) / path)
            candidates.append(Path.cwd() / path)

        for candidate in candidates:
            if not candidate.exists() or not candidate.is_file():
                continue
            with candidate.open("rb") as f:
                return _encode_bytes_to_base64(f.read())
    except (OSError, TypeError, ValueError):
        return None
    return None


def _pil_image_to_base64(value: Any) -> str | None:
    try:
        from PIL.Image import Image as PILImage  # type: ignore
    except ImportError:
        return None
    if not isinstance(value, PILImage):
        return None
    buffer = io.BytesIO()
    image_format = str(getattr(value, "format", "") or "").upper()
    if image_format not in {"PNG", "JPEG", "JPG", "WEBP", "GIF"}:
        image_format = "PNG"
    value.save(buffer, format = image_format)
    return _encode_bytes_to_base64(buffer.getvalue())


def _normalize_image_context_value(value: Any, *, base_path: str | None = None) -> Any:
    if isinstance(value, str):
        return value

    if isinstance(value, (bytes, bytearray)):
        return _encode_bytes_to_base64(value)

    pil_base64 = _pil_image_to_base64(value)
    if pil_base64 is not None:
        return pil_base64

    if isinstance(value, dict):
        url = value.get("url")
        if isinstance(url, str):
            return url

        image_url = value.get("image_url")
        if isinstance(image_url, str):
            return image_url
        if isinstance(image_url, dict):
            nested_url = image_url.get("url")
            if isinstance(nested_url, str):
                return nested_url

        inline_data = value.get("data")
        if isinstance(inline_data, str):
            return inline_data

        raw_bytes = value.get("bytes")
        if isinstance(raw_bytes, (bytes, bytearray)):
            return _encode_bytes_to_base64(raw_bytes)
        if isinstance(raw_bytes, str) and raw_bytes.strip():
            return raw_bytes

        path_value = value.get("path")
        if isinstance(path_value, str) and path_value.strip():
            if as_base64 := _load_image_file_to_base64(path_value, base_path = base_path):
                return as_base64
            return path_value

    return value


def _apply_data_designer_image_context_patch() -> None:
    """Wrap ``ImageContext._auto_resolve_context_value`` exactly once.

    The wrapper runs ``_normalize_image_context_value`` on the incoming
    context value and then delegates to the original method. The patch is a
    no-op when ``data_designer`` cannot be imported, and it is skipped when
    either the module-level flag or the marker attribute on ``ImageContext``
    shows it was already applied.
    """
    global _IMAGE_CONTEXT_PATCHED
    if _IMAGE_CONTEXT_PATCHED:
        return

    try:
        from data_designer.config.models import ImageContext
    except ImportError:
        return

    marker = "_unsloth_image_context_patch_applied"
    if not getattr(ImageContext, marker, False):
        upstream_resolve = ImageContext._auto_resolve_context_value

        def _patched_auto_resolve(
            self: Any, context_value: Any, base_path: str | None
        ) -> Any:
            cleaned = _normalize_image_context_value(
                context_value, base_path = base_path
            )
            return upstream_resolve(self, cleaned, base_path)

        ImageContext._auto_resolve_context_value = _patched_auto_resolve
        setattr(ImageContext, marker, True)

    _IMAGE_CONTEXT_PATCHED = True


def build_model_providers(recipe: dict[str, Any]):
    """Build ``ModelProvider`` objects from ``recipe["model_providers"]``.

    For each provider the API key is taken from ``api_key`` when set;
    otherwise, if ``api_key_env`` is given, it is read from that environment
    variable. ``provider_type`` defaults to ``"openai"``.

    Args:
        recipe: Recipe payload. A missing or null ``model_providers`` entry
            yields an empty list.

    Returns:
        A list of ``ModelProvider`` instances, in recipe order.

    Raises:
        KeyError: If a provider lacks ``name`` or ``endpoint``.
    """
    from data_designer.config.models import ModelProvider

    providers: list[ModelProvider] = []
    # `or []` also covers an explicit null in the payload, which
    # `.get(key, [])` would hand back as None and fail to iterate.
    for provider in recipe.get("model_providers") or []:
        api_key = provider.get("api_key")
        api_key_env = provider.get("api_key_env")
        if not api_key and api_key_env:
            api_key = os.getenv(api_key_env)
        providers.append(
            ModelProvider(
                name = provider["name"],
                endpoint = provider["endpoint"],
                provider_type = provider.get("provider_type", "openai"),
                api_key = api_key,
                extra_headers = provider.get("extra_headers"),
                extra_body = provider.get("extra_body"),
            )
        )

    return providers


def _recipe_has_llm_columns(recipe: dict[str, Any]) -> bool:
    for column in recipe.get("columns", []):
        if not isinstance(column, dict):
            continue
        column_type = column.get("column_type")
        if isinstance(column_type, str) and column_type.startswith("llm-"):
            return True
    return False


def _validate_recipe_runtime_support(
    recipe: dict[str, Any],
    model_providers: list[Any],
) -> None:
    """Reject recipes that cannot be run.

    Raises:
        ValueError: If the recipe has no ``llm-*`` column, or (checked
            second) if no model provider was supplied.
    """
    has_llm_step = _recipe_has_llm_columns(recipe)
    if not has_llm_step:
        message = "Recipe Studio currently requires at least one AI generation step."
        raise ValueError(message)

    if not model_providers:
        message = "Add a Provider connection block before running this recipe."
        raise ValueError(message)


def build_mcp_providers(
    recipe: dict[str, Any],
) -> list:
    """Build MCP provider objects from ``recipe["mcp_providers"]``.

    * ``provider_type == "stdio"`` yields a ``LocalStdioMCPProvider``;
      ``args`` and ``env`` are replaced by empty containers when they are
      not a list / dict, and all their values are coerced to ``str``.
    * ``provider_type`` of ``"sse"`` or ``"streamable_http"`` yields an
      ``MCPProvider``; the API key comes from ``api_key`` or, failing that,
      from the environment variable named by ``api_key_env``.

    Entries that are not dicts, or that have any other provider type, are
    skipped. A missing or null ``mcp_providers`` entry yields an empty list.
    """
    from data_designer.config.mcp import LocalStdioMCPProvider, MCPProvider

    providers: list[MCPProvider | LocalStdioMCPProvider] = []
    # `or []` also covers an explicit null in the payload, which
    # `.get(key, [])` would hand back as None and fail to iterate.
    for provider in recipe.get("mcp_providers") or []:
        if not isinstance(provider, dict):
            continue
        provider_type = provider.get("provider_type")
        if provider_type == "stdio":
            env = provider.get("env")
            if not isinstance(env, dict):
                env = {}
            args = provider.get("args")
            if not isinstance(args, list):
                args = []
            providers.append(
                LocalStdioMCPProvider(
                    name = str(provider.get("name", "")),
                    command = str(provider.get("command", "")),
                    args = [str(value) for value in args],
                    env = {str(key): str(value) for key, value in env.items()},
                )
            )
            continue

        if provider_type in {"sse", "streamable_http"}:
            api_key = provider.get("api_key")
            api_key_env = provider.get("api_key_env")
            if not api_key and api_key_env:
                api_key = os.getenv(str(api_key_env))
            providers.append(
                MCPProvider(
                    name = str(provider.get("name", "")),
                    endpoint = str(provider.get("endpoint", "")),
                    provider_type = str(provider_type),
                    api_key = str(api_key) if api_key else None,
                )
            )
    return providers


def build_config_builder(recipe: dict[str, Any]):
    """Create a ``DataDesignerConfigBuilder`` for ``recipe``.

    Provider sections (``model_providers``/``mcp_providers``) are left out of
    the builder config. OXC local-callable validator specs are split off
    before the builder is created and registered on it afterwards, and the
    recipe's processors are re-added one by one.
    """
    _apply_data_designer_image_context_patch()
    from data_designer.config import DataDesignerConfigBuilder
    from data_designer.config.processors import ProcessorType

    provider_sections = {"model_providers", "mcp_providers"}
    without_providers = {
        name: section
        for name, section in recipe.items()
        if name not in provider_sections
    }
    recipe_core, oxc_local_callable_specs = split_oxc_local_callable_validators(
        without_providers
    )

    builder = DataDesignerConfigBuilder.from_config({"data_designer": recipe_core})
    register_oxc_local_callable_validators(
        builder = builder,
        specs = oxc_local_callable_specs,
    )

    # DataDesignerConfigBuilder.from_config currently skips processors, so
    # they are attached here to keep drop_columns/schema_transform working
    # for recipes that arrive through the API.
    for processor in recipe_core.get("processors") or []:
        if not isinstance(processor, dict):
            continue
        raw_type = processor.get("processor_type")
        if not isinstance(raw_type, str):
            continue
        options = {
            option: setting
            for option, setting in processor.items()
            if option != "processor_type"
        }
        builder.add_processor(processor_type = ProcessorType(raw_type), **options)

    return builder


def create_data_designer(
    recipe: dict[str, Any],
    *,
    artifact_path: str | None = None,
):
    """Create a ``DataDesigner`` wired with the recipe's providers.

    Model providers are built and validated first, so an unsupported recipe
    raises ``ValueError`` before MCP providers are built or the designer is
    constructed.
    """
    _apply_data_designer_image_context_patch()
    from data_designer.interface.data_designer import DataDesigner

    model_providers = build_model_providers(recipe)
    _validate_recipe_runtime_support(recipe, model_providers)

    mcp_providers = build_mcp_providers(recipe)
    return DataDesigner(
        artifact_path = artifact_path,
        model_providers = model_providers,
        mcp_providers = mcp_providers,
    )


def validate_recipe(recipe: dict[str, Any]) -> None:
    """Build the config and designer for ``recipe`` and run the designer's validation.

    Nothing is returned; problems surface as exceptions from the builder,
    the designer factory, or ``validate`` itself.
    """
    config_builder = build_config_builder(recipe)
    create_data_designer(recipe).validate(config_builder)


def preview_recipe(
    recipe: dict[str, Any],
    num_records: int,
) -> tuple[list[dict[str, Any]], dict[str, Any] | None, dict[str, Any] | None]:
    """Run a preview of ``recipe`` and return JSON-friendly results.

    Returns:
        A ``(dataset, artifacts, analysis)`` tuple: the preview rows as a
        list of dicts (empty when the preview has no dataset), the processor
        artifacts, and the analysis dumped in JSON mode. The last two are
        ``None`` when the preview did not produce them.
    """
    builder = build_config_builder(recipe)
    preview = create_data_designer(recipe).preview(builder, num_records = num_records)

    frame = preview.dataset
    if frame is None:
        dataset: list[dict[str, Any]] = []
    else:
        dataset = [to_jsonable(row) for row in frame.to_dict(orient = "records")]

    raw_artifacts = preview.processor_artifacts
    artifacts = to_jsonable(raw_artifacts) if raw_artifacts is not None else None

    raw_analysis = preview.analysis
    if raw_analysis is not None:
        analysis = to_jsonable(raw_analysis.model_dump(mode = "json"))
    else:
        analysis = None

    return dataset, artifacts, analysis


================================================
FILE: studio/backend/core/export/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Export submodule - Model export operations

The default get_export_backend() returns an ExportOrchestrator that
delegates to a subprocess. The original ExportBackend runs inside
the subprocess and can be imported directly from .export when needed.
"""

from .orchestrator import ExportOrchestrator, get_export_backend

# Backward compatibility: code that imports ``ExportBackend`` from this
# package now receives the subprocess-delegating ExportOrchestrator. The
# in-process implementation is still importable from ``.export``.
ExportBackend = ExportOrchestrator

# Public names of this package.
__all__ = [
    "ExportBackend",
    "ExportOrchestrator",
    "get_export_backend",
]


================================================
FILE: studio/backend/core/export/export.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

# backend/export.py
"""
Export backend - handles model exporting in various formats
"""

import glob
import json
import structlog
from loggers import get_logger
import os
import shutil
from pathlib import Path
from typing import Optional, Tuple, List
from peft import PeftModel, PeftModelForCausalLM
from unsloth import FastLanguageModel, FastVisionModel
from huggingface_hub import HfApi, ModelCard
from transformers.modeling_utils import PushToHubMixin
import torch
from utils.hardware import clear_gpu_cache

from utils.models import is_vision_model, get_base_model_from_lora
from utils.models.model_config import detect_audio_type
from utils.paths import ensure_dir, outputs_root, resolve_export_dir, resolve_output_dir
from core.inference import get_inference_backend

logger = get_logger(__name__)


def _is_wsl():
    """Detect if running under Windows Subsystem for Linux."""
    try:
        return "microsoft" in open("/proc/version").read().lower()
    except Exception:
        return False


def _apply_wsl_sudo_patch():
    """Replace ``unsloth_zoo.llama_cpp.do_we_need_sudo`` on WSL.

    Per the original rationale: WSL has no passwordless sudo, and
    ``do_we_need_sudo()`` runs ``sudo apt-get update``, which would block on
    a password prompt inside a non-interactive subprocess. setup.sh installs
    the build dependencies ahead of time on WSL, so the replacement simply
    reports that sudo is not needed.

    Does nothing outside WSL. Any failure while patching is logged as a
    warning and otherwise ignored.
    """
    if not _is_wsl():
        return

    def _wsl_do_we_need_sudo(system_type = "debian"):
        logger.info(
            "WSL detected — skipping sudo check (build deps pre-installed by setup.sh)"
        )
        return False

    try:
        import unsloth_zoo.llama_cpp as llama_cpp_module

        llama_cpp_module.do_we_need_sudo = _wsl_do_we_need_sudo
        logger.info("Applied WSL sudo patch to unsloth_zoo.llama_cpp.do_we_need_sudo")
    except Exception as e:
        logger.warning(f"Could not apply WSL sudo patch: {e}")


# Model card template (YAML front matter followed by Markdown).
# Filled in with ``str.format``; the placeholders are ``base_model``,
# ``model_type``, ``extra``, ``method`` and ``username``.
MODEL_CARD = """---
base_model: {base_model}
tags:
- text-generation-inference
- transformers
- unsloth
- {model_type}
- {extra}
license: apache-2.0
language:
- en
---

# Uploaded finetuned {method} model

- **Developed by:** {username}
- **License:** apache-2.0
- **Finetuned from model :** {base_model}

This {model_type} model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
"""


class ExportBackend:
    """Handles model export operations"""

    def __init__(self):
        """Bind the inference backend and start with nothing loaded."""
        self.inference_backend = get_inference_backend()

        # Nothing is loaded until load_checkpoint() succeeds.
        self.current_checkpoint = self.current_model = self.current_tokenizer = None

        # Flags describing the loaded model; load_checkpoint() sets them.
        self.is_vision, self.is_peft = False, False
        self._audio_type = None

    def cleanup_memory(self):
        """Unload every inference model, drop the export state, and clear the GPU cache.

        Returns:
            True on success; False if any step raised (the error is logged).
        """
        try:
            logger.info("Starting memory cleanup...")

            backend = self.inference_backend
            # Iterate over a snapshot of the names — presumably because
            # unload_model() removes entries from ``models`` as it goes.
            for loaded_name in list(backend.models.keys()):
                backend.unload_model(loaded_name)

            # Forget whatever checkpoint was loaded for export.
            self.current_model = None
            self.current_tokenizer = None
            self.current_checkpoint = None
            self._audio_type = None

            # Clear GPU memory cache (handles gc + backend-specific cleanup)
            clear_gpu_cache()

            logger.info("Memory cleanup completed successfully")
        except Exception as e:
            logger.error(f"Error during memory cleanup: {e}")
            return False
        else:
            return True

    def scan_checkpoints(
        self, outputs_dir: str = str(outputs_root())
    ) -> List[Tuple[str, List[Tuple[str, str]]]]:
        """
        List training runs and their checkpoints found under ``outputs_dir``.

        Thin wrapper around ``utils.models.checkpoints.scan_checkpoints``.
        The default directory is computed once, when the class is defined.

        Returns:
            List of tuples: [(model_name, [(display_name, checkpoint_path), ...]), ...]
        """
        from utils.models import checkpoints as _checkpoints

        found = _checkpoints.scan_checkpoints(outputs_dir = outputs_dir)
        return found

    def load_checkpoint(
        self,
        checkpoint_path: str,
        max_seq_length: int = 2048,
        load_in_4bit: bool = True,
        trust_remote_code: bool = False,
    ) -> Tuple[bool, str]:
        """
        Load a checkpoint for export.

        Any previously loaded models are released first via
        ``cleanup_memory()``. The loader is then chosen from the detected
        model kind: one of the audio types (csm, whisper, snac, bicodec,
        dac), a vision model, or a plain text model.

        Args:
            checkpoint_path: Directory of the checkpoint to load. When it
                contains ``adapter_config.json`` the adapter's base model is
                used for type detection; the checkpoint itself is still what
                gets loaded.
            max_seq_length: Forwarded to ``from_pretrained`` by every branch
                except Whisper.
            load_in_4bit: Honored by the SNAC, vision and text branches; the
                CSM, Whisper, BiCodec and DAC branches always load with
                ``load_in_4bit = False``.
            trust_remote_code: Forwarded to ``from_pretrained``.

        Returns:
            Tuple of (success: bool, message: str). Exceptions are caught,
            logged with their traceback, and reported as a failure message.
        """
        try:
            logger.info(f"Loading checkpoint: {checkpoint_path}")

            # First, cleanup existing models
            self.cleanup_memory()

            checkpoint_path_obj = Path(checkpoint_path)

            # Determine the model identity for type detection
            adapter_config = checkpoint_path_obj / "adapter_config.json"
            base_model = None
            if adapter_config.exists():
                base_model = get_base_model_from_lora(checkpoint_path)
                if not base_model:
                    return False, "Could not determine base model for adapter"

            # Adapters are classified by their base model; full checkpoints
            # by their own path.
            model_id = base_model or checkpoint_path

            # Detect audio type and vision
            self._audio_type = detect_audio_type(model_id)
            # Audio detection takes precedence: vision is only considered
            # when no audio type was found.
            self.is_vision = not self._audio_type and is_vision_model(model_id)

            # Load model based on type
            if self._audio_type == "csm":
                from unsloth import FastModel
                from transformers import CsmForConditionalGeneration

                logger.info("Loading as CSM audio model...")
                model, tokenizer = FastModel.from_pretrained(
                    model_name = checkpoint_path,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    auto_model = CsmForConditionalGeneration,
                    load_in_4bit = False,
                    trust_remote_code = trust_remote_code,
                )

            elif self._audio_type == "whisper":
                from unsloth import FastModel
                from transformers import WhisperForConditionalGeneration

                logger.info("Loading as Whisper audio model...")
                # Note: max_seq_length is not passed for Whisper.
                model, tokenizer = FastModel.from_pretrained(
                    model_name = checkpoint_path,
                    dtype = None,
                    load_in_4bit = False,
                    auto_model = WhisperForConditionalGeneration,
                    trust_remote_code = trust_remote_code,
                )

            elif self._audio_type == "snac":
                logger.info("Loading as SNAC (Orpheus) audio model...")
                model, tokenizer = FastLanguageModel.from_pretrained(
                    model_name = checkpoint_path,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    load_in_4bit = load_in_4bit,
                    trust_remote_code = trust_remote_code,
                )

            elif self._audio_type == "bicodec":
                from unsloth import FastModel

                logger.info("Loading as BiCodec (Spark-TTS) audio model...")
                # This branch pins dtype to float32 instead of leaving it None.
                model, tokenizer = FastModel.from_pretrained(
                    model_name = checkpoint_path,
                    max_seq_length = max_seq_length,
                    dtype = torch.float32,
                    load_in_4bit = False,
                    trust_remote_code = trust_remote_code,
                )

            elif self._audio_type == "dac":
                from unsloth import FastModel

                logger.info("Loading as DAC (OuteTTS) audio model...")
                # No dtype argument here; the loader's own default applies.
                model, tokenizer = FastModel.from_pretrained(
                    model_name = checkpoint_path,
                    max_seq_length = max_seq_length,
                    load_in_4bit = False,
                    trust_remote_code = trust_remote_code,
                )

            elif self.is_vision:
                logger.info("Loading as vision model...")
                model, processor = FastVisionModel.from_pretrained(
                    model_name = checkpoint_path,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    load_in_4bit = load_in_4bit,
                    trust_remote_code = trust_remote_code,
                )
                tokenizer = processor  # For vision models, processor acts as tokenizer

            else:
                logger.info("Loading as text model...")
                model, tokenizer = FastLanguageModel.from_pretrained(
                    model_name = checkpoint_path,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    load_in_4bit = load_in_4bit,
                    trust_remote_code = trust_remote_code,
                )

            # Check if PEFT model
            self.is_peft = isinstance(model, (PeftModel, PeftModelForCausalLM))

            # Store loaded model
            self.current_model = model
            self.current_tokenizer = tokenizer
            self.current_checkpoint = checkpoint_path

            # Human-readable summary used in the log line and return message.
            if self._audio_type:
                model_type = f"Audio ({self._audio_type})"
            elif self.is_vision:
                model_type = "Vision"
            else:
                model_type = "Text"
            peft_info = " (PEFT Adapter)" if self.is_peft else " (Merged Model)"

            logger.info(f"Successfully loaded {model_type} model{peft_info}")
            return True, f"Loaded {model_type} model{peft_info} successfully"

        except Exception as e:
            logger.error(f"Error loading checkpoint: {e}")
            import traceback

            logger.error(traceback.format_exc())
            return False, f"Failed to load checkpoint: {str(e)}"

    def _write_export_metadata(self, save_directory: str):
        """Record the base model in ``export_metadata.json`` inside ``save_directory``.

        The file holds a single ``base_model`` key (null when no checkpoint
        is loaded) so the Chat page can discover the base model. Writing is
        best-effort: failures are logged as warnings and not raised.
        """
        try:
            checkpoint = self.current_checkpoint
            base_model = get_base_model_from_lora(checkpoint) if checkpoint else None

            metadata_path = os.path.join(save_directory, "export_metadata.json")
            with open(metadata_path, "w") as f:
                json.dump({"base_model": base_model}, f, indent = 2)
            logger.info(f"Wrote export metadata to {metadata_path}")
        except Exception as e:
            logger.warning(f"Could not write export metadata: {e}")

    def export_merged_model(
        self,
        save_directory: str,
        format_type: str = "16-bit (FP16)",
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        private: bool = False,
    ) -> Tuple[bool, str]:
        """
        Export merged model (for PEFT models).

        Args:
            save_directory: Local directory to save model; an empty value
                skips the local save.
            format_type: "16-bit (FP16)" or "4-bit (FP4)". Any value other
                than "4-bit (FP4)" is treated as 16-bit.
            push_to_hub: Whether to push to Hugging Face Hub
            repo_id: Hub repository ID (username/model-name)
            hf_token: Hugging Face token
            private: Whether to make the repo private

        Returns:
            Tuple of (success: bool, message: str). Exceptions raised while
            saving or pushing are logged and reported as a failure message.
        """
        if not self.current_model or not self.current_tokenizer:
            return False, "No model loaded. Please select a checkpoint first."

        if not self.is_peft:
            return False, "This is not a PEFT model. Use 'Export Base Model' instead."

        # Validate the Hub parameters before doing any work, so a missing
        # repo id or token fails immediately instead of after the (slow)
        # local merged save has already been written.
        if push_to_hub and (not repo_id or not hf_token):
            return (
                False,
                "Repository ID and Hugging Face token required for Hub upload",
            )

        try:
            # Determine save method
            if format_type == "4-bit (FP4)":
                save_method = "merged_4bit_forced"
            elif self._audio_type == "whisper":
                # Whisper uses save_method=None for local 16-bit merged save
                save_method = None
            else:  # 16-bit (FP16)
                save_method = "merged_16bit"

            # Save locally if requested
            if save_directory:
                save_directory = str(resolve_export_dir(save_directory))
                logger.info(f"Saving merged model locally to: {save_directory}")
                ensure_dir(Path(save_directory))

                self.current_model.save_pretrained_merged(
                    save_directory, self.current_tokenizer, save_method = save_method
                )

                # Write export metadata so the Chat page can identify the base model
                self._write_export_metadata(save_directory)
                logger.info(f"Model saved successfully to {save_directory}")

            # Push to hub if requested (credentials were validated above)
            if push_to_hub:
                logger.info(f"Pushing merged model to Hub: {repo_id}")

                # Whisper uses save_method=None for local but "merged_16bit" for hub push
                hub_save_method = (
                    save_method if save_method is not None else "merged_16bit"
                )
                self.current_model.push_to_hub_merged(
                    repo_id,
                    self.current_tokenizer,
                    save_method = hub_save_method,
                    token = hf_token,
                    private = private,
                )
                logger.info(f"Model pushed successfully to {repo_id}")

            return True, "Model exported successfully"

        except Exception as e:
            logger.error(f"Error exporting merged model: {e}")
            import traceback

            logger.error(traceback.format_exc())
            return False, f"Export failed: {str(e)}"

    def export_base_model(
        self,
        save_directory: str,
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        private: bool = False,
        base_model_id: Optional[str] = None,
    ) -> Tuple[bool, str]:
        """
        Export base model (for non-PEFT models).

        When ``save_directory`` is given, the model and tokenizer are written
        there together with the export metadata. When ``push_to_hub`` is set,
        the Hub repo is created, a model card is pushed, and the locally saved
        folder is uploaded — so a Hub push always needs a local directory.

        Args:
            save_directory: Local directory to save into (resolved through
                ``resolve_export_dir``). Required when ``push_to_hub`` is set.
            push_to_hub: Whether to upload the saved folder to the Hub.
            repo_id: Hub repository ID (required for Hub upload).
            hf_token: Hugging Face token (required for Hub upload).
            private: Whether the Hub repository is created as private.
            base_model_id: Base model name for the model card; falls back to
                the loaded model's ``config._name_or_path``, then "unknown".

        Returns:
            Tuple of (success: bool, message: str)
        """
        if not self.current_model or not self.current_tokenizer:
            return False, "No model loaded. Please select a checkpoint first."

        if self.is_peft:
            return (
                False,
                "This is a PEFT model. Use 'Merged Model' export type instead.",
            )

        try:
            # Save locally if requested
            if save_directory:
                save_directory = str(resolve_export_dir(save_directory))
                logger.info(f"Saving base model locally to: {save_directory}")
                ensure_dir(Path(save_directory))

                self.current_model.save_pretrained(save_directory)
                self.current_tokenizer.save_pretrained(save_directory)

                # Write export metadata so the Chat page can identify the base model
                self._write_export_metadata(save_directory)
                logger.info(f"Model saved successfully to {save_directory}")

            # Push to hub if requested
            if push_to_hub:
                if not repo_id or not hf_token:
                    return (
                        False,
                        "Repository ID and Hugging Face token required for Hub upload",
                    )

                # The upload below sends the local folder, so reject a missing
                # directory BEFORE creating the repo or pushing the model card.
                # Otherwise a failed export leaves a card-only repo on the Hub.
                if not save_directory:
                    return False, "Local save directory required for Hub upload"

                logger.info(f"Pushing base model to Hub: {repo_id}")

                # Get base model name from request or model config
                base_model = (
                    base_model_id
                    or self.current_model.config._name_or_path
                    or "unknown"
                )

                # Create repo
                hf_api = HfApi(token = hf_token)
                repo_id = PushToHubMixin._create_repo(
                    PushToHubMixin,
                    repo_id = repo_id,
                    private = private,
                    token = hf_token,
                )
                # The first path component of the returned repo id is used as
                # the username in the model card.
                username = repo_id.split("/")[0]

                # Create and push model card
                content = MODEL_CARD.format(
                    username = username,
                    base_model = base_model,
                    model_type = self.current_model.config.model_type,
                    method = "",
                    extra = "unsloth",
                )
                card = ModelCard(content)
                card.push_to_hub(
                    repo_id, token = hf_token, commit_message = "Unsloth Model Card"
                )

                # Upload model files
                hf_api.upload_folder(
                    folder_path = save_directory, repo_id = repo_id, repo_type = "model"
                )
                logger.info(f"Model pushed successfully to {repo_id}")

            return True, "Model exported successfully"

        except Exception as e:
            # Boundary handler: callers expect a (success, message) tuple,
            # never an exception.
            logger.error(f"Error exporting base model: {e}")
            import traceback

            logger.error(traceback.format_exc())
            return False, f"Export failed: {str(e)}"

    def export_gguf(
        self,
        save_directory: str,
        quantization_method: str = "Q4_K_M",
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
    ) -> Tuple[bool, str]:
        """
        Convert the loaded model to GGUF and optionally publish it.

        The local export runs the conversion, then gathers every produced
        ``.gguf`` file directly into the export directory: files that appeared
        in the current working directory are moved over, files found in
        subdirectories of the export directory are pulled up, and those
        subdirectories are removed afterwards.

        Args:
            save_directory: Local directory to save model (skipped when empty)
            quantization_method: GGUF quantization method (e.g., "Q4_K_M")
            push_to_hub: Whether to push to Hugging Face Hub
            repo_id: Hub repository ID
            hf_token: Hugging Face token

        Returns:
            Tuple of (success: bool, message: str)
        """
        if not self.current_model or not self.current_tokenizer:
            return False, "No model loaded. Please select a checkpoint first."

        try:
            # unsloth takes the quantization name in lowercase
            quant = quantization_method.lower()

            if save_directory:
                save_directory = str(resolve_export_dir(save_directory))
                # Work with an absolute path so that unsloth's relative-path
                # internals keep resolving against cwd, not the export dir.
                export_dir = os.path.abspath(save_directory)
                logger.info(f"Saving GGUF model locally to: {export_dir}")
                ensure_dir(Path(export_dir))

                # On WSL the sudo check is patched out before llama.cpp builds
                _apply_wsl_sudo_patch()

                # Remember which .gguf files already sit in cwd: the converter
                # writes its output relative to cwd, and only the new files
                # must be relocated afterwards.
                launch_dir = os.getcwd()
                cwd_pattern = os.path.join(launch_dir, "*.gguf")
                ggufs_before = set(glob.glob(cwd_pattern))

                # Intermediate HF files go under <export_dir>/model
                self.current_model.save_pretrained_gguf(
                    os.path.join(export_dir, "model"),
                    self.current_tokenizer,
                    quantization_method = quant,
                )

                # Move freshly created cwd artifacts into the export directory
                ggufs_after = set(glob.glob(cwd_pattern))
                for stray in sorted(ggufs_after - ggufs_before):
                    stray_name = os.path.basename(stray)
                    shutil.move(stray, os.path.join(export_dir, stray_name))
                    logger.info(f"Relocated GGUF: {stray_name} → {export_dir}/")

                # Pull .gguf files up out of any subdirectory, then drop the
                # subdirectory itself (intermediate HF files, etc.).
                for entry in list(Path(export_dir).iterdir()):
                    if entry.is_dir():
                        for nested in entry.glob("*.gguf"):
                            shutil.move(
                                str(nested), os.path.join(export_dir, nested.name)
                            )
                            logger.info(
                                f"Relocated GGUF: {nested.name} → {export_dir}/"
                            )
                        shutil.rmtree(str(entry), ignore_errors = True)
                        logger.info(f"Cleaned up subdirectory: {entry.name}")

                # Export metadata lets the Chat page identify the base model
                self._write_export_metadata(export_dir)

                # Report where the GGUF files ended up after relocation
                final_names = [
                    os.path.basename(found)
                    for found in sorted(glob.glob(os.path.join(export_dir, "*.gguf")))
                ]
                logger.info(
                    "GGUF export complete. Final files in %s:\n  %s",
                    export_dir,
                    "\n  ".join(final_names) or "(none)",
                )

            if push_to_hub:
                if not (repo_id and hf_token):
                    return (
                        False,
                        "Repository ID and Hugging Face token required for Hub upload",
                    )

                logger.info(f"Pushing GGUF model to Hub: {repo_id}")
                self.current_model.push_to_hub_gguf(
                    repo_id,
                    self.current_tokenizer,
                    quantization_method = quant,
                    token = hf_token,
                )
                logger.info(f"GGUF model pushed successfully to {repo_id}")

            return True, f"GGUF model exported successfully ({quantization_method})"

        except Exception as err:
            logger.error(f"Error exporting GGUF model: {err}")
            import traceback

            logger.error(traceback.format_exc())
            return False, f"GGUF export failed: {err}"

    def export_lora_adapter(
        self,
        save_directory: str,
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        private: bool = False,
    ) -> Tuple[bool, str]:
        """
        Export only the LoRA adapter of the loaded PEFT model (no merge).

        Args:
            save_directory: Local directory to save into (skipped when empty)
            push_to_hub: Whether to push adapter and tokenizer to the Hub
            repo_id: Hub repository ID (required for Hub upload)
            hf_token: Hugging Face token (required for Hub upload)
            private: Forwarded to both ``push_to_hub`` calls

        Returns:
            Tuple of (success: bool, message: str)
        """
        if not self.current_model or not self.current_tokenizer:
            return False, "No model loaded. Please select a checkpoint first."

        if not self.is_peft:
            return False, "This is not a PEFT model. No adapter to export."

        adapter_model = self.current_model
        tokenizer = self.current_tokenizer

        try:
            # Optional local copy
            if save_directory:
                save_directory = str(resolve_export_dir(save_directory))
                logger.info(f"Saving LoRA adapter locally to: {save_directory}")
                ensure_dir(Path(save_directory))

                for artifact in (adapter_model, tokenizer):
                    artifact.save_pretrained(save_directory)
                logger.info(f"Adapter saved successfully to {save_directory}")

            # Optional Hub upload
            if push_to_hub:
                if not (repo_id and hf_token):
                    return (
                        False,
                        "Repository ID and Hugging Face token required for Hub upload",
                    )

                logger.info(f"Pushing LoRA adapter to Hub: {repo_id}")
                for artifact in (adapter_model, tokenizer):
                    artifact.push_to_hub(repo_id, token = hf_token, private = private)
                logger.info(f"Adapter pushed successfully to {repo_id}")

            return True, "LoRA adapter exported successfully"

        except Exception as err:
            logger.error(f"Error exporting LoRA adapter: {err}")
            import traceback

            logger.error(traceback.format_exc())
            return False, f"Adapter export failed: {err}"


# Module-wide ExportBackend singleton, filled in lazily by get_export_backend()
_export_backend = None


def get_export_backend() -> ExportBackend:
    """Return the shared ExportBackend, constructing it on the first call."""
    global _export_backend
    if _export_backend is not None:
        return _export_backend
    _export_backend = ExportBackend()
    return _export_backend


================================================
FILE: studio/backend/core/export/orchestrator.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Export orchestrator — subprocess-based.

Provides the same API as ExportBackend, but delegates all ML work
to a persistent subprocess. The subprocess is spawned on first checkpoint
load and stays alive for subsequent export operations.

When switching between checkpoints that need different transformers versions,
the old subprocess is killed and a new one is spawned with the correct version.

Pattern follows core/inference/orchestrator.py.
"""

import atexit
import structlog
from loggers import get_logger
import multiprocessing as mp
import queue
import threading
import time
from pathlib import Path
from typing import Any, List, Optional, Tuple
from utils.paths import outputs_root

# Module-level logger for the parent-side export orchestrator
logger = get_logger(__name__)

# Multiprocessing context using the "spawn" start method; ExportOrchestrator
# creates its worker Process and both Queues from this context.
_CTX = mp.get_context("spawn")


class ExportOrchestrator:
    """
    Export backend orchestrator — subprocess-based.

    Exposes the same API surface as ExportBackend so routes/export.py
    needs minimal changes. Internally, all heavy ML operations happen in
    a persistent subprocess.

    Commands are sent to the worker as dicts on ``_cmd_queue``; replies are
    read as dicts from ``_resp_queue``. Every reply carries a ``"type"`` key
    which ``_wait_response`` matches against the type it is waiting for.
    """

    def __init__(self):
        # Subprocess state
        self._proc: Optional[mp.Process] = None
        self._cmd_queue: Any = None
        self._resp_queue: Any = None
        # NOTE(review): this lock is created but never acquired inside this
        # class — confirm whether callers are expected to serialize access.
        self._lock = threading.Lock()

        # Local state mirrors (updated from subprocess responses)
        self.current_checkpoint: Optional[str] = None
        self.is_vision: bool = False
        self.is_peft: bool = False

        atexit.register(self._cleanup)
        logger.info("ExportOrchestrator initialized (subprocess mode)")

    def _reset_state(self) -> None:
        """Clear the local mirrors of the worker's model state."""
        self.current_checkpoint = None
        self.is_vision = False
        self.is_peft = False

    # ------------------------------------------------------------------
    # Subprocess lifecycle
    # ------------------------------------------------------------------

    def _spawn_subprocess(self, config: dict) -> None:
        """Spawn a new export subprocess.

        Args:
            config: Initial configuration passed to ``run_export_process``.
        """
        from .worker import run_export_process

        self._cmd_queue = _CTX.Queue()
        self._resp_queue = _CTX.Queue()

        self._proc = _CTX.Process(
            target = run_export_process,
            kwargs = {
                "cmd_queue": self._cmd_queue,
                "resp_queue": self._resp_queue,
                "config": config,
            },
            daemon = True,
        )
        self._proc.start()
        logger.info("Export subprocess started (pid=%s)", self._proc.pid)

    def _shutdown_subprocess(self, timeout: float = 10.0) -> None:
        """Gracefully shut down the export subprocess.

        Escalates from a "shutdown" command to terminate() and finally kill().

        Args:
            timeout: Seconds to wait for the graceful exit before escalating.
        """
        if self._proc is None or not self._proc.is_alive():
            # Nothing to stop. Drop the queue handles as well so that
            # _send_cmd/_read_resp report "no subprocess" instead of talking
            # to queues of a process that is already gone.
            self._proc = None
            self._cmd_queue = None
            self._resp_queue = None
            return

        # 1. Drain stale responses
        self._drain_queue()

        # 2. Send shutdown command
        try:
            self._cmd_queue.put({"type": "shutdown"})
        except (OSError, ValueError):
            # Best effort: the forced termination below covers this case
            pass

        # 3. Wait for graceful shutdown
        try:
            self._proc.join(timeout = timeout)
        except Exception:
            pass

        # 4. Force kill if still alive
        if self._proc is not None and self._proc.is_alive():
            logger.warning("Export subprocess did not exit gracefully, terminating")
            try:
                self._proc.terminate()
                self._proc.join(timeout = 5)
            except Exception:
                pass
            if self._proc is not None and self._proc.is_alive():
                logger.warning("Subprocess still alive after terminate, killing")
                try:
                    self._proc.kill()
                    self._proc.join(timeout = 3)
                except Exception:
                    pass

        self._proc = None
        self._cmd_queue = None
        self._resp_queue = None
        logger.info("Export subprocess shut down")

    def _cleanup(self):
        """atexit handler."""
        self._shutdown_subprocess(timeout = 5.0)

    def _ensure_subprocess_alive(self) -> bool:
        """Check if subprocess is alive."""
        return self._proc is not None and self._proc.is_alive()

    # ------------------------------------------------------------------
    # Queue helpers
    # ------------------------------------------------------------------

    def _send_cmd(self, cmd: dict) -> None:
        """Send a command to the subprocess.

        Raises:
            RuntimeError: If there is no command queue or the put fails.
        """
        if self._cmd_queue is None:
            raise RuntimeError("No export subprocess running")
        try:
            self._cmd_queue.put(cmd)
        except (OSError, ValueError) as exc:
            raise RuntimeError(
                f"Failed to send command to subprocess: {exc}"
            ) from exc

    def _read_resp(self, timeout: float = 1.0) -> Optional[dict]:
        """Read one response, waiting at most ``timeout`` seconds.

        Returns:
            The response dict, or None when nothing arrived in time or the
            queue is unavailable/broken.
        """
        if self._resp_queue is None:
            return None
        try:
            return self._resp_queue.get(timeout = timeout)
        except queue.Empty:
            return None
        except (EOFError, OSError, ValueError):
            return None

    def _match_response(self, resp: dict, expected_type: str) -> Optional[dict]:
        """Classify one response while waiting for ``expected_type``.

        Returns:
            ``resp`` when its type matches, otherwise None (status messages
            are logged, anything else is skipped).

        Raises:
            RuntimeError: If the response is of type "error".
        """
        rtype = resp.get("type", "")

        if rtype == expected_type:
            return resp

        if rtype == "error":
            error_msg = resp.get("error", "Unknown error")
            raise RuntimeError(f"Subprocess error: {error_msg}")

        if rtype == "status":
            logger.info("Export subprocess status: %s", resp.get("message", ""))
            return None

        # Other response types during wait — skip
        logger.debug(
            "Skipping response type '%s' while waiting for '%s'",
            rtype,
            expected_type,
        )
        return None

    def _wait_response(self, expected_type: str, timeout: float = 3600.0) -> dict:
        """Block until a response of the expected type arrives.

        Export operations can take a very long time — GGUF conversion for
        large models (30B+) easily takes 20-30 minutes. Default timeout
        is 1 hour.

        Raises:
            RuntimeError: On an "error" response, when the subprocess died
                without delivering the expected response, or on timeout.
        """
        deadline = time.monotonic() + timeout

        while time.monotonic() < deadline:
            remaining = max(0.1, deadline - time.monotonic())
            resp = self._read_resp(timeout = min(remaining, 2.0))

            if resp is None:
                # Check subprocess health
                if self._ensure_subprocess_alive():
                    continue

                # The worker may have queued its last message right before
                # exiting (e.g. an "error" reply followed by return). Look at
                # what is still pending so the real outcome is not replaced
                # by a generic crash message.
                for late in self._drain_queue():
                    matched = self._match_response(late, expected_type)
                    if matched is not None:
                        return matched
                raise RuntimeError("Export subprocess crashed during wait")

            matched = self._match_response(resp, expected_type)
            if matched is not None:
                return matched

        raise RuntimeError(
            f"Timeout waiting for '{expected_type}' response after {timeout}s"
        )

    def _drain_queue(self) -> list:
        """Drain all pending responses without blocking and return them."""
        events = []
        if self._resp_queue is None:
            return events
        while True:
            try:
                events.append(self._resp_queue.get_nowait())
            except queue.Empty:
                return events
            except (EOFError, OSError, ValueError):
                return events

    # ------------------------------------------------------------------
    # Public API — same interface as ExportBackend
    # ------------------------------------------------------------------

    def load_checkpoint(
        self,
        checkpoint_path: str,
        max_seq_length: int = 2048,
        load_in_4bit: bool = True,
        trust_remote_code: bool = False,
    ) -> Tuple[bool, str]:
        """Load a checkpoint for export.

        Always spawns a fresh subprocess to ensure a clean Python interpreter.

        Returns:
            Tuple of (success: bool, message: str)
        """
        sub_config = {
            "checkpoint_path": checkpoint_path,
            "max_seq_length": max_seq_length,
            "load_in_4bit": load_in_4bit,
            "trust_remote_code": trust_remote_code,
        }

        # Always kill existing subprocess and spawn fresh.
        if self._ensure_subprocess_alive():
            self._shutdown_subprocess()
        elif self._proc is not None:
            self._shutdown_subprocess(timeout = 2)

        logger.info("Spawning fresh export subprocess for '%s'", checkpoint_path)
        self._spawn_subprocess(sub_config)

        try:
            resp = self._wait_response("loaded", timeout = 300)
        except RuntimeError as exc:
            self._shutdown_subprocess(timeout = 5)
            self._reset_state()
            return False, str(exc)

        if resp.get("success"):
            self.current_checkpoint = resp.get("checkpoint")
            self.is_vision = resp.get("is_vision", False)
            self.is_peft = resp.get("is_peft", False)
            logger.info("Checkpoint '%s' loaded in subprocess", checkpoint_path)
            return True, resp.get("message", "Loaded successfully")

        error = resp.get("message", "Failed to load checkpoint")
        logger.error("Failed to load checkpoint: %s", error)
        self._reset_state()
        return False, error

    def export_merged_model(
        self,
        save_directory: str,
        format_type: str = "16-bit (FP16)",
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        private: bool = False,
    ) -> Tuple[bool, str]:
        """Export merged PEFT model."""
        return self._run_export(
            "merged",
            {
                "save_directory": save_directory,
                "format_type": format_type,
                "push_to_hub": push_to_hub,
                "repo_id": repo_id,
                "hf_token": hf_token,
                "private": private,
            },
        )

    def export_base_model(
        self,
        save_directory: str,
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        private: bool = False,
        base_model_id: Optional[str] = None,
    ) -> Tuple[bool, str]:
        """Export base model (non-PEFT)."""
        return self._run_export(
            "base",
            {
                "save_directory": save_directory,
                "push_to_hub": push_to_hub,
                "repo_id": repo_id,
                "hf_token": hf_token,
                "private": private,
                "base_model_id": base_model_id,
            },
        )

    def export_gguf(
        self,
        save_directory: str,
        quantization_method: str = "Q4_K_M",
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
    ) -> Tuple[bool, str]:
        """Export model in GGUF format."""
        return self._run_export(
            "gguf",
            {
                "save_directory": save_directory,
                "quantization_method": quantization_method,
                "push_to_hub": push_to_hub,
                "repo_id": repo_id,
                "hf_token": hf_token,
            },
        )

    def export_lora_adapter(
        self,
        save_directory: str,
        push_to_hub: bool = False,
        repo_id: Optional[str] = None,
        hf_token: Optional[str] = None,
        private: bool = False,
    ) -> Tuple[bool, str]:
        """Export LoRA adapter only."""
        return self._run_export(
            "lora",
            {
                "save_directory": save_directory,
                "push_to_hub": push_to_hub,
                "repo_id": repo_id,
                "hf_token": hf_token,
                "private": private,
            },
        )

    def _run_export(self, export_type: str, params: dict) -> Tuple[bool, str]:
        """Send an export command to the subprocess and wait for result.

        Args:
            export_type: One of the export kinds used by the public methods
                ("merged", "base", "gguf", "lora").
            params: Keyword payload merged into the command dict.

        Returns:
            Tuple of (success: bool, message: str); failures to communicate
            with the subprocess are reported as ``(False, <reason>)``.
        """
        if not self._ensure_subprocess_alive():
            return False, "No export subprocess running. Load a checkpoint first."

        cmd = {"type": "export", "export_type": export_type, **params}

        try:
            self._send_cmd(cmd)
            resp = self._wait_response(
                f"export_{export_type}_done",
                timeout = 3600,  # GGUF for 30B+ models can take 30+ min
            )
            return resp.get("success", False), resp.get("message", "")
        except RuntimeError as exc:
            return False, str(exc)

    def cleanup_memory(self) -> bool:
        """Cleanup export-related models from memory.

        Returns:
            True when there was nothing to clean or the worker reported
            success; False otherwise. Local state is cleared in every case.
        """
        if not self._ensure_subprocess_alive():
            # No subprocess — just clear local state
            self._reset_state()
            return True

        try:
            self._send_cmd({"type": "cleanup"})
            resp = self._wait_response("cleanup_done", timeout = 30)
            success = resp.get("success", False)
        except RuntimeError:
            success = False

        # Shut down subprocess after cleanup — no model loaded
        self._shutdown_subprocess()

        self._reset_state()
        return success

    def scan_checkpoints(
        self, outputs_dir: Optional[str] = None
    ) -> List[Tuple[str, list]]:
        """Scan for checkpoints — no ML imports needed, runs locally.

        Args:
            outputs_dir: Directory to scan. Defaults to ``outputs_root()``,
                resolved at call time rather than once at import time.
        """
        from utils.models.checkpoints import scan_checkpoints

        if outputs_dir is None:
            outputs_dir = str(outputs_root())
        return scan_checkpoints(outputs_dir = outputs_dir)


# ========== GLOBAL INSTANCE ==========
# Lazily created singleton; see get_export_backend()
_export_backend = None


def get_export_backend() -> ExportOrchestrator:
    """Return the shared ExportOrchestrator, creating it on first access."""
    global _export_backend
    if _export_backend is not None:
        return _export_backend
    _export_backend = ExportOrchestrator()
    return _export_backend


================================================
FILE: studio/backend/core/export/worker.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Export subprocess entry point.

Each export session runs in a persistent subprocess (mp.get_context("spawn")).
This gives us a clean Python interpreter with no stale module state —
solving the transformers version-switching problem completely.

The subprocess stays alive while a model is loaded, accepting commands
(load, export_merged, export_base, export_gguf, export_lora, cleanup,
shutdown) via mp.Queue.

Pattern follows core/inference/worker.py and core/training/worker.py.
"""

from __future__ import annotations

import structlog
from loggers import get_logger
import os
import sys
import time
import traceback
from pathlib import Path
from typing import Any

# Module-level logger shared by the helpers in this worker module
logger = get_logger(__name__)


def _activate_transformers_version(model_name: str) -> None:
    """Select the transformers version for ``model_name`` before ML imports.

    Models that need transformers 5.x get the pre-installed .venv_t5/
    directory placed at the front of sys.path (and of PYTHONPATH, so child
    processes inherit it). All other models keep the default 4.57.x from
    .venv/ and nothing is changed.

    Raises:
        RuntimeError: If transformers 5.x is required but .venv_t5 is missing.
    """
    # The backend root must be importable for the utils.* import below
    backend_root = str(Path(__file__).resolve().parents[2])
    if backend_root not in sys.path:
        sys.path.insert(0, backend_root)

    from utils.transformers_version import (
        needs_transformers_5,
        _resolve_base_model,
        _ensure_venv_t5_exists,
        _VENV_T5_DIR,
    )

    if not needs_transformers_5(_resolve_base_model(model_name)):
        logger.info("Using default transformers (4.57.x) for %s", model_name)
        return

    if not _ensure_venv_t5_exists():
        raise RuntimeError(
            f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
        )

    if _VENV_T5_DIR not in sys.path:
        sys.path.insert(0, _VENV_T5_DIR)
    logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR)

    # Child subprocesses (e.g. the GGUF converter) pick the path up through
    # PYTHONPATH; any inherited value is kept after the new entry.
    inherited = os.environ.get("PYTHONPATH", "")
    if inherited:
        os.environ["PYTHONPATH"] = os.pathsep.join((_VENV_T5_DIR, inherited))
    else:
        os.environ["PYTHONPATH"] = _VENV_T5_DIR


def _send_response(resp_queue: Any, response: dict) -> None:
    """Put ``response`` on the queue read by the parent process.

    An OSError or ValueError raised by the put is logged and swallowed.
    """
    try:
        resp_queue.put(response)
    except (ValueError, OSError) as send_err:
        logger.error("Failed to send response: %s", send_err)


def _handle_load(backend, cmd: dict, resp_queue: Any) -> None:
    """Run ``backend.load_checkpoint`` for ``cmd`` and report the outcome.

    A "status" message is sent first, then a single "loaded" response whose
    ``success`` flag mirrors the backend result. Any exception raised along
    the way is turned into a failed "loaded" response carrying a stack trace.
    """
    ckpt = cmd["checkpoint_path"]
    load_kwargs = {
        "checkpoint_path": ckpt,
        "max_seq_length": cmd.get("max_seq_length", 2048),
        "load_in_4bit": cmd.get("load_in_4bit", True),
        "trust_remote_code": cmd.get("trust_remote_code", False),
    }

    try:
        progress = {
            "type": "status",
            "message": f"Loading checkpoint: {ckpt}",
            "ts": time.time(),
        }
        _send_response(resp_queue, progress)

        ok, detail = backend.load_checkpoint(**load_kwargs)

        # Model details are only reported for a successful load
        if ok:
            loaded_ckpt, vision_flag, peft_flag = (
                ckpt,
                backend.is_vision,
                backend.is_peft,
            )
        else:
            loaded_ckpt, vision_flag, peft_flag = None, False, False

        _send_response(
            resp_queue,
            {
                "type": "loaded",
                "success": ok,
                "message": detail,
                "checkpoint": loaded_ckpt,
                "is_vision": vision_flag,
                "is_peft": peft_flag,
                "ts": time.time(),
            },
        )

    except Exception as load_err:
        failure = {
            "type": "loaded",
            "success": False,
            "message": str(load_err),
            "stack": traceback.format_exc(limit = 20),
            "ts": time.time(),
        }
        _send_response(resp_queue, failure)


def _handle_export(backend, cmd: dict, resp_queue: Any) -> None:
    """Dispatch an export command (merged, base, gguf, lora) to ``backend``.

    The reply type is ``export_<export_type>_done``. An unknown export type
    produces a failed reply, as does any exception raised by the backend
    (which additionally carries a stack trace).
    """
    kind = cmd["export_type"]  # "merged", "base", "gguf", "lora"
    done_type = f"export_{kind}_done"

    try:
        # Arguments every export flavour accepts
        shared = {
            "save_directory": cmd.get("save_directory", ""),
            "push_to_hub": cmd.get("push_to_hub", False),
            "repo_id": cmd.get("repo_id"),
            "hf_token": cmd.get("hf_token"),
        }
        private = cmd.get("private", False)

        if kind == "merged":
            ok, detail = backend.export_merged_model(
                format_type = cmd.get("format_type", "16-bit (FP16)"),
                private = private,
                **shared,
            )
        elif kind == "base":
            ok, detail = backend.export_base_model(
                private = private,
                base_model_id = cmd.get("base_model_id"),
                **shared,
            )
        elif kind == "gguf":
            ok, detail = backend.export_gguf(
                quantization_method = cmd.get("quantization_method", "Q4_K_M"),
                **shared,
            )
        elif kind == "lora":
            ok, detail = backend.export_lora_adapter(private = private, **shared)
        else:
            ok, detail = False, f"Unknown export type: {kind}"

        reply = {
            "type": done_type,
            "success": ok,
            "message": detail,
            "ts": time.time(),
        }
        _send_response(resp_queue, reply)

    except Exception as export_err:
        failure = {
            "type": done_type,
            "success": False,
            "message": str(export_err),
            "stack": traceback.format_exc(limit = 20),
            "ts": time.time(),
        }
        _send_response(resp_queue, failure)


def _handle_cleanup(backend, resp_queue: Any) -> None:
    """Run ``backend.cleanup_memory`` and answer with a "cleanup_done" reply.

    The reply's ``success`` is the backend's return value; if an exception
    is raised, a failed reply with the exception text is sent instead.
    """
    try:
        cleaned = backend.cleanup_memory()
        reply = {
            "type": "cleanup_done",
            "success": cleaned,
            "ts": time.time(),
        }
        _send_response(resp_queue, reply)
    except Exception as cleanup_err:
        failure = {
            "type": "cleanup_done",
            "success": False,
            "message": str(cleanup_err),
            "ts": time.time(),
        }
        _send_response(resp_queue, failure)


def run_export_process(
    *,
    cmd_queue: Any,
    resp_queue: Any,
    config: dict,
) -> None:
    """Subprocess entrypoint. Persistent — runs command loop until shutdown.

    Startup runs in order: set environment variables and logging, activate
    the transformers version for the checkpoint, (Windows only) probe for
    Triton, import ``ExportBackend``, create the backend and load the
    initial checkpoint. If any of the guarded startup steps raises, an
    ``"error"`` response (with a stack trace) is sent and the function
    returns without entering the command loop.

    The command loop then handles dict commands keyed by ``"type"``:
    ``"load"``, ``"export"``, ``"cleanup"``, ``"status"`` and ``"shutdown"``.
    Any other type produces an ``"error"`` response. The loop exits on
    ``"shutdown"`` (after sending ``"shutdown_ack"``) or when reading the
    command queue raises ``EOFError``/``OSError``.

    Args:
        cmd_queue: mp.Queue for receiving commands from parent.
        resp_queue: mp.Queue for sending responses to parent.
        config: Initial configuration dict with checkpoint_path. It is also
            passed as-is to ``_handle_load`` for the initial load.

    Raises:
        KeyError: If ``config`` has no ``"checkpoint_path"`` key (this lookup
            is not wrapped in a try block).
    """
    import queue as _queue

    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["PYTHONWARNINGS"] = (
        "ignore"  # Suppress warnings at C-level before imports
    )

    import warnings
    from loggers.config import LogConfig

    # Python-level warnings are only silenced in production; ENVIRONMENT_TYPE
    # defaults to "production" when unset.
    if os.getenv("ENVIRONMENT_TYPE", "production") == "production":
        warnings.filterwarnings("ignore")

    LogConfig.setup_logging(
        service_name = "unsloth-studio-export-worker",
        env = os.getenv("ENVIRONMENT_TYPE", "production"),
    )

    checkpoint_path = config["checkpoint_path"]

    # ── 1. Activate correct transformers version BEFORE any ML imports ──
    try:
        _activate_transformers_version(checkpoint_path)
    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "error",
                "error": f"Failed to activate transformers version: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )
        return

    # ── 1b. On Windows, check Triton availability (must be before import torch) ──
    if sys.platform == "win32":
        try:
            import triton  # noqa: F401

            logger.info("Triton available — torch.compile enabled")
        except ImportError:
            # Without Triton, disable TorchDynamo via its environment switch.
            os.environ["TORCHDYNAMO_DISABLE"] = "1"
            logger.warning(
                "Triton not found on Windows — torch.compile disabled. "
                'Install for better performance: pip install "triton-windows<3.7"'
            )

    # ── 2. Import ML libraries (fresh in this clean process) ──
    try:
        _send_response(
            resp_queue,
            {
                "type": "status",
                "message": "Importing Unsloth...",
                "ts": time.time(),
            },
        )

        # Put the directory three levels above this file on sys.path so the
        # absolute ``core.export.export`` import below resolves.
        backend_path = str(Path(__file__).resolve().parent.parent.parent)
        if backend_path not in sys.path:
            sys.path.insert(0, backend_path)

        from core.export.export import ExportBackend

        import transformers

        logger.info(
            "Export subprocess loaded transformers %s", transformers.__version__
        )

    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "error",
                "error": f"Failed to import ML libraries: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )
        return

    # ── 3. Create export backend and load initial checkpoint ──
    try:
        backend = ExportBackend()

        # The startup config doubles as the first "load" command payload.
        _handle_load(backend, config, resp_queue)

    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "error",
                "error": f"Failed to initialize export backend: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )
        return

    # ── 4. Command loop — process commands until shutdown ──
    logger.info("Export subprocess ready, entering command loop")

    while True:
        try:
            # Poll with a 1 s timeout; an empty queue just retries.
            cmd = cmd_queue.get(timeout = 1.0)
        except _queue.Empty:
            continue
        except (EOFError, OSError):
            logger.info("Command queue closed, shutting down")
            return

        # A None entry is ignored (it is NOT treated as a shutdown sentinel).
        if cmd is None:
            continue

        cmd_type = cmd.get("type", "")
        logger.info("Received command: %s", cmd_type)

        try:
            if cmd_type == "load":
                # Load a new checkpoint (reusing this subprocess)
                backend.cleanup_memory()
                _handle_load(backend, cmd, resp_queue)

            elif cmd_type == "export":
                _handle_export(backend, cmd, resp_queue)

            elif cmd_type == "cleanup":
                _handle_cleanup(backend, resp_queue)

            elif cmd_type == "status":
                _send_response(
                    resp_queue,
                    {
                        "type": "status_response",
                        "checkpoint": backend.current_checkpoint,
                        "is_vision": backend.is_vision,
                        "is_peft": backend.is_peft,
                        "ts": time.time(),
                    },
                )

            elif cmd_type == "shutdown":
                logger.info("Shutdown command received, cleaning up and exiting")
                # Best-effort cleanup: a failure here must not prevent the
                # acknowledgement from being sent.
                try:
                    backend.cleanup_memory()
                except Exception:
                    pass
                _send_response(
                    resp_queue,
                    {
                        "type": "shutdown_ack",
                        "ts": time.time(),
                    },
                )
                return

            else:
                logger.warning("Unknown command type: %s", cmd_type)
                _send_response(
                    resp_queue,
                    {
                        "type": "error",
                        "error": f"Unknown command type: {cmd_type}",
                        "ts": time.time(),
                    },
                )

        except Exception as exc:
            # A failing command is reported and the loop keeps running.
            logger.error(
                "Error handling command '%s': %s", cmd_type, exc, exc_info = True
            )
            _send_response(
                resp_queue,
                {
                    "type": "error",
                    "error": f"Command '{cmd_type}' failed: {exc}",
                    "stack": traceback.format_exc(limit = 20),
                    "ts": time.time(),
                },
            )


================================================
FILE: studio/backend/core/inference/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Inference submodule - Inference backend for model loading and generation

The default get_inference_backend() returns an InferenceOrchestrator that
delegates to a subprocess. The original InferenceBackend runs inside
the subprocess and can be imported directly from .inference when needed.
"""

from .orchestrator import InferenceOrchestrator, get_inference_backend
from .llama_cpp import LlamaCppBackend

# Backward-compatibility alias: importing ``InferenceBackend`` from this
# package yields InferenceOrchestrator. Per the module docstring above, the
# original InferenceBackend class is importable from ``.inference``.
InferenceBackend = InferenceOrchestrator

# Names re-exported as the package's public API.
__all__ = [
    "InferenceBackend",
    "InferenceOrchestrator",
    "get_inference_backend",
    "LlamaCppBackend",
]


================================================
FILE: studio/backend/core/inference/audio_codecs.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Audio codec loading and decoding for TTS inference.
Supports: SNAC (Orpheus), CSM (Sesame), BiCodec (Spark), DAC (OuteTTS)
"""

import io
import re
import wave
import structlog
from loggers import get_logger
from typing import Optional, Tuple

import numpy as np
import torch

logger = get_logger(__name__)


def _numpy_to_wav_bytes(waveform: np.ndarray, sample_rate: int) -> bytes:
    """Convert a float32 numpy waveform to WAV bytes (16-bit PCM)."""
    waveform = waveform.flatten()
    peak = max(abs(waveform.max()), abs(waveform.min()))
    if peak > 1.0:
        waveform = waveform / peak
    pcm = (waveform * 32767).astype(np.int16)

    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm.tobytes())

    return buf.getvalue()


class AudioCodecManager:
    """Manages loading and caching of audio codec models for TTS decoding.

    Codecs are loaded lazily via :meth:`load_codec` and cached on the
    instance; each ``decode_*`` method turns model output (token ids, text,
    or a waveform) into ``(wav_bytes, sample_rate)``. Decoders that need a
    codec raise ``RuntimeError`` if it has not been loaded yet.
    """

    # Token layout used by decode_snac (Orpheus-style SNAC output).
    _SNAC_START_OF_SPEECH = 128257
    _SNAC_EOS = 128258
    _SNAC_CODE_BASE = 128266  # id of code 0 at frame position 0
    _SNAC_CODEBOOK_SIZE = 4096  # each frame position is offset by this much
    _SNAC_FRAME_LEN = 7  # codes per frame: 1 + 2 + 4 across three layers

    def __init__(self):
        self._snac_model = None
        self._bicodec_tokenizer = None
        self._bicodec_repo_path = None
        self._dac_audio_codec = None

    def load_codec(
        self,
        audio_type: str,
        device: str = "cuda",
        model_repo_path: Optional[str] = None,
    ) -> None:
        """Load the appropriate codec for the given audio type.

        Args:
            audio_type: One of ``"snac"``, ``"bicodec"``, ``"dac"``, ``"csm"``.
            device: Device the codec is placed on.
            model_repo_path: Local model repo path; only used for BiCodec.

        Raises:
            ValueError: If ``audio_type`` is not recognised.
        """
        if audio_type == "snac":
            self._load_snac(device)
        elif audio_type == "bicodec":
            self._load_bicodec(device, model_repo_path)
        elif audio_type == "dac":
            self._load_dac(device)
        elif audio_type == "csm":
            pass  # CSM decoding is built into the model (output_audio=True)
        else:
            raise ValueError(f"Unknown audio_type: {audio_type}")

    # ── Lazy loaders ─────────────────────────────────────────────

    def _load_snac(self, device: str) -> None:
        """Load the 24 kHz SNAC model once and cache it."""
        if self._snac_model is not None:
            return
        from snac import SNAC

        self._snac_model = (
            SNAC.from_pretrained("hubertsiuzdak/snac_24khz").to(device).eval()
        )
        logger.info("Loaded SNAC codec (24kHz)")

    def _load_bicodec(self, device: str, model_repo_path: Optional[str] = None) -> None:
        """Load the Spark-TTS BiCodec tokenizer once and cache it.

        Clones the Spark-TTS GitHub repo next to ``model_repo_path`` when the
        ``sparktts`` package directory is not already there, then adds the
        checkout to ``sys.path``.
        """
        if self._bicodec_tokenizer is not None:
            return
        import os
        import sys
        import subprocess

        # Clone SparkAudio/Spark-TTS GitHub repo for the sparktts Python package
        # (same approach as training — the HF model repos don't contain the package)
        spark_code_dir = os.path.join(
            os.path.dirname(model_repo_path or "."), "Spark-TTS"
        )
        sparktts_pkg = os.path.join(spark_code_dir, "sparktts")
        if not os.path.isdir(sparktts_pkg):
            logger.info(f"Cloning SparkAudio/Spark-TTS to {spark_code_dir}...")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "--depth",
                    "1",
                    "https://github.com/SparkAudio/Spark-TTS",
                    spark_code_dir,
                ],
                check = True,
            )

        if spark_code_dir not in sys.path:
            sys.path.insert(0, spark_code_dir)

        from sparktts.models.audio_tokenizer import BiCodecTokenizer

        # BiCodecTokenizer needs the MODEL repo path (contains BiCodec/ weights)
        tokenizer_path = model_repo_path or spark_code_dir
        self._bicodec_repo_path = tokenizer_path
        self._bicodec_tokenizer = BiCodecTokenizer(tokenizer_path, device)
        logger.info(f"Loaded BiCodec tokenizer from {tokenizer_path}")

    def _load_dac(self, device: str) -> None:
        """Load the OuteTTS DAC audio codec once and cache it.

        Clones the OuteTTS repo beside this file when missing, removes the
        modules that pull in incompatible dependencies, and builds an
        ``AudioProcessor`` to obtain its ``audio_codec``.
        """
        if self._dac_audio_codec is not None:
            return
        import os
        import sys
        import subprocess

        # Clone OuteTTS repo (same pattern as Spark-TTS / BiCodec)
        # The pip package has problematic dependencies; the notebook clones and
        # removes gguf_model.py, interface.py, __init__.py before importing.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        outetts_code_dir = os.path.join(base_dir, "OuteTTS")
        outetts_pkg = os.path.join(outetts_code_dir, "outetts")
        if not os.path.isdir(outetts_pkg):
            logger.info(f"Cloning edwko/OuteTTS to {outetts_code_dir}...")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "--depth",
                    "1",
                    "https://github.com/edwko/OuteTTS",
                    outetts_code_dir,
                ],
                check = True,
            )

        # Remove files that pull in heavy / incompatible dependencies
        # (matches notebook: gguf_model.py is under models/, others under outetts/).
        # This runs on every load, not only right after cloning, so a first
        # run that was interrupted between clone and prune still recovers;
        # it is a no-op once the files are gone.
        remove_paths = (
            os.path.join(outetts_pkg, "models", "gguf_model.py"),
            os.path.join(outetts_pkg, "interface.py"),
            os.path.join(outetts_pkg, "__init__.py"),
        )
        for fpath in remove_paths:
            if os.path.exists(fpath):
                os.remove(fpath)
                logger.info(f"Removed {fpath}")

        if outetts_code_dir not in sys.path:
            sys.path.insert(0, outetts_code_dir)

        from outetts.version.v3.audio_processor import AudioProcessor
        from outetts.models.config import ModelConfig as OuteTTSModelConfig

        dummy_config = OuteTTSModelConfig(
            tokenizer_path = "OuteAI/Llama-OuteTTS-1.0-1B",
            device = device,
            audio_codec_path = None,
        )
        processor = AudioProcessor(config = dummy_config)
        self._dac_audio_codec = processor.audio_codec
        logger.info("Loaded DAC audio codec")

    # ── Decoders ─────────────────────────────────────────────────

    def decode_snac(
        self, generated_ids: torch.Tensor, device: str
    ) -> Tuple[bytes, int]:
        """
        Decode SNAC tokens (Orpheus) into WAV bytes.

        generated_ids: full model output including prompt tokens, indexed as
        ``[batch, position]``; only the first batch row is decoded.
        Looks for START_OF_SPEECH (128257) marker, extracts codes after it,
        strips EOS (128258), redistributes 7-per-frame codes into 3 SNAC layers.

        Raises:
            ValueError: If no complete 7-code frame remains, or if any code
                falls outside the codebook range for its frame position.
            RuntimeError: If the SNAC codec has not been loaded.

        Returns (wav_bytes, 24000).
        """
        # Find START_OF_SPEECH token (128257); keep what follows the last one.
        start_positions = (generated_ids == self._SNAC_START_OF_SPEECH).nonzero(
            as_tuple = True
        )
        if len(start_positions[1]) > 0:
            cropped = generated_ids[:, start_positions[1][-1] + 1 :]
        else:
            # Gracefully fall back to using entire output if marker not found
            logger.warning(
                "No START_OF_SPEECH token (128257) found — using full generated output"
            )
            cropped = generated_ids
        row = cropped[0]

        # Remove EOS tokens (128258)
        row = row[row != self._SNAC_EOS]

        # Trim to multiple of 7
        frame_len = self._SNAC_FRAME_LEN
        row = row[: (len(row) // frame_len) * frame_len]
        if len(row) == 0:
            raise ValueError("No valid audio codes found after START_OF_SPEECH token")

        # Vectorised equivalent of "code - 128266 - 4096 * position_in_frame":
        # one (num_frames, 7) table of codebook indices.
        position_offsets = (
            torch.arange(frame_len, device = row.device, dtype = torch.long)
            * self._SNAC_CODEBOOK_SIZE
        )
        frames = (
            row.to(torch.long) - self._SNAC_CODE_BASE
        ).reshape(-1, frame_len) - position_offsets

        # Reject indices the codec cannot look up. Feeding them to the model
        # would fail deep inside the embedding lookup.
        out_of_range = (frames < 0) | (frames >= self._SNAC_CODEBOOK_SIZE)
        if bool(out_of_range.any()):
            raise ValueError(
                "SNAC audio codes out of range — generated output contains "
                "non-audio tokens or misaligned frames"
            )

        if self._snac_model is None:
            raise RuntimeError(
                "SNAC codec is not loaded — call load_codec('snac') first"
            )

        # Redistribute into 3 SNAC layers (7 codes per frame → 1+2+4):
        # layer 1 takes position 0, layer 2 positions 1 and 4,
        # layer 3 positions 2, 3, 5 and 6 (in that order, frame by frame).
        layers = (
            frames[:, 0],
            frames[:, [1, 4]].reshape(-1),
            frames[:, [2, 3, 5, 6]].reshape(-1),
        )
        snac_codes = [
            layer.contiguous().unsqueeze(0).to(device) for layer in layers
        ]

        with torch.no_grad():
            audio = self._snac_model.decode(snac_codes)

        waveform = audio.squeeze().cpu().numpy()
        return _numpy_to_wav_bytes(waveform, 24000), 24000

    def decode_csm(self, audio_values: torch.Tensor) -> Tuple[bytes, int]:
        """
        Decode CSM output (already a waveform from model.generate(output_audio=True)).
        Only the first element of ``audio_values`` is used.
        Returns (wav_bytes, 24000).
        """
        waveform = audio_values[0].to(torch.float32).cpu().numpy()
        return _numpy_to_wav_bytes(waveform, 24000), 24000

    def decode_bicodec(self, generated_text: str, device: str) -> Tuple[bytes, int]:
        """
        Decode BiCodec tokens (Spark-TTS) from generated text.
        Extracts bicodec_semantic_N and bicodec_global_N tokens via regex.

        Raises:
            ValueError: If the text contains no semantic tokens.
            RuntimeError: If the BiCodec tokenizer has not been loaded.

        Returns (wav_bytes, sample_rate).
        """
        semantic_matches = re.findall(r"<\|bicodec_semantic_(\d+)\|>", generated_text)
        global_matches = re.findall(r"<\|bicodec_global_(\d+)\|>", generated_text)

        logger.info(
            f"BiCodec decode: {len(global_matches)} global tokens, {len(semantic_matches)} semantic tokens"
        )
        if len(global_matches) < 10:
            logger.info(
                f"BiCodec generated text (first 500 chars): {generated_text[:500]}"
            )

        if not semantic_matches:
            raise ValueError("No bicodec_semantic tokens found in generated output")

        if self._bicodec_tokenizer is None:
            raise RuntimeError(
                "BiCodec tokenizer is not loaded — call load_codec('bicodec') first"
            )

        semantic_ids = (
            torch.tensor([int(t) for t in semantic_matches]).long().unsqueeze(0)
        )

        # Speaker encoder expects exactly 32 global tokens (token_num=32 in BiCodec config).
        # Truncate to 32, then pad with zeros if fewer were generated.
        GLOBAL_TOKEN_NUM = 32
        global_codes = [int(t) for t in global_matches[:GLOBAL_TOKEN_NUM]]
        global_codes += [0] * (GLOBAL_TOKEN_NUM - len(global_codes))
        global_ids = torch.tensor(global_codes).long().unsqueeze(0)  # (1, 32)

        self._bicodec_tokenizer.device = device
        self._bicodec_tokenizer.model.to(device)

        wav_np = self._bicodec_tokenizer.detokenize(
            global_ids.to(device),
            semantic_ids.to(device),
        )
        sr = self._bicodec_tokenizer.config.get("sample_rate", 16000)
        return _numpy_to_wav_bytes(wav_np, sr), sr

    def decode_dac(self, generated_text: str, device: str) -> Tuple[bytes, int]:
        """
        Decode DAC tokens (OuteTTS) from generated text.
        Extracts c1_N and c2_N codec code tokens via regex; the two streams
        are truncated to the shorter one's length.

        Raises:
            ValueError: If either code stream is missing from the text.
            RuntimeError: If the DAC codec has not been loaded.

        Returns (wav_bytes, 24000).
        """
        c1 = list(map(int, re.findall(r"<\|c1_(\d+)\|>", generated_text)))
        c2 = list(map(int, re.findall(r"<\|c2_(\d+)\|>", generated_text)))

        if not c1 or not c2:
            raise ValueError("No DAC code tokens (c1/c2) found in generated output")

        if self._dac_audio_codec is None:
            raise RuntimeError(
                "DAC codec is not loaded — call load_codec('dac') first"
            )

        t = min(len(c1), len(c2))
        c1 = c1[:t]
        c2 = c2[:t]

        codes = torch.tensor([[c1, c2]], dtype = torch.int64).to(device)
        with torch.no_grad():
            audio = self._dac_audio_codec.decode(codes)

        waveform = audio.squeeze().cpu().numpy()
        return _numpy_to_wav_bytes(waveform, 24000), 24000

    def decode(
        self,
        audio_type: str,
        device: str,
        token_ids: Optional[list] = None,
        text: Optional[str] = None,
    ) -> Tuple[bytes, int]:
        """Unified decode — dispatches to the right codec decoder.

        ``"snac"`` needs ``token_ids``; ``"bicodec"`` and ``"dac"`` need
        ``text``. Any other ``audio_type`` (including ``"csm"``, which is
        decoded through :meth:`decode_csm` from a waveform) is rejected.

        Raises:
            ValueError: If the required input is missing/empty or the audio
                type cannot be decoded here.
        """
        if audio_type == "snac":
            if not token_ids:
                raise ValueError("SNAC decoding requires token_ids")
            return self.decode_snac(torch.tensor([token_ids], dtype = torch.long), device)
        elif audio_type == "bicodec":
            if not text:
                raise ValueError("BiCodec decoding requires text")
            return self.decode_bicodec(text, device)
        elif audio_type == "dac":
            if not text:
                raise ValueError("DAC decoding requires text")
            return self.decode_dac(text, device)
        raise ValueError(f"Cannot decode audio_type: {audio_type}")

    # ── Cleanup ──────────────────────────────────────────────────

    def unload(self) -> None:
        """Release all codec models from memory.

        Drops this manager's references to every cached codec so they can be
        garbage-collected; codecs can be loaded again afterwards.
        """
        self._snac_model = None
        self._bicodec_tokenizer = None
        self._bicodec_repo_path = None
        self._dac_audio_codec = None
        logger.info("Unloaded all audio codecs")


================================================
FILE: studio/backend/core/inference/defaults.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Default model lists for inference, split by platform."""

import utils.hardware.hardware as hw

# GGUF model repos; get_default_models() returns a copy of this list when
# hw.CHAT_ONLY is truthy.
DEFAULT_MODELS_GGUF = [
    "unsloth/Llama-3.2-1B-Instruct-GGUF",
    "unsloth/Llama-3.2-3B-Instruct-GGUF",
    "unsloth/Llama-3.1-8B-Instruct-GGUF",
    "unsloth/gemma-3-1b-it-GGUF",
    "unsloth/gemma-3-4b-it-GGUF",
    "unsloth/Qwen3-4B-GGUF",
]

# Non-GGUF model repos; get_default_models() returns a copy of this list in
# every other case.
DEFAULT_MODELS_STANDARD = [
    "unsloth/Qwen3-4B-Instruct-2507",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Gemma-3-4B-it",
    "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit",
]


def get_default_models() -> list[str]:
    """Return a fresh copy of the default model list for this platform.

    The GGUF list is chosen when ``hw.CHAT_ONLY`` is truthy, the standard
    list otherwise. A new list is returned on every call, so callers may
    mutate the result without touching the module-level constants.
    """
    hw.get_device()  # ensure detect_hardware() has run
    chosen = DEFAULT_MODELS_GGUF if hw.CHAT_ONLY else DEFAULT_MODELS_STANDARD
    return list(chosen)


================================================
FILE: studio/backend/core/inference/inference.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Core inference backend - streamlined
"""

from unsloth import FastLanguageModel, FastVisionModel
from unsloth.chat_templates import get_chat_template
from transformers import TextStreamer
from peft import PeftModel, PeftModelForCausalLM

import json
import sys
import torch
from pathlib import Path
from typing import Optional, Union, Generator, Tuple
from utils.models import ModelConfig, get_base_model_from_lora
from utils.paths import is_model_cached
from utils.utils import format_error_message
from utils.hardware import get_device, clear_gpu_cache, log_gpu_memory
from core.inference.audio_codecs import AudioCodecManager
from io import StringIO
import structlog
from loggers import get_logger


logger = get_logger(__name__)


class HarmonyTextStreamer:
    """Streaming text decoder for gpt-oss harmony channel protocol.

    gpt-oss models emit multi-channel output using special tokens like
    ``<|channel|>analysis<|message|>...`` and ``<|channel|>final<|message|>...``.
    A plain ``TextIteratorStreamer(skip_special_tokens=True)`` strips the special
    tokens but leaves the channel names concatenated with content, producing
    garbled output such as ``analysisWe need to respond...assistantfinalHello!``.

    This streamer decodes with ``skip_special_tokens=False`` so the full
    harmony markup is visible, then uses **stateful incremental** parsing
    to emit properly-formatted text:

    - ``<think>`` emitted once when the ``analysis`` channel is first seen
    - Analysis content streamed incrementally
    - ``</think>`` emitted once when the ``final`` channel is first seen
    - Final content streamed incrementally

    This avoids the delta-on-transformed bug where wrapping tags shift
    position as content grows.

    A message's content ends at ``<|end|>``, ``<|return|>``, ``<|call|>``,
    the next ``<|channel|>``, or the end of the decoded text, so stop tokens
    never leak into the streamed output. While a message is still growing,
    trailing U+FFFD replacement characters (a partially decoded multi-byte
    character) are held back until the character is complete.

    Implements the same ``put`` / ``end`` / iterator interface as
    ``TextIteratorStreamer`` so ``generate_stream`` can use it as a drop-in
    replacement.
    """

    import re as _re

    _HARMONY_RE = _re.compile(
        r"<\|channel\|>(\w+)<\|message\|>(.*?)"
        r"(?=<\|end\|>|<\|return\|>|<\|call\|>|<\|channel\|>|\Z)",
        _re.DOTALL,
    )

    def __init__(self, tokenizer, *, skip_prompt: bool = True, timeout: float = 0.2):
        """Create a streamer.

        Args:
            tokenizer: Object with ``decode(ids, skip_special_tokens=...)``.
            skip_prompt: Treat the first ``put`` call as the prompt and do
                not stream it.
            timeout: Seconds ``__next__`` waits for new text before raising
                ``queue.Empty``.
        """
        import queue

        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.timeout = timeout

        self._queue: queue.Queue = queue.Queue()
        self._token_ids: list = []
        self._prompt_len: int = 0
        self._is_first_put: bool = True
        self._stop: bool = False

        # Stateful channel tracking — avoids delta-on-transformed bugs
        self._emitted_think_open: bool = False
        self._emitted_think_close: bool = False
        self._analysis_emitted: int = 0  # chars of analysis content emitted
        self._final_emitted: int = 0  # chars of final content emitted

    # ------------------------------------------------------------------
    # put / end — called from the generation thread
    # ------------------------------------------------------------------

    def put(self, value):
        """Receive new token IDs from model.generate()."""
        import torch

        if isinstance(value, torch.Tensor):
            # value shape: (batch, seq) — take first batch element
            ids = value[0].tolist() if value.dim() > 1 else value.tolist()
        elif isinstance(value, (list, tuple)):
            ids = list(value)
        else:
            ids = [value]

        if self._is_first_put and self.skip_prompt:
            # First call contains the full prompt; remember its length
            self._prompt_len = len(ids)
            self._token_ids = list(ids)
            self._is_first_put = False
            return

        self._token_ids.extend(ids)

        # Decode only the generated part (after the prompt)
        gen_ids = self._token_ids[self._prompt_len :]
        raw = self.tokenizer.decode(gen_ids, skip_special_tokens = False)
        self._process_incremental(raw)

    def end(self):
        """Signal generation is complete."""
        # Final decode to capture any remaining content; flush so that any
        # held-back trailing characters are emitted as-is.
        gen_ids = self._token_ids[self._prompt_len :]
        if gen_ids:
            raw = self.tokenizer.decode(gen_ids, skip_special_tokens = False)
            self._process_incremental(raw, flush = True)

        # Close any open think tags
        if self._emitted_think_open and not self._emitted_think_close:
            self._queue.put("</think>")
            self._emitted_think_close = True

        self._stop = True
        self._queue.put(None)  # sentinel

    # ------------------------------------------------------------------
    # Iterator interface — consumed by the streaming loop
    # ------------------------------------------------------------------

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next text piece.

        Raises ``StopIteration`` once the end sentinel is read (or the queue
        is empty after ``end()``), and ``queue.Empty`` on timeout while
        generation is still running.
        """
        from queue import Empty

        while True:
            try:
                val = self._queue.get(timeout = self.timeout)
            except Empty:
                if self._stop:
                    raise StopIteration
                raise  # propagate Empty so caller can check thread liveness
            if val is None:
                raise StopIteration
            return val

    # ------------------------------------------------------------------
    # Stateful incremental harmony protocol parsing
    # ------------------------------------------------------------------

    def _emit_new(self, content: str, already: int, hold_back: bool) -> int:
        """Queue the not-yet-emitted tail of ``content``; return the new count.

        With ``hold_back`` set, trailing U+FFFD characters are excluded: they
        mark a multi-byte character whose remaining bytes have not been
        generated yet, and emitting them would both show garbage and make the
        real character be skipped later (deltas are sliced by char count).
        """
        stable_len = len(content.rstrip("\ufffd")) if hold_back else len(content)
        if stable_len <= already:
            return already
        self._queue.put(content[already:stable_len])
        return stable_len

    def _process_incremental(self, raw: str, *, flush: bool = False) -> None:
        """Parse harmony channels and emit deltas per-channel.

        Instead of transforming the entire raw text and computing a string
        delta (which breaks when wrapping ``<think>`` tags shift position),
        this tracks per-channel content lengths and emits:

        - ``<think>`` once when analysis channel first appears
        - analysis content deltas (computed on channel content directly)
        - ``</think>`` once when final channel first appears
        - final content deltas

        Nothing is emitted until at least one complete
        ``<|channel|>name<|message|>`` header has been decoded, so partial
        channel names never show up as text.

        Args:
            raw: Full decoded generation so far, special tokens included.
            flush: Emit held-back trailing characters too (used at ``end``).
        """
        for m in self._HARMONY_RE.finditer(raw):
            channel = m.group(1).lower()
            content = m.group(2)

            # Only a message that runs to the end of ``raw`` can still grow;
            # one followed by a terminator is complete and is emitted fully.
            hold_back = not flush and m.end() == len(raw)

            if channel == "analysis":
                if not self._emitted_think_open:
                    self._queue.put("<think>")
                    self._emitted_think_open = True

                self._analysis_emitted = self._emit_new(
                    content, self._analysis_emitted, hold_back
                )

            elif channel in ("final", "assistant"):
                if self._emitted_think_open and not self._emitted_think_close:
                    self._queue.put("</think>")
                    self._emitted_think_close = True

                self._final_emitted = self._emit_new(
                    content, self._final_emitted, hold_back
                )


class InferenceBackend:
    """Unified inference backend supporting text, vision, and LoRA models"""

    def __init__(self):
        """Create an empty backend: no models loaded, device resolved, locks ready."""
        import threading

        from core.inference.defaults import get_default_models

        # Registry state — nothing is loaded, loading, or active yet.
        self.models = {}
        self.active_model_name = None
        self.loading_models = set()
        self.loaded_local_models = []  # [(display_name, path), ...]

        # Environment: default model list, target device, external audio codecs.
        self.default_models = get_default_models()
        self.device = get_device().value
        self._audio_codec_manager = AudioCodecManager()

        # Thread safety — _generation_lock serializes model.generate() calls.
        # It is deliberately a plain Lock rather than an RLock: in async FastAPI
        # several requests share the one event-loop thread, so a reentrant lock
        # would let concurrent compare-mode requests race on the GPU.  The lock
        # is taken by the *background generation thread*, not the event loop.
        self._generation_lock = threading.Lock()
        self._model_state_lock = threading.Lock()

        logger.info(f"InferenceBackend initialized on {self.device}")

    @staticmethod
    def _normalize_top_k(top_k: int) -> int:
        # API supports -1 as "disable top-k"; transformers expects 0 to disable.
        return 0 if top_k < 0 else top_k

    def load_model(
        self,
        config: ModelConfig,
        max_seq_length: int = 2048,
        dtype = None,
        load_in_4bit: bool = True,
        hf_token: Optional[str] = None,
        trust_remote_code: bool = False,
    ) -> bool:
        """
        Load any model: base, LoRA adapter, text, vision, or audio.

        On success the loaded objects are stored under
        ``self.models[config.identifier]`` and the model becomes the active one.

        Args:
            config: Describes the model (identifier, path, modality and LoRA flags).
            max_seq_length: Forwarded to the loaders that accept it (text, vision,
                and the "dac" / default audio branches).
            dtype: Forwarded to the text and vision loaders.
            load_in_4bit: Forwarded to the text and vision loaders. Every audio
                branch passes ``load_in_4bit = False`` regardless of this value.
            hf_token: HuggingFace token; an empty or whitespace-only value is
                treated as "no token".
            trust_remote_code: Forwarded to every ``from_pretrained`` call.

        Returns:
            True when the model is loaded (or was already loaded); False when a
            load of the same identifier is already in progress.

        Raises:
            Exception: Carrying the message built by ``format_error_message`` when
                loading fails. The partial registry entry is removed first and the
                original error is attached as ``__cause__``.
        """
        try:
            model_name = config.identifier

            # Check if already loaded
            if model_name in self.models and self.models[model_name].get("model"):
                logger.info(f"Model {model_name} already loaded")
                self.active_model_name = model_name
                return True

            # Check if currently loading
            if model_name in self.loading_models:
                logger.info(f"Model {model_name} is already being loaded")
                return False

            self.loading_models.add(model_name)

            self.models[model_name] = {
                "is_vision": config.is_vision,
                "is_lora": config.is_lora,
                "is_audio": config.is_audio,
                "audio_type": config.audio_type,
                "has_audio_input": config.has_audio_input,
                "model_path": config.path,
                "base_model": config.base_model if config.is_lora else None,
                "loaded_adapters": {},
                "active_adapter": None,
            }

            # Normalize the token once: blank / whitespace-only means "no token".
            token = hf_token if hf_token and hf_token.strip() else None

            # ── Audio model loading path ──────────────────────────
            if config.is_audio:
                audio_type = config.audio_type
                adapter_info = " (LoRA adapter)" if config.is_lora else ""
                logger.info(
                    f"Loading audio ({audio_type}) model{adapter_info}: {model_name}"
                )
                log_gpu_memory(f"Before loading {model_name}")

                if audio_type == "csm":
                    from unsloth import FastModel
                    from transformers import CsmForConditionalGeneration

                    model, processor = FastModel.from_pretrained(
                        config.path,
                        auto_model = CsmForConditionalGeneration,
                        load_in_4bit = False,
                        token = token,
                        trust_remote_code = trust_remote_code,
                    )
                    FastModel.for_inference(model)
                    self.models[model_name]["model"] = model
                    self.models[model_name]["tokenizer"] = processor
                    self.models[model_name]["processor"] = processor
                elif audio_type == "bicodec":
                    import os
                    from unsloth import FastModel

                    if config.is_lora and config.base_model:
                        # LoRA adapter: load from local adapter path.
                        # base_model is e.g. /home/.../Spark-TTS-0.5B/LLM
                        # The BiCodec weights are in the parent dir (Spark-TTS-0.5B/).
                        base_path = config.base_model
                        if os.path.isdir(base_path):
                            abs_repo_path = os.path.abspath(os.path.dirname(base_path))
                        else:
                            # base_model is an HF ID — download it
                            from huggingface_hub import snapshot_download

                            local_dir = base_path.split("/")[-1]
                            repo_path = snapshot_download(
                                base_path, local_dir = local_dir
                            )
                            abs_repo_path = os.path.abspath(repo_path)

                        logger.info(
                            f"Spark-TTS LoRA: loading adapter from {config.path}, BiCodec from {abs_repo_path}"
                        )
                        model, tokenizer = FastModel.from_pretrained(
                            config.path,
                            dtype = torch.float32,
                            load_in_4bit = False,
                            token = token,
                            trust_remote_code = trust_remote_code,
                        )
                    else:
                        # Base model: download full HF repo, then load from /LLM subfolder
                        from huggingface_hub import snapshot_download

                        hf_repo = config.path
                        local_dir = hf_repo.split("/")[-1]
                        repo_path = snapshot_download(hf_repo, local_dir = local_dir)
                        abs_repo_path = os.path.abspath(repo_path)
                        llm_path = os.path.join(abs_repo_path, "LLM")
                        logger.info(
                            f"Spark-TTS: downloaded repo to {repo_path}, loading LLM from {llm_path}"
                        )

                        model, tokenizer = FastModel.from_pretrained(
                            llm_path,
                            dtype = torch.float32,
                            load_in_4bit = False,
                            token = token,
                            trust_remote_code = trust_remote_code,
                        )

                    FastModel.for_inference(model)
                    self.models[model_name]["model"] = model
                    self.models[model_name]["tokenizer"] = tokenizer
                    # Remembered so the codec loader below can find the BiCodec weights.
                    self.models[model_name]["model_repo_path"] = abs_repo_path
                elif audio_type == "dac":
                    # OuteTTS uses FastModel (not FastLanguageModel)
                    from unsloth import FastModel

                    model, tokenizer = FastModel.from_pretrained(
                        config.path,
                        max_seq_length = max_seq_length,
                        load_in_4bit = False,
                        token = token,
                        trust_remote_code = trust_remote_code,
                    )
                    FastModel.for_inference(model)
                    self.models[model_name]["model"] = model
                    self.models[model_name]["tokenizer"] = tokenizer
                elif audio_type == "whisper":
                    # Whisper ASR — uses FastModel with WhisperForConditionalGeneration
                    from unsloth import FastModel
                    from transformers import WhisperForConditionalGeneration

                    model, tokenizer = FastModel.from_pretrained(
                        config.path,
                        auto_model = WhisperForConditionalGeneration,
                        whisper_language = "English",
                        whisper_task = "transcribe",
                        load_in_4bit = False,
                        token = token,
                        trust_remote_code = trust_remote_code,
                    )
                    FastModel.for_inference(model)
                    model.eval()

                    # Create ASR pipeline (per notebook)
                    from transformers import pipeline as hf_pipeline

                    whisper_pipe = hf_pipeline(
                        "automatic-speech-recognition",
                        model = model,
                        tokenizer = tokenizer.tokenizer,
                        feature_extractor = tokenizer.feature_extractor,
                        processor = tokenizer,
                        return_language = True,
                        torch_dtype = torch.float16,
                    )
                    self.models[model_name]["model"] = model
                    self.models[model_name]["tokenizer"] = tokenizer
                    self.models[model_name]["whisper_pipeline"] = whisper_pipe
                else:
                    # SNAC (Orpheus) uses FastLanguageModel
                    model, tokenizer = FastLanguageModel.from_pretrained(
                        model_name = config.path,
                        max_seq_length = max_seq_length,
                        load_in_4bit = False,
                        token = token,
                        trust_remote_code = trust_remote_code,
                    )
                    FastLanguageModel.for_inference(model)
                    self.models[model_name]["model"] = model
                    self.models[model_name]["tokenizer"] = tokenizer

                # Load the external codec for TTS audio types
                # (Whisper is ASR, audio_vlm is audio input — neither needs a codec)
                if audio_type not in ("whisper", "audio_vlm"):
                    model_repo_path = self.models[model_name].get("model_repo_path")
                    self._audio_codec_manager.load_codec(
                        audio_type, self.device, model_repo_path = model_repo_path
                    )

                self.active_model_name = model_name
                self.loading_models.discard(model_name)
                logger.info(f"Successfully loaded audio model: {model_name}")
                log_gpu_memory(f"After loading {model_name}")
                return True

            model_type = "vision" if config.is_vision else "text"
            adapter_info = (
                " (LoRA adapter)" if self.models[model_name]["is_lora"] else ""
            )
            logger.info(f"Loading {model_type} model{adapter_info}: {model_name}")
            log_gpu_memory(f"Before loading {model_name}")

            # Load model - same approach for base models and LoRA adapters
            if config.is_vision:
                # Vision model (or vision LoRA adapter)
                model, processor = FastVisionModel.from_pretrained(
                    model_name = config.path,  # Can be base model OR LoRA adapter path
                    max_seq_length = max_seq_length,
                    dtype = dtype,
                    load_in_4bit = load_in_4bit,
                    token = token,
                    trust_remote_code = trust_remote_code,
                )

                # Apply inference optimization
                FastVisionModel.for_inference(model)

                # FastVisionModel may return a raw tokenizer (e.g. GemmaTokenizerFast)
                # instead of a proper Processor for some models (e.g. Gemma-3).
                # In that case, load the real processor from the base model.
                from transformers import ProcessorMixin

                if not (
                    isinstance(processor, ProcessorMixin)
                    or hasattr(processor, "image_processor")
                ):
                    # For LoRA adapters, use the base model. For local merged exports,
                    # read export_metadata.json to find the original base model.
                    processor_source = (
                        config.base_model if config.is_lora else config.identifier
                    )
                    if not config.is_lora and config.is_local:
                        _meta_path = Path(config.path) / "export_metadata.json"
                        try:
                            if _meta_path.exists():
                                _meta = json.loads(_meta_path.read_text())
                                if _meta.get("base_model"):
                                    processor_source = _meta["base_model"]
                        except Exception as meta_err:
                            # Best-effort lookup: keep the default source, but
                            # leave a trace instead of failing silently.
                            logger.warning(
                                f"Could not read '{_meta_path}': {meta_err}"
                            )
                    logger.warning(
                        f"FastVisionModel returned {type(processor).__name__} (no image_processor) "
                        f"for '{model_name}' — loading proper processor from '{processor_source}'"
                    )
                    from transformers import AutoProcessor

                    processor = AutoProcessor.from_pretrained(
                        processor_source,
                        token = token,
                        trust_remote_code = trust_remote_code,
                    )
                    logger.info(
                        f"Loaded {type(processor).__name__} from {processor_source}"
                    )

                self.models[model_name]["model"] = model
                self.models[model_name]["tokenizer"] = processor
                self.models[model_name]["processor"] = processor

            else:
                # Text model (or text LoRA adapter)
                model, tokenizer = FastLanguageModel.from_pretrained(
                    model_name = config.path,  # Can be base model OR LoRA adapter path
                    max_seq_length = max_seq_length,
                    dtype = dtype,
                    load_in_4bit = load_in_4bit,
                    token = token,
                    trust_remote_code = trust_remote_code,
                )

                # Apply inference optimization
                FastLanguageModel.for_inference(model)

                self.models[model_name]["model"] = model
                self.models[model_name]["tokenizer"] = tokenizer

            # Load chat template info
            self._load_chat_template_info(model_name)

            self.active_model_name = model_name
            self.loading_models.discard(model_name)

            logger.info(f"Successfully loaded model: {model_name}")
            log_gpu_memory(f"After loading {model_name}")
            return True

        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            error_msg = format_error_message(e, config.identifier)

            # Cleanup on failure
            if model_name in self.models:
                del self.models[model_name]
            self.loading_models.discard(model_name)

            # Chain explicitly so the original failure stays attached as the cause.
            raise Exception(error_msg) from e

    def unload_model(self, model_name: str) -> bool:
        """
        Remove a model from the registry and release cached GPU memory.

        Returns:
            True when the model was unloaded, or when it was not registered in
            the first place (only a warning is logged). False when any step of
            the unload raised.
        """
        if model_name not in self.models:
            logger.warning(
                f"Attempted to unload model '{model_name}', but it was not found in the registry."
            )
            return True

        try:
            # Audio models also hold codecs in the codec manager; drop those first.
            if self.models[model_name].get("is_audio"):
                self._audio_codec_manager.unload()

            logger.info(f"Unloading model '{model_name}' from memory.")

            # Drop the registry entry, and the "active" pointer if it pointed here.
            self.models.pop(model_name)
            if self.active_model_name == model_name:
                self.active_model_name = None

            # Clear GPU memory cache
            clear_gpu_cache()

            # Remove stale compiled cache so the next model gets a fresh one
            from utils.cache_cleanup import clear_unsloth_compiled_cache

            clear_unsloth_compiled_cache()

            logger.info(f"Model '{model_name}' successfully unloaded.")
        except Exception as err:
            logger.error(f"Error while unloading model '{model_name}': {err}")
            return False

        return True

    def revert_to_base_model(self, base_model_name: str) -> bool:
        """
        Reverts the model to its pristine base state by unloading AND
        deleting all adapter configurations, as instructed.
        """
        if base_model_name not in self.models:
            return False

        model = self.models[base_model_name].get("model")

        try:
            # Step 1: Unload the adapter weights if model is a PeftModel.
            if isinstance(model, (PeftModel, PeftModelForCausalLM)):
                logger.info(f"Unloading LoRA adapters from '{base_model_name}'...")
                unwrapped_base_model = model.unload()
                self.models[base_model_name]["model"] = unwrapped_base_model
                model = unwrapped_base_model

            # Step 2: Clear any lingering peft_config from the unwrapped model.
            # After model.unload(), the base model may still carry a peft_config
            # attribute. Removing it ensures PeftModel.from_pretrained() gets
            # a clean base model without "multiple adapters" warnings.
            if hasattr(model, "peft_config"):
                del model.peft_config

            logger.info(f"Model '{base_model_name}' reverted to clean base state.")
            return True

        except Exception as e:
            logger.error(f"Failed to revert model to base state: {e}")
            import traceback

            logger.error(traceback.format_exc())
            return False

    def load_for_eval(
        self,
        lora_path: str,
        max_seq_length: int = 2048,
        dtype = None,
        load_in_4bit: bool = True,
        hf_token: Optional[str] = None,
    ) -> Tuple[bool, Optional[str], Optional[str]]:
        """
        Ensure the base model and the given LoRA adapter are both loaded.

        The base model is resolved from the adapter's config and loaded only if
        it is not already in the registry; the adapter is then attached through
        ``load_adapter``, which skips adapters the model already carries.

        Args:
            lora_path: Path (or ID) of the LoRA adapter. The adapter name is its
                last ``/``-separated component with ``.`` replaced by ``_``.
            max_seq_length: Forwarded to ``load_model`` for the base model.
            dtype: Forwarded to ``load_model`` for the base model.
            load_in_4bit: Forwarded to ``load_model`` for the base model.
            hf_token: Forwarded to the config lookup and to ``load_model``.

        Returns:
            ``(success, base_model_name, adapter_name)``. On failure ``success``
            is False and the names that could not be established are None.
        """
        try:
            from utils.models import ModelConfig

            lora_config = ModelConfig.from_lora_path(lora_path, hf_token)
            if not lora_config:
                return False, None, None

            base_model_name = lora_config.base_model

            # 1. Load the base model if it's not already in memory
            if base_model_name not in self.models or not self.models[
                base_model_name
            ].get("model"):
                logger.info(f"Base model '{base_model_name}' not loaded, loading now.")
                base_config = ModelConfig.from_ui_selection(
                    base_model_name, None, is_lora = False
                )
                if not self.load_model(
                    base_config, max_seq_length, dtype, load_in_4bit, hf_token
                ):
                    return False, None, None

            self.active_model_name = base_model_name

            # 2. Derive the adapter name from the user's selection. Trailing
            # slashes are stripped first so "outputs/run-1/" yields "run-1"
            # instead of an empty name.
            adapter_name = lora_path.rstrip("/").split("/")[-1].replace(".", "_")
            if not adapter_name:
                logger.error(f"Could not derive an adapter name from '{lora_path}'")
                return False, base_model_name, None

            # 3. Attach the adapter; load_adapter only reads from disk when the
            # model does not already have an adapter with this name.
            adapter_success = self.load_adapter(
                base_model_name = base_model_name,
                adapter_path = lora_path,
                adapter_name = adapter_name,
            )
            if not adapter_success:
                return False, base_model_name, None

            # 4. Return the adapter name for the UI logic to use.
            return True, base_model_name, adapter_name

        except Exception as e:
            logger.error(f"Error during load_for_eval: {e}")
            import traceback

            logger.error(traceback.format_exc())
            return False, None, None

    def load_adapter(
        self, base_model_name: str, adapter_path: str, adapter_name: str
    ) -> bool:
        """
        Load an adapter onto a registered model unless it is already attached.

        Args:
            base_model_name: Registry key of the model to attach the adapter to.
            adapter_path: Location the adapter is loaded from.
            adapter_name: Name to register the adapter under on the model.

        Returns:
            True when the adapter is attached (newly or already); False when the
            base model is not loaded or the load raised.
        """
        model_info = self.models.get(base_model_name)
        model = model_info.get("model") if model_info else None
        if model is None:
            # Honor the bool contract instead of letting a KeyError escape.
            logger.error(
                f"Failed to load adapter '{adapter_name}': "
                f"base model '{base_model_name}' is not loaded"
            )
            return False

        # The model's own peft_config is the source of truth for what is attached.
        if hasattr(model, "peft_config") and adapter_name in model.peft_config:
            logger.info(
                f"Adapter '{adapter_name}' is already attached to the model. Skipping load."
            )
            return True

        try:
            logger.info(
                f"Loading new adapter '{adapter_name}' from '{adapter_path}' onto {base_model_name}"
            )
            model.load_adapter(adapter_path, adapter_name = adapter_name)

            # Update our internal registry ONLY after a successful load.
            model_info.setdefault("loaded_adapters", {})[adapter_name] = adapter_path

            total_adapters = len(getattr(model, "peft_config", {}))
            logger.info(
                f"Adapter '{adapter_name}' loaded successfully. (Total unique adapters on model: {total_adapters})"
            )
            return True
        except Exception as e:
            logger.error(f"Failed to load adapter '{adapter_name}': {e}")
            return False

    def set_active_adapter(self, base_model_name: str, adapter_name: str) -> bool:
        """
        Make ``adapter_name`` the active adapter on a registered model.

        On success the choice is also recorded under the registry entry's
        ``"active_adapter"`` key.

        Returns:
            True when the adapter was activated; False when the base model is
            not loaded or ``set_adapter`` raised (e.g. unknown adapter name).
        """
        model_info = self.models.get(base_model_name)
        model = model_info.get("model") if model_info else None
        if model is None:
            # Honor the bool contract instead of letting a KeyError escape.
            logger.error(
                f"Failed to set active adapter to '{adapter_name}': "
                f"base model '{base_model_name}' is not loaded"
            )
            return False

        try:
            logger.info(f"Setting active adapter to: '{adapter_name}'")
            model.set_adapter(adapter_name)
            model_info["active_adapter"] = adapter_name
            return True
        except Exception as e:
            # This will catch the "adapter not found" error if something goes wrong.
            logger.error(f"Failed to set active adapter to '{adapter_name}': {e}")
            return False

    def _apply_adapter_state(self, use_adapter: Optional[Union[bool, str]]) -> None:
        """
        Apply adapter state before generation. Must be called under _generation_lock.

        Uses PEFT's disable_adapter_layers() / enable_adapter_layers() which toggle
        a boolean flag on each LoRA layer. Unsloth's fast_linear_forward checks this
        flag (proj.disable_adapters) and skips LoRA computation when True.
        This is non-destructive — no model unloading/reloading needed.

        Args:
            use_adapter: None = no change, False = disable (base model),
                         True = enable current adapter, str = enable specific adapter.
        """
        if use_adapter is None:
            return

        base = self.active_model_name
        if not base or base not in self.models:
            return

        model_info = self.models[base]
        model = model_info.get("model")
        if model is None:
            return

        if use_adapter is False:
            # Disable LoRA layers → base model output
            if isinstance(model, (PeftModel, PeftModelForCausalLM)):
                logger.info(
                    f"Compare mode: disabling adapters on '{base}' for base model generation"
                )
                model.base_model.disable_adapter_layers()
            else:
                logger.info(
                    f"Compare mode: model '{base}' is not a PeftModel, already base"
                )

        elif use_adapter is True:
            # Re-enable LoRA layers → adapter output
            if isinstance(model, (PeftModel, PeftModelForCausalLM)):
                logger.info(
                    f"Compare mode: enabling adapters on '{base}' for LoRA generation"
                )
                model.base_model.enable_adapter_layers()
            else:
                logger.warning("use_adapter=true but model is not a PeftModel")

        elif isinstance(use_adapter, str):
            # Enable adapters and set the specific one active
            if isinstance(model, (PeftModel, PeftModelForCausalLM)):
                logger.info(
                    f"Compare mode: enabling adapter '{use_adapter}' on '{base}'"
                )
                model.base_model.enable_adapter_layers()
                self.set_active_adapter(base, use_adapter)
            else:
                logger.warning(
                    f"use_adapter='{use_adapter}' but model is not a PeftModel"
                )

    def generate_with_adapter_control(
        self,
        use_adapter: Optional[Union[bool, str]] = None,
        cancel_event = None,
        **gen_kwargs,
    ) -> Generator[str, None, None]:
        """
        Thread-safe generation with optional adapter toggling.

        The adapter toggle + model.generate() are serialized by _generation_lock
        inside the background generation thread — NOT in the event-loop thread.
        This prevents the RLock-reentrant race that occurs when two async SSE
        handlers share the same event-loop thread.

        Args:
            use_adapter: Adapter control (None/False/True/str). See _apply_adapter_state.
            **gen_kwargs: Forwarded to generate_chat_response.
        """
        yield from self._generate_chat_response_inner(
            cancel_event = cancel_event, _adapter_state = use_adapter, **gen_kwargs
        )

    def generate_chat_response(
        self,
        messages: list,
        system_prompt: str,
        image = None,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """
        Generate response for text or vision models.
        The generation lock is acquired by the background generation thread.
        """
        yield from self._generate_chat_response_inner(
            messages = messages,
            system_prompt = system_prompt,
            image = image,
            temperature = temperature,
            top_p = top_p,
            top_k = top_k,
            min_p = min_p,
            max_new_tokens = max_new_tokens,
            repetition_penalty = repetition_penalty,
            cancel_event = cancel_event,
        )

    def _generate_chat_response_inner(
        self,
        messages: list,
        system_prompt: str = "",
        image = None,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
        _adapter_state = None,
    ) -> Generator[str, None, None]:
        """
        Inner generation logic. Called by both generate_chat_response
        and generate_with_adapter_control.

        _adapter_state is passed to generate_stream/vision so the background
        thread can toggle adapters under the generation lock.
        """
        if not self.active_model_name:
            yield "Error: No active model"
            return

        model_info = self.models[self.active_model_name]
        is_vision = model_info.get("is_vision", False)
        tokenizer = model_info.get("tokenizer") or model_info.get("processor")
        # Unwrap processor → raw tokenizer for VLMs on the text path
        tokenizer = getattr(tokenizer, "tokenizer", tokenizer)
        top_k = self._normalize_top_k(top_k)

        if is_vision and image:
            # Vision model generation (only when an image is actually provided)
            # Check that the stored processor can actually handle images.
            # FastVisionModel may return a raw tokenizer (e.g. GemmaTokenizerFast)
            # instead of a proper ProcessorMixin for some models (e.g. Gemma-3).
            from transformers import ProcessorMixin

            processor = model_info.get("processor")
            has_image_processing = processor is not None and (
                isinstance(processor, ProcessorMixin)
                or hasattr(processor, "image_processor")
            )
            if has_image_processing:
                yield from self._generate_vision_response(
                    messages,
                    system_prompt,
                    image,
                    temperature,
                    top_p,
                    top_k,
                    min_p,
                    max_new_tokens,
                    repetition_penalty,
                    cancel_event = cancel_event,
                )
                return
            else:
                logger.warning(
                    f"Model '{self.active_model_name}' is marked as vision but its processor "
                    f"({type(processor).__name__}) has no image_processor — "
                    f"falling back to text-only generation (image will be ignored)."
                )

        # Text path: Use training pipeline approach
        # Messages are already in ChatML format from eval.py

        # Step 1: Apply get_chat_template if model is in mapper
        try:
            from utils.datasets import (
                MODEL_TO_TEMPLATE_MAPPER,
                get_tokenizer_chat_template,
            )

            model_name_lower = self.active_model_name.lower()

            # Check if model has a registered template
            if model_name_lower in MODEL_TO_TEMPLATE_MAPPER:
                template_name = MODEL_TO_TEMPLATE_MAPPER[model_name_lower]
                logger.info(
                    f"Applying chat template '{template_name}' for {self.active_model_name}"
                )

                # This modifies the tokenizer with the correct template
                tokenizer = get_chat_template(
                    tokenizer,
                    chat_template = template_name,
                )
            else:
                logger.info(
                    f"No registered Unsloth template for {self.active_model_name}, using tokenizer default"
                )
        except Exception as e:
            logger.warning(f"Could not apply get_chat_template: {e}")

        # Step 2: Format with tokenizer.apply_chat_template()
        try:
            if not (hasattr(tokenizer, "chat_template") and tokenizer.chat_template):
                raise ValueError(
                    f"Model '{self.active_model_name}' has no chat_template set in its "
                    f"tokenizer_config.json. This is usually a problem with the model's "
                    f"HuggingFace repository — it is missing a 'chat_template' key. "
                    f"Please use a model that includes a chat template, or manually set "
                    f"one via tokenizer.chat_template before inference."
                )
            formatted_prompt = tokenizer.apply_chat_template(
                messages, tokenize = False, add_generation_prompt = True
            )
            logger.debug(f"Formatted prompt: {formatted_prompt[:200]}...")
        except Exception as e:
            logger.error(f"Error applying chat template: {e}")
            # Fallback to manual formatting
            formatted_prompt = self.format_chat_prompt(messages, system_prompt)

        # Step 3: Generate
        yield from self.generate_stream(
            formatted_prompt,
            temperature,
            top_p,
            top_k,
            min_p,
            max_new_tokens,
            repetition_penalty,
            cancel_event = cancel_event,
            _adapter_state = _adapter_state,
        )

    def _generate_vision_response(
        self,
        messages,
        system_prompt,
        image,
        temperature,
        top_p,
        top_k,
        min_p,
        max_new_tokens,
        repetition_penalty,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """Handle vision model generation with true token-by-token streaming.

        With an image, only the last user message (with ``<img ...>`` tags
        removed) is sent together with the image. Without an image the whole
        conversation is formatted through ``format_chat_prompt`` and generated
        as plain text.

        Args:
            messages: Chat history as dicts with ``role`` / ``content`` keys.
            system_prompt: Only used on the text-only path.
            image: Image handed to the processor; a falsy value selects the
                text-only path.
            temperature, top_p, top_k, min_p, repetition_penalty: Sampling
                settings forwarded to ``model.generate``. Sampling is turned
                off when ``temperature`` is not positive.
            max_new_tokens: Upper bound on the number of generated tokens.
            cancel_event: Optional event. Once set, the consumer loop stops
                and the generation thread is told to stop through a stopping
                criterion, so it releases ``_generation_lock`` promptly.

        Yields:
            The cumulative cleaned output after every new chunk, followed by
            an ``"Error: ..."`` string if generation failed.
        """
        model_info = self.models[self.active_model_name]
        model = model_info["model"]
        processor = model_info["processor"]
        # FastVisionModel may return a raw tokenizer (e.g. GemmaTokenizerFast)
        # instead of a Processor for some models. Safe unwrap for tokenize-only ops.
        raw_tokenizer = getattr(processor, "tokenizer", processor)

        # Extract user message
        user_message = ""
        if messages and messages[-1]["role"] == "user":
            import re

            user_message = messages[-1]["content"]
            user_message = re.sub(r"<img[^>]*>", "", user_message).strip()

        if not user_message:
            user_message = "Describe this image." if image else "Hello"

        # Prepare vision messages
        if image:
            vision_messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "image"},
                        {"type": "text", "text": user_message},
                    ],
                }
            ]

            input_text = processor.apply_chat_template(
                vision_messages, add_generation_prompt = True, tokenize = False
            )
            inputs = processor(
                image,
                input_text,
                add_special_tokens = False,
                return_tensors = "pt",
            ).to(self.device)
        else:
            # Text-only for vision model
            formatted_prompt = self.format_chat_prompt(messages, system_prompt)
            inputs = raw_tokenizer(formatted_prompt, return_tensors = "pt").to(
                self.device
            )

        # Stream with TextIteratorStreamer + background thread
        try:
            from queue import Empty
            from transformers import TextIteratorStreamer
            import threading

            streamer = TextIteratorStreamer(
                raw_tokenizer,
                skip_prompt = True,
                skip_special_tokens = True,
                timeout = 0.2,
            )

            generation_kwargs = dict(
                **inputs,
                streamer = streamer,
                max_new_tokens = max_new_tokens,
                use_cache = True,
                do_sample = temperature > 0,
                temperature = temperature,
                top_p = top_p,
                top_k = top_k,
                min_p = min_p,
                # Previously accepted but never forwarded to generate().
                repetition_penalty = repetition_penalty,
            )

            if cancel_event is not None:
                from transformers.generation.stopping_criteria import (
                    StoppingCriteria,
                    StoppingCriteriaList,
                )

                # Without this the worker thread keeps generating (and keeps
                # holding _generation_lock) until max_new_tokens even after
                # the consumer has stopped listening.
                class _CancelCriteria(StoppingCriteria):
                    def __init__(self, ev):
                        self.ev = ev

                    def __call__(self, input_ids, scores, **kwargs):
                        return self.ev.is_set()

                generation_kwargs["stopping_criteria"] = StoppingCriteriaList(
                    [_CancelCriteria(cancel_event)]
                )

            err: dict[str, str] = {}

            def generate_fn():
                with self._generation_lock:
                    try:
                        model.generate(**generation_kwargs)
                    except Exception as e:
                        err["msg"] = str(e)
                        logger.error(f"Vision generation error in thread: {e}")
                    finally:
                        # Always unblock the consumer, even after a failure.
                        try:
                            streamer.end()
                        except Exception:
                            pass

            thread = threading.Thread(target = generate_fn)
            thread.start()

            output = ""
            generation_complete = False
            try:
                while True:
                    if cancel_event is not None and cancel_event.is_set():
                        break
                    try:
                        new_token = next(streamer)
                    except StopIteration:
                        generation_complete = True
                        break
                    except Empty:
                        # No token within the streamer timeout: keep polling
                        # unless the worker thread has already exited.
                        if not thread.is_alive():
                            generation_complete = True
                            break
                        continue
                    if new_token:
                        output += new_token
                        cleaned = self._clean_generated_text(output)
                        yield cleaned
            finally:
                # Only signal cancel on an early exit so a normal completion
                # does not leave a stale cancel flag on a shared event.
                if cancel_event is not None and not generation_complete:
                    cancel_event.set()
                thread.join(timeout = 10)
                if thread.is_alive():
                    logger.warning(
                        "Vision generation thread did not exit after cancel/join timeout"
                    )

            if err.get("msg"):
                yield f"Error: {err['msg']}"

        except Exception as e:
            logger.error(f"Vision generation error: {e}")
            yield f"Error: {str(e)}"

    def generate_audio_input_response(
        self,
        messages,
        system_prompt,
        audio_array,
        temperature,
        top_p,
        top_k,
        min_p,
        max_new_tokens,
        repetition_penalty,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """Handle audio input (ASR) generation — accepts audio numpy array, streams text output.

        Uses processor.apply_chat_template with audio embedded in messages (Gemma 3n pattern).

        Args:
            messages: Chat history; the most recent non-empty user message is
                used as the instruction accompanying the audio.
            system_prompt: Optional system prompt; an ASR-specific default is
                used when it is empty.
            audio_array: Audio samples passed to the processor inside the
                user message.
            temperature, top_p, top_k, min_p, repetition_penalty: Accepted for
                signature parity with the other generators but not forwarded;
                decoding is greedy (``do_sample = False``).
            max_new_tokens: Upper bound on the number of generated tokens.
            cancel_event: Optional event. Once set, the consumer loop stops
                and the generation thread is told to stop through a stopping
                criterion.

        Yields:
            Each newly decoded text chunk (incremental, not cumulative),
            followed by an ``"Error: ..."`` string if generation failed.
        """
        import threading

        model_info = self.models[self.active_model_name]
        model = model_info["model"]
        processor = model_info.get("processor") or model_info.get("tokenizer")
        raw_tokenizer = getattr(processor, "tokenizer", processor)

        # Extract last user text — default matches notebook prompt
        user_text = "Please transcribe this audio."
        if messages:
            for msg in reversed(messages):
                if msg["role"] == "user" and msg.get("content"):
                    user_text = msg["content"]
                    break

        # Use ASR-specific system prompt if user hasn't set a custom one
        if not system_prompt:
            system_prompt = "You are an assistant that transcribes speech accurately."

        # Build messages in Gemma 3n format — audio goes INTO apply_chat_template
        audio_messages = [
            {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
            {
                "role": "user",
                "content": [
                    {"type": "audio", "audio": audio_array},
                    {"type": "text", "text": user_text},
                ],
            },
        ]

        # apply_chat_template handles audio embedding + tokenization in one step
        inputs = processor.apply_chat_template(
            audio_messages,
            add_generation_prompt = True,
            tokenize = True,
            return_dict = True,
            return_tensors = "pt",
            truncation = False,
        ).to(self.device)

        try:
            from transformers import TextIteratorStreamer
            from queue import Empty

            streamer = TextIteratorStreamer(
                raw_tokenizer,
                skip_prompt = True,
                skip_special_tokens = True,
                timeout = 0.2,
            )

            # Notebook uses do_sample=False for ASR (greedy decoding for accuracy)
            generation_kwargs = dict(
                **inputs,
                streamer = streamer,
                max_new_tokens = max_new_tokens,
                use_cache = True,
                do_sample = False,
            )

            if cancel_event is not None:
                from transformers.generation.stopping_criteria import (
                    StoppingCriteria,
                    StoppingCriteriaList,
                )

                # Lets a cancel actually stop model.generate() instead of
                # only stopping the consumer loop.
                class _CancelCriteria(StoppingCriteria):
                    def __init__(self, ev):
                        self.ev = ev

                    def __call__(self, input_ids, scores, **kwargs):
                        return self.ev.is_set()

                generation_kwargs["stopping_criteria"] = StoppingCriteriaList(
                    [_CancelCriteria(cancel_event)]
                )

            err: dict[str, str] = {}

            def generate_fn():
                with self._generation_lock:
                    try:
                        model.generate(**generation_kwargs)
                    except Exception as e:
                        err["msg"] = str(e)
                        logger.error(f"Audio input generation error in thread: {e}")
                    finally:
                        # Always unblock the consumer, even after a failure.
                        try:
                            streamer.end()
                        except Exception:
                            pass

            thread = threading.Thread(target = generate_fn)
            thread.start()

            generation_complete = False
            try:
                while True:
                    if cancel_event is not None and cancel_event.is_set():
                        break
                    try:
                        new_token = next(streamer)
                    except StopIteration:
                        generation_complete = True
                        break
                    except Empty:
                        # No token within the streamer timeout: keep polling
                        # unless the worker thread has already exited.
                        if not thread.is_alive():
                            generation_complete = True
                            break
                        continue
                    if new_token:
                        yield new_token
            finally:
                # Only set cancel_event on an early exit. Setting it after a
                # normal completion would leave a stale cancel signal on an
                # event that may be shared with the next generation request.
                if cancel_event is not None and not generation_complete:
                    cancel_event.set()
                thread.join(timeout = 10)
                if thread.is_alive():
                    logger.warning(
                        "Audio input generation thread did not exit after cancel/join timeout"
                    )

            if err.get("msg"):
                yield f"Error: {err['msg']}"

        except Exception as e:
            logger.error(f"Audio input generation error: {e}")
            yield f"Error: {str(e)}"

    def generate_whisper_response(
        self, audio_array, cancel_event = None
    ) -> Generator[str, None, None]:
        """Transcribe ``audio_array`` with the active model's Whisper pipeline.

        The pipeline is looked up under the ``"whisper_pipeline"`` key of the
        active model's entry and is invoked once while holding
        ``_generation_lock``. ``cancel_event`` is accepted but not consulted.

        Yields:
            The transcribed text (nothing if it is empty), or a single
            ``"Error: ..."`` string when the pipeline is missing or fails.
        """
        pipeline = self.models[self.active_model_name].get("whisper_pipeline")
        if pipeline:
            try:
                payload = {"raw": audio_array, "sampling_rate": 16000}
                with self._generation_lock:
                    outcome = pipeline(payload)

                # Dict results carry the transcript under "text"; anything
                # else is stringified as-is.
                if isinstance(outcome, dict):
                    transcript = outcome.get("text", "")
                else:
                    transcript = str(outcome)

                if transcript:
                    yield transcript
            except Exception as exc:
                logger.error(f"Whisper ASR error: {exc}")
                yield f"Error: {str(exc)}"
        else:
            yield "Error: Whisper pipeline not initialized"

    def _is_gpt_oss_model(self, model_name: str = None) -> bool:
        """Check if the given (or active) model uses the gpt-oss harmony protocol."""
        name = (model_name or self.active_model_name or "").lower()
        try:
            from utils.datasets import MODEL_TO_TEMPLATE_MAPPER

            # Exact match
            if MODEL_TO_TEMPLATE_MAPPER.get(name) == "gpt-oss":
                return True
            # Partial match (e.g. name-bnb-4bit variants)
            for key, tmpl in MODEL_TO_TEMPLATE_MAPPER.items():
                if tmpl == "gpt-oss" and (key in name or name in key):
                    return True
        except Exception:
            pass
        return "gpt-oss" in name

    def generate_stream(
        self,
        prompt: str,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
        _adapter_state = None,
    ) -> Generator[str, None, None]:
        """Generate streaming text response (text models only).

        Tokenizes ``prompt``, runs ``model.generate`` on a background thread
        and relays the streamer's output to the caller.

        Args:
            prompt: Fully formatted prompt string to tokenize.
            temperature, top_p, top_k, min_p, repetition_penalty: Sampling
                settings forwarded to ``model.generate``. Sampling is turned
                off when ``temperature`` is not positive.
            max_new_tokens: Upper bound on the number of generated tokens.
            cancel_event: Optional event. It is polled by the consumer loop
                and also installed as a stopping criterion for the generation
                thread.

        _adapter_state: if not None, the background thread toggles adapters
        before model.generate(), all under _generation_lock.

        Yields:
            The cumulative output so far, passed through
            ``_clean_generated_text``, after each new chunk; an
            ``"Error: ..."`` string when no model is active or generation
            fails.
        """
        if not self.active_model_name:
            yield "Error: No active model"
            return

        model_info = self.models[self.active_model_name]
        model = model_info["model"]
        # For VLMs the stored "tokenizer" is actually the processor.
        # Unwrap to get the real tokenizer so TextIteratorStreamer's
        # skip_prompt / skip_special_tokens work correctly.
        tokenizer = model_info["tokenizer"]
        tokenizer = getattr(tokenizer, "tokenizer", tokenizer)

        try:
            inputs = tokenizer(prompt, return_tensors = "pt").to(model.device)

            from transformers import TextIteratorStreamer
            import threading

            # Use HarmonyTextStreamer for gpt-oss models to properly parse
            # the multi-channel harmony protocol into <think> tags
            if self._is_gpt_oss_model():
                try:
                    streamer = HarmonyTextStreamer(
                        tokenizer,
                        skip_prompt = True,
                        timeout = 0.2,
                    )
                except Exception as e:
                    logger.warning(
                        f"HarmonyTextStreamer init failed, falling back: {e}"
                    )
                    streamer = TextIteratorStreamer(
                        tokenizer,
                        skip_prompt = True,
                        skip_special_tokens = True,
                        timeout = 0.2,
                    )
            else:
                streamer = TextIteratorStreamer(
                    tokenizer,
                    skip_prompt = True,
                    skip_special_tokens = True,
                    timeout = 0.2,
                )

            # pad_token_id falls back to the EOS id when the tokenizer
            # defines no pad token.
            generation_kwargs = dict(
                **inputs,
                streamer = streamer,
                max_new_tokens = max_new_tokens,
                temperature = temperature,
                top_p = top_p,
                top_k = top_k,
                min_p = min_p,
                repetition_penalty = repetition_penalty,
                do_sample = temperature > 0,
                eos_token_id = tokenizer.eos_token_id,
                pad_token_id = tokenizer.eos_token_id
                if tokenizer.pad_token_id is None
                else tokenizer.pad_token_id,
            )
            if cancel_event is not None:
                from transformers.generation.stopping_criteria import (
                    StoppingCriteria,
                    StoppingCriteriaList,
                )

                # Stopping criterion that reports "stop" as soon as the
                # cancel event is set, so the worker thread can end early.
                class _CancelCriteria(StoppingCriteria):
                    def __init__(self, ev):
                        self.ev = ev

                    def __call__(self, input_ids, scores, **kwargs):
                        return self.ev.is_set()

                generation_kwargs["stopping_criteria"] = StoppingCriteriaList(
                    [_CancelCriteria(cancel_event)]
                )

            # Worker body: adapter toggling and generation both happen while
            # holding _generation_lock; failures are recorded in `err`.
            def generate_fn():
                with self._generation_lock:
                    try:
                        if _adapter_state is not None:
                            self._apply_adapter_state(_adapter_state)
                        model.generate(**generation_kwargs)
                    except Exception as e:
                        err["msg"] = str(e)
                        logger.error(f"Generation error: {e}")
                    finally:
                        # Always signal end-of-stream so the consumer loop
                        # below is not left waiting after a failure.
                        try:
                            streamer.end()
                        except Exception:
                            pass

            # Shared with generate_fn via closure; bound before the thread starts.
            err: dict[str, str] = {}
            thread = threading.Thread(target = generate_fn)
            thread.start()

            output = ""
            from queue import Empty

            # True only when the stream ended on its own (not via cancel).
            generation_complete = False
            try:
                while True:
                    if cancel_event is not None and cancel_event.is_set():
                        break
                    try:
                        new_token = next(streamer)
                    except StopIteration:
                        generation_complete = True
                        break
                    except Empty:
                        # No chunk arrived yet: keep polling (which re-checks
                        # cancel_event) unless the worker has already exited.
                        if not thread.is_alive():
                            generation_complete = True
                            break
                        continue
                    if new_token:
                        output += new_token
                        cleaned = self._clean_generated_text(output)
                        yield cleaned
            finally:
                # Only set cancel_event when we exited early (user cancel),
                # NOT on normal completion.  cancel_event is a shared mp.Event
                # — setting it unconditionally would leave a stale cancel
                # signal that could interfere with the next serialized
                # generation request (e.g. in compare mode).
                if cancel_event is not None and not generation_complete:
                    cancel_event.set()
                thread.join(timeout = 10)
                if thread.is_alive():
                    logger.warning(
                        "Generation thread did not exit after cancel/join timeout"
                    )

            # Surface a worker-thread failure to the caller as a final item.
            if err.get("msg"):
                yield f"Error: {err['msg']}"

        except Exception as e:
            logger.error(f"Error during generation: {e}")
            yield f"Error: {str(e)}"

    # ── Audio (TTS) Generation ────────────────────────────────────

    def generate_audio_response(
        self,
        text: str,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 50,
        min_p: float = 0.0,
        max_new_tokens: int = 2048,
        repetition_penalty: float = 1.0,
        use_adapter: Optional[Union[bool, str]] = None,
    ) -> Tuple[bytes, int]:
        """
        Synthesize speech for ``text`` with the active TTS model.

        The model entry's ``audio_type`` selects the codec-specific generator
        (``snac``, ``csm``, ``bicodec`` or ``dac``). Adapter toggling and
        generation both run while holding ``_generation_lock``.

        Returns (wav_bytes, sample_rate). Blocking — the complete audio is
        generated before returning.

        Raises:
            RuntimeError: no active model, the active model has no
                ``audio_type``, or the ``audio_type`` is not recognised.
        """
        active = self.active_model_name
        if not active:
            raise RuntimeError("No active model")

        entry = self.models[active]
        codec_kind = entry.get("audio_type")
        model = entry["model"]
        tokenizer = entry.get("tokenizer")

        if not codec_kind:
            raise RuntimeError(f"Model {active} is not an audio model")

        top_k = self._normalize_top_k(top_k)

        with self._generation_lock:
            if use_adapter is not None:
                self._apply_adapter_state(use_adapter)

            if codec_kind == "snac":
                return self._generate_snac(
                    model,
                    tokenizer,
                    text,
                    temperature,
                    top_p,
                    max_new_tokens,
                    repetition_penalty,
                )

            if codec_kind == "csm":
                # CSM prefers the processor, falling back to the tokenizer.
                csm_processor = entry.get("processor", tokenizer)
                return self._generate_csm(model, csm_processor, text, max_new_tokens)

            if codec_kind == "bicodec":
                return self._generate_bicodec(
                    model, tokenizer, text, temperature, top_k, max_new_tokens
                )

            if codec_kind == "dac":
                return self._generate_dac(
                    model,
                    tokenizer,
                    text,
                    temperature,
                    top_k,
                    top_p,
                    min_p,
                    max_new_tokens,
                    repetition_penalty,
                )

            raise RuntimeError(f"Unknown audio_type: {codec_kind}")

    def _generate_snac(
        self,
        model,
        tokenizer,
        text,
        temperature,
        top_p,
        max_new_tokens,
        repetition_penalty,
    ):
        """Generate audio using SNAC codec (Orpheus)."""
        device = model.device
        start_token = torch.tensor([[128259]], device = device)  # START_OF_HUMAN
        end_tokens = torch.tensor(
            [[128009, 128260]], device = device
        )  # EOT, END_OF_HUMAN
        text_ids = tokenizer(text, return_tensors = "pt").input_ids.to(device)
        input_ids = torch.cat([start_token, text_ids, end_tokens], dim = 1)
        attention_mask = torch.ones_like(input_ids)

        generated = model.generate(
            input_ids = input_ids,
            attention_mask = attention_mask,
            max_new_tokens = max_new_tokens,
            do_sample = True,
            temperature = temperature,
            top_p = top_p,
            repetition_penalty = repetition_penalty,
            eos_token_id = 128258,  # END_OF_SPEECH
            use_cache = True,
        )
        return self._audio_codec_manager.decode_snac(generated, str(device))

    def _generate_csm(self, model, processor, text, max_new_tokens):
        """Generate audio using CSM (Sesame)."""
        speaker_id = 0
        inputs = processor(
            f"[{speaker_id}]{text}", add_special_tokens = True, return_tensors = "pt"
        ).to(model.device)
        audio_values = model.generate(
            **inputs, max_new_tokens = max_new_tokens, output_audio = True
        )
        return self._audio_codec_manager.decode_csm(audio_values)

    def _generate_bicodec(
        self, model, tokenizer, text, temperature, top_k, max_new_tokens
    ):
        """Generate audio using BiCodec (Spark-TTS)."""
        prompt = (
            "<|task_tts|><|start_content|>"
            + text
            + "<|end_content|><|start_global_token|>"
        )
        inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)
        generated = model.generate(
            **inputs,
            max_new_tokens = max_new_tokens,
            do_sample = True,
            temperature = temperature,
            top_k = top_k,
            eos_token_id = tokenizer.eos_token_id,
            pad_token_id = tokenizer.pad_token_id,
        )
        new_tokens = generated[:, inputs.input_ids.shape[1] :]
        decoded_text = tokenizer.batch_decode(new_tokens, skip_special_tokens = False)[0]
        return self._audio_codec_manager.decode_bicodec(decoded_text, str(model.device))

    def _generate_dac(
        self,
        model,
        tokenizer,
        text,
        temperature,
        top_k,
        top_p,
        min_p,
        max_new_tokens,
        repetition_penalty,
    ):
        """Generate audio using DAC (OuteTTS). Follows Oute_TTS_(1B).ipynb exactly."""
        # Monkey-patch RepetitionPenaltyLogitsProcessor with a 64-token penalty
        # window (same as the OuteTTS notebook) to avoid degenerate repetition.
        self._patch_repetition_penalty_processor()

        prompt = (
            "<|im_start|>\n<|text_start|>"
            + text
            + "<|text_end|>\n<|audio_start|><|global_features_start|>\n"
        )
        with torch.inference_mode():
            with torch.amp.autocast("cuda", dtype = model.dtype):
                inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)
                generated = model.generate(
                    **inputs,
                    temperature = temperature,
                    top_k = top_k,
                    top_p = top_p,
                    min_p = min_p,
                    repetition_penalty = repetition_penalty,
                    max_new_tokens = max_new_tokens,
                )
        decoded_text = tokenizer.batch_decode(generated, skip_special_tokens = False)[0]
        return self._audio_codec_manager.decode_dac(decoded_text, str(model.device))

    # Class-level flag: True once the windowed processor has been installed.
    _repetition_penalty_patched = False

    @classmethod
    def _patch_repetition_penalty_processor(cls):
        """
        Monkey-patch transformers' RepetitionPenaltyLogitsProcessor with a
        64-token sliding window variant (from the OuteTTS notebook).
        Only applied once per process.

        The replacement is assigned onto ``transformers.generation.utils``,
        so it is a process-wide change. The "patched" flag is only set after
        the replacement has actually been installed, so a failed import does
        not permanently mark the process as patched.
        """
        if cls._repetition_penalty_patched:
            return

        from transformers import LogitsProcessor
        import transformers.generation.utils as generation_utils

        class RepetitionPenaltyLogitsProcessorPatch(LogitsProcessor):
            def __init__(self, penalty: float):
                # Number of most recent tokens that are penalised.
                self.penalty_last_n = 64
                if not isinstance(penalty, float) or penalty <= 0:
                    raise ValueError(
                        f"`penalty` has to be a positive float, but is {penalty}"
                    )
                self.penalty = penalty

            @torch.no_grad()
            def __call__(
                self, input_ids: torch.LongTensor, scores: torch.FloatTensor
            ) -> torch.FloatTensor:
                if self.penalty_last_n == 0 or self.penalty == 1.0:
                    return scores
                batch_size, seq_len = input_ids.shape
                vocab_size = scores.shape[-1]
                # Same window start for every row, so compute it once.
                start_index = max(0, seq_len - self.penalty_last_n)
                for b in range(batch_size):
                    window_indices = input_ids[b, start_index:]
                    if window_indices.numel() == 0:
                        continue
                    for token_id in set(window_indices.tolist()):
                        # Ids outside the score vector cannot be penalised.
                        if token_id >= vocab_size:
                            continue
                        logit = scores[b, token_id]
                        # Push the logit away from selection in either sign.
                        scores[b, token_id] = (
                            logit * self.penalty if logit <= 0 else logit / self.penalty
                        )
                return scores

        generation_utils.RepetitionPenaltyLogitsProcessor = (
            RepetitionPenaltyLogitsProcessorPatch
        )
        # Mark as patched only once the replacement is in place.
        cls._repetition_penalty_patched = True
        logger.info(
            "Patched RepetitionPenaltyLogitsProcessor with 64-token window for OuteTTS"
        )

    def format_chat_prompt(self, messages: list, system_prompt: str = None) -> str:
        """Turn a chat history into a single prompt string for the active model.

        The history is normalised first: an optional ``system_prompt`` becomes
        the leading system message, system messages inside ``messages`` are
        dropped, empty messages are skipped, tag-like ``<...>`` markup is
        stripped from user content, a message repeating the previous role is
        skipped, and a trailing assistant message is removed. The tokenizer's
        own chat template is tried first; if that fails, a manual template
        (when ``chat_template_info`` reports one) or the generic
        ``Role: content`` format is used.

        Args:
            messages: Dicts with ``role`` and ``content`` keys. ``None`` and
                a ``content`` of ``None`` are tolerated.
            system_prompt: Optional system prompt to prepend.

        Returns:
            The formatted prompt, or ``""`` when no model or tokenizer is
            available.
        """
        import re

        if not self.active_model_name or self.active_model_name not in self.models:
            logger.error("No active model available")
            return ""

        model_info = self.models[self.active_model_name]
        if model_info.get("tokenizer") is None:
            logger.error("Tokenizer not loaded for active model")
            return ""

        # A stored None is treated like a missing entry.
        chat_template_info = model_info.get("chat_template_info") or {}
        tokenizer = model_info["tokenizer"]
        tokenizer = getattr(tokenizer, "tokenizer", tokenizer)

        chat_messages = []

        if system_prompt:
            chat_messages.append({"role": "system", "content": system_prompt})

        last_role = "system" if system_prompt else None

        for msg in messages or []:
            role = msg.get("role", "")
            # The key may be present with a None value; calling .strip() on
            # it would raise, so treat it as empty.
            content = msg.get("content") or ""

            if role not in ("system", "user", "assistant") or not content.strip():
                continue

            if role == last_role:
                logger.debug(
                    f"Skipping consecutive {role} message to maintain alternation"
                )
                continue

            if role == "user":
                clean_content = re.sub(r"<[^>]+>", "", content).strip()
                if clean_content:
                    chat_messages.append({"role": role, "content": clean_content})
                    last_role = role
            elif role == "assistant":
                chat_messages.append({"role": role, "content": content})
                last_role = role
            # System messages from the history are intentionally dropped;
            # only `system_prompt` contributes a system turn.

        if chat_messages and chat_messages[-1]["role"] == "assistant":
            logger.debug(
                "Removing final assistant message to ensure proper alternation"
            )
            chat_messages.pop()

        logger.info(f"Sending {len(chat_messages)} messages to tokenizer:")
        for i, msg in enumerate(chat_messages):
            logger.info(f"  {i}: {msg['role']} - {msg['content'][:50]}...")

        try:
            formatted_prompt = tokenizer.apply_chat_template(
                chat_messages, tokenize = False, add_generation_prompt = True
            )
            logger.info("Successfully applied tokenizer's native chat template")
            return formatted_prompt
        except Exception as e:
            error_msg = str(e).lower()
            if (
                "chat_template is not set" in error_msg
                or "no template argument" in error_msg
            ):
                logger.info(
                    "Base model detected - no built-in chat template available, using fallback formatting"
                )
            else:
                logger.warning(f"Failed to apply tokenizer chat template: {e}")
            previews = [f"{m['role']}: {m['content'][:30]}..." for m in chat_messages]
            logger.debug(f"Failed with messages: {previews}")

        if chat_template_info.get("has_template", False):
            logger.info(
                "Falling back to manual template formatting based on detected patterns"
            )
            template_type = chat_template_info.get("format_type", "generic")
            manual_prompt = self._format_chat_manual(
                chat_messages,
                template_type,
                chat_template_info.get("special_tokens", {}),
            )
            logger.info(f"Manual template result: {manual_prompt[:200]}...")
            return manual_prompt

        logger.info("Using generic chat formatting for base model")
        return self._format_generic_template(chat_messages, {})

    def _format_chat_manual(
        self, messages: list, template_type: str, special_tokens: dict
    ) -> str:
        """
        Manual chat formatting fallback for when tokenizer template fails

        Args:
            messages: List of message dictionaries
            template_type: Detected template type
            special_tokens: Dictionary of special tokens

        Returns:
            str: Manually formatted prompt
        """
        if template_type == "llama3":
            return self._format_llama3_template(messages, special_tokens)
        elif template_type == "mistral":
            return self._format_mistral_template(messages, special_tokens)
        elif template_type == "chatml":
            return self._format_chatml_template(messages, special_tokens)
        elif template_type == "alpaca":
            return self._format_alpaca_template(messages, special_tokens)
        else:
            return self._format_generic_template(messages, special_tokens)

    def _format_llama3_template(self, messages: list, special_tokens: dict) -> str:
        """Format messages using Llama 3 template"""
        bos_token = special_tokens.get("bos_token", "<|begin_of_text|>")
        formatted = bos_token

        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            formatted += (
                f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
            )

        formatted += "<|start_header_id|>assistant<|end_header_id|>\n\n"
        return formatted

    def _format_mistral_template(self, messages: list, special_tokens: dict) -> str:
        """Format messages using Mistral template"""
        bos_token = special_tokens.get("bos_token", "<s>")
        formatted = bos_token

        system_msg = None
        conversation = []

        for msg in messages:
            if msg["role"] == "system":
                system_msg = msg["content"]
            else:
                conversation.append(msg)

        i = 0
        while i < len(conversation):
            if conversation[i]["role"] == "user":
                user_content = conversation[i]["content"]

                if system_msg and i == 0:
                    user_content = f"{system_msg}\n\n{user_content}"

                formatted += f"[INST] {user_content} [/INST]"

                if (
                    i + 1 < len(conversation)
                    and conversation[i + 1]["role"] == "assistant"
                ):
                    formatted += f" {conversation[i + 1]['content']}</s>"
                    i += 2
                else:
                    formatted += " "
                    break
            else:
                i += 1

        return formatted

    def _format_chatml_template(self, messages: list, special_tokens: dict) -> str:
        """Format messages using ChatML template"""
        formatted = ""

        for msg in messages:
            role = msg["role"]
            content = msg["content"]
            formatted += f"<|im_start|>{role}\n{content}<|im_end|>\n"

        formatted += "<|im_start|>assistant\n"
        return formatted

    def _format_alpaca_template(self, messages: list, special_tokens: dict) -> str:
        """Format messages using Alpaca template"""
        formatted = ""
        system_msg = None

        for msg in messages:
            if msg["role"] == "system":
                system_msg = msg["content"]
            elif msg["role"] == "user":
                if system_msg:
                    formatted += f"### Instruction:\n{system_msg}\n\n### Input:\n{msg['content']}\n\n### Response:\n"
                    system_msg = None
                else:
                    formatted += f"### Human:\n{msg['content']}\n\n### Assistant:\n"
            elif msg["role"] == "assistant":
                formatted += f"{msg['content']}\n\n"

        return formatted

    def _format_generic_template(self, messages: list, special_tokens: dict) -> str:
        """Generic fallback formatting"""
        formatted = ""

        for msg in messages:
            role = msg["role"].title()
            content = msg["content"]
            formatted += f"{role}: {content}\n"

        formatted += "Assistant: "
        return formatted

    def check_vision_model_compatibility(self) -> bool:
        """
        Check if current model supports vision.

        Returns:
            bool: True if current model supports vision, False otherwise
        """
        current_model = self.get_current_model()
        if current_model and current_model in self.models:
            return self.models[current_model].get("is_vision", False)
        return False

    def _reset_model_generation_state(self, model_name: str):
        """Reset generation state for a specific model to prevent contamination."""
        if model_name not in self.models:
            return

        model = self.models[model_name].get("model")
        if not model:
            return

        try:
            # This is a common pattern for Unsloth/Hugging Face models
            if hasattr(model, "past_key_values"):
                model.past_key_values = None
            if hasattr(model, "generation_config"):
                if hasattr(model.generation_config, "past_key_values"):
                    model.generation_config.past_key_values = None

            logger.debug(f"Reset generation state for model: {model_name}")
        except Exception as e:
            logger.warning(f"Could not fully reset model state for {model_name}: {e}")

    def reset_generation_state(self):
        """Reset cached generation state on every loaded model and free memory.

        Runs the per-model reset for each entry in ``self.models``, then calls
        ``clear_gpu_cache()`` and forces a garbage-collection pass. Any failure
        is logged as a warning rather than raised.
        """
        import gc

        try:
            for loaded_name in self.models:
                self._reset_model_generation_state(loaded_name)

            clear_gpu_cache()
            logger.debug("Cleared GPU cache")

            gc.collect()
            logger.info("Performed comprehensive generation state reset")
        except Exception as e:
            logger.warning(f"Could not fully reset generation state: {e}")

    def resize_image(self, img, max_size: int = 800):
        """Resize image while maintaining aspect ratio if either dimension exceeds max_size"""
        if img is None:
            return None
        if img.size[0] > max_size or img.size[1] > max_size:
            from PIL import Image

            ratio = min(max_size / img.size[0], max_size / img.size[1])
            new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
            return img.resize(new_size, Image.Resampling.LANCZOS)
        return img

    def _clean_generated_text(self, text: str) -> str:
        """Strip leaked special tokens using the tokenizer's own token list."""
        if self._is_gpt_oss_model():
            # HarmonyTextStreamer produces clean <think>...</think> output.
            # Strip harmony protocol tokens and other gpt-oss added tokens
            # (e.g. <|return|>) that may leak past the streamer.
            import re

            text = re.sub(r"<\|[a-z_]+\|>", "", text)
            return text.strip()

        tokenizer = self.models.get(self.active_model_name, {}).get("tokenizer")
        if tokenizer:
            for token in getattr(tokenizer, "all_special_tokens", []):
                if token in text:
                    text = text.replace(token, "")
        return text.strip()

    def _load_chat_template_info(self, model_name: str):
        if model_name not in self.models or not self.models[model_name].get(
            "tokenizer"
        ):
            return

        tokenizer = self.models[model_name]["tokenizer"]
        chat_template_info = {
            "has_template": False,
            "template": None,
            "format_type": "generic",
            "special_tokens": {},
            "template_name": None,
        }

        try:
            from utils.datasets import MODEL_TO_TEMPLATE_MAPPER

            # Try exact match first
            model_name_lower = model_name.lower()
            if model_name_lower in MODEL_TO_TEMPLATE_MAPPER:
                chat_template_info["template_name"] = MODEL_TO_TEMPLATE_MAPPER[
                    model_name_lower
                ]
                logger.info(
                    f"Detected template '{chat_template_info['template_name']}' for {model_name} from mapper"
                )
            else:
                # Try partial match (for variants like model_name-bnb-4bit)
                for key in MODEL_TO_TEMPLATE_MAPPER:
                    if key in model_name_lower or model_name_lower in key:
                        chat_template_info["template_name"] = MODEL_TO_TEMPLATE_MAPPER[
                            key
                        ]
                        logger.info(
                            f"Detected template '{chat_template_info['template_name']}' for {model_name} (partial match)"
                        )
                        break
        except Exception as e:
            logger.warning(
                f"Could not detect template from mapper for {model_name}: {e}"
            )

        try:
            if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
                chat_template_info["has_template"] = True
                chat_template_info["template"] = tokenizer.chat_template

                template_str = tokenizer.chat_template.lower()

                if (
                    "start_header_id" in template_str
                    and "end_header_id" in template_str
                ):
                    chat_template_info["format_type"] = "llama3"
                elif "[inst]" in template_str and "[/inst]" in template_str:
                    chat_template_info["format_type"] = "mistral"
                elif "<|im_start|>" in template_str and "<|im_end|>" in template_str:
                    chat_template_info["format_type"] = "chatml"
                elif "### instruction:" in template_str or "### human:" in template_str:
                    chat_template_info["format_type"] = "alpaca"
                else:
                    chat_template_info["format_type"] = "custom"

                logger.info(
                    f"Loaded chat template for {model_name} (detected as {chat_template_info['format_type']} format)"
                )
                logger.debug(f"Template preview: {tokenizer.chat_template[:200]}...")

                special_tokens = {}
                if hasattr(tokenizer, "bos_token") and tokenizer.bos_token:
                    special_tokens["bos_token"] = tokenizer.bos_token
                if hasattr(tokenizer, "eos_token") and tokenizer.eos_token:
                    special_tokens["eos_token"] = tokenizer.eos_token
                if hasattr(tokenizer, "pad_token") and tokenizer.pad_token:
                    special_tokens["pad_token"] = tokenizer.pad_token

                chat_template_info["special_tokens"] = special_tokens

            else:
                logger.info(
                    f"No chat template found for {model_name}, will use generic formatting"
                )

        except Exception as e:
            logger.error(f"Error loading chat template info for {model_name}: {e}")

        self.models[model_name]["chat_template_info"] = chat_template_info

        if chat_template_info["has_template"]:
            logger.info(
                f"Chat template loaded for {model_name}: {chat_template_info['format_type']} format"
            )
        else:
            logger.info(
                f"No built-in chat template for {model_name}, will use generic formatting"
            )

    def get_current_model(self) -> Optional[str]:
        """Get currently active model name"""
        return self.active_model_name

    def is_model_loading(self) -> bool:
        """Check if any model is currently loading"""
        return len(self.loading_models) > 0

    def get_loading_model(self) -> Optional[str]:
        """Get name of currently loading model"""
        return next(iter(self.loading_models)) if self.loading_models else None

    def load_model_simple(
        self,
        model_path: str,
        hf_token: Optional[str] = None,
        max_seq_length: int = 2048,
        load_in_4bit: bool = True,
    ) -> bool:
        """
        Load a model for the chat interface from a plain model path.

        Builds a ``ModelConfig`` (no LoRA) from the path and delegates to
        ``load_model`` with dtype auto-detection.

        Args:
            model_path: Model name or path (e.g., "unsloth/llama-3-8b")
            hf_token: HuggingFace token for gated models
            max_seq_length: Maximum sequence length
            load_in_4bit: Whether to use 4-bit quantization

        Returns:
            bool: Whatever ``load_model`` returns, or False when building the
            config or loading raises.
        """
        try:
            chat_config = ModelConfig.from_ui_selection(
                model_path,
                lora_path = None,  # chat loads never attach a LoRA
                is_lora = False,
            )
            outcome = self.load_model(
                config = chat_config,
                max_seq_length = max_seq_length,
                dtype = None,  # let the loader pick the dtype
                load_in_4bit = load_in_4bit,
                hf_token = hf_token,
            )
        except Exception as e:
            logger.error(f"Error in load_model_simple: {e}")
            return False
        return outcome


# Process-wide InferenceBackend instance, created once at import time.
inference_backend = InferenceBackend()


def get_inference_backend() -> InferenceBackend:
    """Return the module-level ``inference_backend`` instance."""
    return inference_backend


================================================
FILE: studio/backend/core/inference/llama_cpp.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
llama-server inference backend for GGUF models.

Manages a llama-server subprocess and proxies chat completions
through its OpenAI-compatible /v1/chat/completions endpoint.
"""

import atexit
import contextlib
import json
import struct
import structlog
from loggers import get_logger
import shutil
import signal
import socket
import subprocess
import threading
import time
from pathlib import Path
from typing import Generator, Optional

import httpx

# Module-level logger, obtained from the project's `loggers.get_logger` helper.
logger = get_logger(__name__)


class LlamaCppBackend:
    """
    Manages a llama-server subprocess for GGUF model inference.

    Lifecycle:
        1. load_model()  — starts llama-server with the GGUF file
        2. generate_chat_completion() — proxies to /v1/chat/completions, streams back
        3. unload_model() — terminates llama-server subprocess
    """

    def __init__(self):
        """Create an idle backend with no llama-server process attached.

        All per-model state starts empty. Construction also calls
        ``_kill_orphaned_servers()`` (defined elsewhere in this class;
        presumably removes servers left by an earlier run — confirm there)
        and registers ``_cleanup`` to run at interpreter exit.
        """
        # Subprocess handle and how to reach it.
        self._process: Optional[subprocess.Popen] = None
        self._port: Optional[int] = None
        self._healthy = False

        # What is loaded and where it came from.
        self._model_identifier: Optional[str] = None
        self._gguf_path: Optional[str] = None
        self._hf_repo: Optional[str] = None
        self._hf_variant: Optional[str] = None
        self._cache_type_kv: Optional[str] = None

        # Capabilities and metadata of the loaded model.
        self._is_vision: bool = False
        self._context_length: Optional[int] = None
        self._chat_template: Optional[str] = None
        self._supports_reasoning: bool = False
        self._supports_tools: bool = False
        self._reasoning_default: bool = True

        # Synchronisation and captured subprocess output.
        self._lock = threading.Lock()
        self._cancel_event = threading.Event()
        self._stdout_lines: list[str] = []
        self._stdout_thread: Optional[threading.Thread] = None

        self._kill_orphaned_servers()
        atexit.register(self._cleanup)

    # ── Properties ────────────────────────────────────────────────

    @property
    def is_loaded(self) -> bool:
        return self._process is not None and self._healthy

    @property
    def is_active(self) -> bool:
        """True if a llama-server process exists (loading or loaded)."""
        return self._process is not None

    @property
    def base_url(self) -> str:
        return f"http://127.0.0.1:{self._port}"

    @property
    def model_identifier(self) -> Optional[str]:
        return self._model_identifier

    @property
    def is_vision(self) -> bool:
        return self._is_vision

    @property
    def hf_variant(self) -> Optional[str]:
        return self._hf_variant

    @property
    def context_length(self) -> Optional[int]:
        return self._context_length

    @property
    def chat_template(self) -> Optional[str]:
        return self._chat_template

    @property
    def supports_reasoning(self) -> bool:
        return self._supports_reasoning

    @property
    def reasoning_default(self) -> bool:
        return self._reasoning_default

    @property
    def supports_tools(self) -> bool:
        return self._supports_tools

    @property
    def cache_type_kv(self) -> Optional[str]:
        return self._cache_type_kv

    # ── Binary discovery ──────────────────────────────────────────

    @staticmethod
    def _find_llama_server_binary() -> Optional[str]:
        """
        Locate the llama-server binary.

        Search order:
        1.  LLAMA_SERVER_PATH environment variable (direct path to binary)
        1b. UNSLOTH_LLAMA_CPP_PATH env var (custom llama.cpp install dir)
        2.  ~/.unsloth/llama.cpp/llama-server        (make build, root dir)
        3.  ~/.unsloth/llama.cpp/build/bin/llama-server  (cmake build, Linux)
        4.  ~/.unsloth/llama.cpp/build/bin/Release/llama-server.exe  (cmake build, Windows)
        5.  ./llama.cpp/llama-server                 (legacy: make build, root dir)
        6.  ./llama.cpp/build/bin/llama-server        (legacy: cmake in-tree build)
        7.  llama-server on PATH                     (system install)
        8.  ./bin/llama-server                       (legacy: extracted binary)
        """
        import os
        import sys

        binary_name = "llama-server.exe" if sys.platform == "win32" else "llama-server"

        # 1. Env var — direct path to binary
        env_path = os.environ.get("LLAMA_SERVER_PATH")
        if env_path and Path(env_path).is_file():
            return env_path

        # 1b. UNSLOTH_LLAMA_CPP_PATH — custom llama.cpp install directory
        custom_llama_cpp = os.environ.get("UNSLOTH_LLAMA_CPP_PATH")
        if custom_llama_cpp:
            custom_dir = Path(custom_llama_cpp)
            # Root dir (make builds)
            root_bin = custom_dir / binary_name
            if root_bin.is_file():
                return str(root_bin)
            # build/bin/ (cmake builds on Linux)
            cmake_bin = custom_dir / "build" / "bin" / binary_name
            if cmake_bin.is_file():
                return str(cmake_bin)
            # build/bin/Release/ (cmake builds on Windows)
            if sys.platform == "win32":
                win_bin = custom_dir / "build" / "bin" / "Release" / binary_name
                if win_bin.is_file():
                    return str(win_bin)

        # 2–4. ~/.unsloth/llama.cpp (primary — setup.sh / setup.ps1 build here)
        unsloth_home = Path.home() / ".unsloth" / "llama.cpp"
        # Root dir (make builds copy binaries here)
        home_root = unsloth_home / binary_name
        if home_root.is_file():
            return str(home_root)
        # build/bin/ (cmake builds on Linux)
        home_linux = unsloth_home / "build" / "bin" / binary_name
        if home_linux.is_file():
            return str(home_linux)

        # 3. Windows MSVC build has Release subdir
        if sys.platform == "win32":
            home_win = unsloth_home / "build" / "bin" / "Release" / binary_name
            if home_win.is_file():
                return str(home_win)

        # 5–6. Legacy: in-tree build (older setup.sh / setup.ps1 versions)
        project_root = Path(__file__).resolve().parents[4]
        # Root dir (make builds)
        root_path = project_root / "llama.cpp" / binary_name
        if root_path.is_file():
            return str(root_path)
        # build/bin/ (cmake builds)
        build_path = project_root / "llama.cpp" / "build" / "bin" / binary_name
        if build_path.is_file():
            return str(build_path)
        if sys.platform == "win32":
            win_path = (
                project_root / "llama.cpp" / "build" / "bin" / "Release" / binary_name
            )
            if win_path.is_file():
                return str(win_path)

        # 7. System PATH
        system_path = shutil.which("llama-server")
        if system_path:
            return system_path

        # 8. Legacy: extracted to bin/
        bin_path = project_root / "bin" / binary_name
        if bin_path.is_file():
            return str(bin_path)

        return None

    # ── GPU allocation ────────────────────────────────────────────

    @staticmethod
    def _get_gguf_size_bytes(model_path: str) -> int:
        """Get total GGUF size in bytes, including split shards."""
        import re

        main = Path(model_path)
        total = main.stat().st_size

        # Check for split shards (e.g., model-00001-of-00003.gguf)
        shard_pat = re.compile(r"^(.*)-(\d{5})-of-(\d{5})\.gguf$")
        m = shard_pat.match(main.name)
        if m:
            prefix, _, num_total = m.group(1), m.group(2), m.group(3)
            sibling_pat = re.compile(
                r"^"
                + re.escape(prefix)
                + r"-\d{5}-of-"
                + re.escape(num_total)
                + r"\.gguf$"
            )
            for sibling in main.parent.iterdir():
                if sibling != main and sibling_pat.match(sibling.name):
                    total += sibling.stat().st_size

        return total

    @staticmethod
    def _get_gpu_free_memory() -> list[tuple[int, int]]:
        """Query free memory per GPU via nvidia-smi.

        Returns list of (gpu_index, free_mib) sorted by index.
        Respects CUDA_VISIBLE_DEVICES if set.
        Returns empty list if nvidia-smi is not available.
        """
        import os

        try:
            result = subprocess.run(
                [
                    "nvidia-smi",
                    "--query-gpu=index,memory.free",
                    "--format=csv,noheader,nounits",
                ],
                capture_output = True,
                text = True,
                timeout = 10,
            )
            if result.returncode != 0:
                return []

            # Parse which GPUs are allowed by existing CUDA_VISIBLE_DEVICES
            allowed = None
            cvd = os.environ.get("CUDA_VISIBLE_DEVICES")
            if cvd is not None and cvd.strip():
                try:
                    allowed = set(int(x.strip()) for x in cvd.split(","))
                except ValueError:
                    pass  # Non-numeric (e.g., "GPU-uuid"), ignore filter

            gpus = []
            for line in result.stdout.strip().splitlines():
                parts = line.split(",")
                if len(parts) == 2:
                    idx = int(parts[0].strip())
                    free_mib = int(parts[1].strip())
                    if allowed is not None and idx not in allowed:
                        continue
                    gpus.append((idx, free_mib))
            return gpus
        except Exception:
            return []

    @staticmethod
    def _select_gpus(
        model_size_bytes: int,
        gpus: list[tuple[int, int]],
    ) -> tuple[Optional[list[int]], bool]:
        """Pick GPU(s) for a model based on file size and free memory.

        Uses GGUF file size as a rough proxy for VRAM usage (actual usage
        is higher due to KV cache and compute buffers, but 70% threshold
        accounts for that).

        Returns (gpu_indices, use_fit):
          - ([1], False)       model fits on 1 GPU at 70% of free
          - ([1, 2], False)    model needs 2 GPUs
          - (None, True)       model too large, let --fit handle it
        """
        if not gpus:
            return None, True

        model_size_mib = model_size_bytes / (1024 * 1024)

        # Sort GPUs by free memory descending
        ranked = sorted(gpus, key = lambda g: g[1], reverse = True)

        # Try fitting on 1 GPU (70% of free memory threshold)
        if ranked[0][1] * 0.70 >= model_size_mib:
            return [ranked[0][0]], False

        # Try fitting on N GPUs (accumulate free memory from most-free)
        cumulative = 0
        selected = []
        for idx, free_mib in ranked:
            selected.append(idx)
            cumulative += free_mib * 0.70
            if cumulative >= model_size_mib:
                return sorted(selected), False

        # Model is too large even for all GPUs, let --fit handle it
        return None, True

    # ── Variant fallback ────────────────────────────────────────────

    @staticmethod
    def _find_smallest_fitting_variant(
        hf_repo: str,
        free_bytes: int,
        hf_token: Optional[str] = None,
    ) -> Optional[tuple[str, int]]:
        """Find the smallest GGUF variant (including all shards) that fits.

        Groups split shards by variant prefix and sums their sizes.
        For example, UD-Q4_K_XL with 9 shards of 50 GB each = 450 GB total.

        Returns (first_shard_filename, total_size_bytes) or None if nothing fits.
        """
        import re

        try:
            from huggingface_hub import get_paths_info, list_repo_files

            files = list_repo_files(hf_repo, token = hf_token)
            gguf_files = [
                f for f in files if f.endswith(".gguf") and "mmproj" not in f.lower()
            ]
            if not gguf_files:
                return None

            # Get sizes for all GGUF files
            path_infos = list(get_paths_info(hf_repo, gguf_files, token = hf_token))
            size_map = {p.path: (p.size or 0) for p in path_infos}

            # Group files by variant: shards share a prefix before -NNNNN-of-NNNNN
            shard_pat = re.compile(r"^(.*)-\d{5}-of-\d{5}\.gguf$")
            variants: dict[str, list[str]] = {}
            for f in gguf_files:
                m = shard_pat.match(f)
                key = m.group(1) if m else f
                variants.setdefault(key, []).append(f)

            # Sum shard sizes per variant, track the first shard (for download)
            variant_sizes: list[tuple[str, int, list[str]]] = []
            for key, shard_files in variants.items():
                total = sum(size_map.get(f, 0) for f in shard_files)
                first = sorted(shard_files)[0]
                variant_sizes.append((first, total, shard_files))

            # Sort by total size ascending and pick the smallest that fits
            variant_sizes.sort(key = lambda x: x[1])
            for first_file, total_size, _ in variant_sizes:
                if total_size > 0 and total_size <= free_bytes:
                    return first_file, total_size

            return None
        except Exception:
            return None

    # ── Port allocation ───────────────────────────────────────────

    @staticmethod
    def _find_free_port() -> int:
        """Find an available TCP port."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("127.0.0.1", 0))
            return s.getsockname()[1]

    # ── Stdout drain (prevents pipe deadlock on Windows) ─────────

    def _drain_stdout(self):
        """
        Read lines from the subprocess stdout in a background thread.

        This prevents a pipe-buffer deadlock on Windows where the default
        pipe buffer is only ~4 KB.  Without draining, llama-server blocks
        on writes and never becomes healthy.
        """
        try:
            for line in self._process.stdout:
                line = line.rstrip()
                if line:
                    self._stdout_lines.append(line)
                    logger.debug(f"[llama-server] {line}")
        except (ValueError, OSError):
            # Pipe closed — process is terminating
            pass

    # GGUF KV type sizes for fast skipping: metadata value-type id -> byte
    # width of one fixed-size value. Ids 8 (STRING) and 9 (ARRAY) are
    # variable-length and are handled separately in _gguf_skip_value.
    # Type names below follow the GGUF specification's value-type enum.
    _GGUF_TYPE_SIZE = {
        0: 1,  # UINT8
        1: 1,  # INT8
        2: 2,  # UINT16
        3: 2,  # INT16
        4: 4,  # UINT32
        5: 4,  # INT32
        6: 4,  # FLOAT32
        7: 1,  # BOOL
        10: 8,  # UINT64
        11: 8,  # INT64
        12: 8,  # FLOAT64
    }

    @staticmethod
    def _gguf_skip_value(f, vtype: int) -> None:
        """Skip a GGUF KV value without reading it."""
        sz = LlamaCppBackend._GGUF_TYPE_SIZE.get(vtype)
        if sz is not None:
            f.seek(sz, 1)
        elif vtype == 8:  # STRING
            slen = struct.unpack("<Q", f.read(8))[0]
            f.seek(slen, 1)
        elif vtype == 9:  # ARRAY
            atype = struct.unpack("<I", f.read(4))[0]
            alen = struct.unpack("<Q", f.read(8))[0]
            elem_sz = LlamaCppBackend._GGUF_TYPE_SIZE.get(atype)
            if elem_sz is not None:
                f.seek(elem_sz * alen, 1)
            elif atype == 8:
                for _ in range(alen):
                    slen = struct.unpack("<Q", f.read(8))[0]
                    f.seek(slen, 1)
            else:
                for _ in range(alen):
                    LlamaCppBackend._gguf_skip_value(f, atype)

    def _read_gguf_metadata(self, gguf_path: str) -> None:
        """Read context_length and chat_template from a GGUF file's KV header.

        Parses only the KV pairs we need (~30ms even for multi-GB files).
        For split GGUFs, metadata is always in shard 1.
        """
        # Reset metadata from any previously loaded model so stale flags
        # (eg _supports_reasoning) do not carry over when switching models.
        self._context_length = None
        self._chat_template = None
        self._supports_reasoning = False
        self._supports_tools = False

        try:
            WANTED = {"general.architecture", "tokenizer.chat_template"}
            arch = None
            ctx_key = None

            with open(gguf_path, "rb") as f:
                magic = struct.unpack("<I", f.read(4))[0]
                if magic != 0x46554747:  # b"GGUF" as little-endian u32
                    return
                _version = struct.unpack("<I", f.read(4))[0]
                _tensor_count, kv_count = struct.unpack("<QQ", f.read(16))

                for _ in range(kv_count):
                    key_len = struct.unpack("<Q", f.read(8))[0]
                    key = f.read(key_len).decode("utf-8")
                    vtype = struct.unpack("<I", f.read(4))[0]

                    if key in WANTED or (ctx_key and key == ctx_key):
                        # Read this value
                        if vtype == 8:  # STRING
                            slen = struct.unpack("<Q", f.read(8))[0]
                            val_s = f.read(slen).decode("utf-8")
                            if key == "general.architecture":
                                arch = val_s
                                ctx_key = f"{arch}.context_length"
                            elif key == "tokenizer.chat_template":
                                self._chat_template = val_s
                        elif vtype == 4:  # UINT32
                            val_i = struct.unpack("<I", f.read(4))[0]
                            if ctx_key and key == ctx_key:
                                self._context_length = val_i
                        elif vtype == 10:  # UINT64
                            val_i = struct.unpack("<Q", f.read(8))[0]
                            if ctx_key and key == ctx_key:
                                self._context_length = val_i
                        else:
                            self._gguf_skip_value(f, vtype)
                    else:
                        self._gguf_skip_value(f, vtype)

            if self._context_length:
                logger.info(f"GGUF metadata: context_length={self._context_length}")
            if self._chat_template:
                logger.info(
                    f"GGUF metadata: chat_template={len(self._chat_template)} chars"
                )
                # Detect thinking/reasoning support from chat template
                tpl = self._chat_template
                if "enable_thinking" in tpl:
                    self._supports_reasoning = True
                    logger.info(
                        "GGUF metadata: model supports reasoning (enable_thinking)"
                    )
                elif "thinking" in tpl:
                    # DeepSeek uses 'thinking' instead of 'enable_thinking'
                    normalized_id = (self._model_identifier or "").lower()
                    if "deepseek" in normalized_id:
                        self._supports_reasoning = True
                        logger.info(
                            "GGUF metadata: model supports reasoning (DeepSeek thinking)"
                        )
                # Detect tool calling support from chat template
                tool_markers = [
                    "{%- if tools %}",
                    "{% if tools %}",
                    '"role" == "tool"',
                    "'role' == 'tool'",
                    'message.role == "tool"',
                    "message.role == 'tool'",
                ]
                if any(marker in tpl for marker in tool_markers):
                    self._supports_tools = True
                    logger.info("GGUF metadata: model supports tool calling")
        except Exception as e:
            logger.warning(f"Failed to read GGUF metadata: {e}")

    # ── HF download (no lock held) ───────────────────────────────

    def _download_gguf(
        self,
        *,
        hf_repo: str,
        hf_variant: Optional[str] = None,
        hf_token: Optional[str] = None,
    ) -> str:
        """Download GGUF file(s) from HuggingFace. Returns local path.

        Runs WITHOUT self._lock so that unload_model() can set
        _cancel_event at any time. Checks _cancel_event before the first
        download and between each shard download.

        Args:
            hf_repo: HuggingFace repo id (e.g. ``"org/model-GGUF"``).
            hf_variant: Quantization label used to pick the file; matched
                case-insensitively on non-alphanumeric boundaries.
            hf_token: Optional HuggingFace access token.

        Returns:
            Local path of the main GGUF file (the first matching file; for
            split models the remaining shards are downloaded alongside it).

        Raises:
            RuntimeError: if huggingface_hub is missing, no variant was
                given, no variant fits on disk, the download fails, or the
                load was cancelled (message ``"Cancelled"``).
        """
        import os
        import re

        try:
            from huggingface_hub import hf_hub_download
        except ImportError as e:
            raise RuntimeError(
                "huggingface_hub is required for HF model loading. "
                "Install it with: pip install huggingface_hub"
            ) from e

        shard_pat = re.compile(r"^(.*)-\d{5}-of-(\d{5})\.gguf$")

        def _sibling_shards(first_file: str, candidates) -> list[str]:
            """Return the other shards of ``first_file`` found in ``candidates``."""
            m = shard_pat.match(first_file)
            if not m:
                return []
            sibling_pat = re.compile(
                r"^"
                + re.escape(m.group(1))
                + r"-\d{5}-of-"
                + re.escape(m.group(2))
                + r"\.gguf$"
            )
            return sorted(
                f for f in candidates if f != first_file and sibling_pat.match(f)
            )

        # Determine the filename from the variant
        gguf_filename = None
        gguf_extra_shards: list[str] = []
        # Repo listing is kept so the disk-space fallback can reuse it.
        repo_files = None
        if hf_variant:
            try:
                from huggingface_hub import list_repo_files

                repo_files = list(list_repo_files(hf_repo, token = hf_token))
                boundary = re.compile(
                    r"(?<![a-zA-Z0-9])"
                    + re.escape(hf_variant.lower())
                    + r"(?![a-zA-Z0-9])"
                )
                gguf_files = sorted(
                    f
                    for f in repo_files
                    if f.endswith(".gguf") and boundary.search(f.lower())
                )
                if gguf_files:
                    gguf_filename = gguf_files[0]
                    gguf_extra_shards = _sibling_shards(
                        gguf_filename, gguf_files[1:]
                    )
            except Exception as e:
                logger.warning(f"Could not list repo files: {e}")

            if not gguf_filename:
                # Listing failed or nothing matched: guess the conventional name.
                repo_name = hf_repo.split("/")[-1].replace("-GGUF", "")
                gguf_filename = f"{repo_name}-{hf_variant}.gguf"

        if not gguf_filename:
            # Without a variant there is no way to pick a file; fail with a
            # clear message instead of passing filename=None downstream.
            raise RuntimeError(
                f"No GGUF variant specified for {hf_repo}; "
                "cannot determine which file to download"
            )

        # Check disk space and fall back to a smaller variant if needed
        all_gguf_files = [gguf_filename] + gguf_extra_shards
        try:
            from huggingface_hub import get_paths_info, try_to_load_from_cache

            path_infos = list(get_paths_info(hf_repo, all_gguf_files, token = hf_token))
            # Files already present in the HF cache need no additional space.
            total_download_bytes = sum(
                (p.size or 0)
                for p in path_infos
                if not isinstance(try_to_load_from_cache(hf_repo, p.path), str)
            )

            if total_download_bytes > 0:
                cache_dir = os.environ.get(
                    "HF_HUB_CACHE",
                    str(Path.home() / ".cache" / "huggingface" / "hub"),
                )
                Path(cache_dir).mkdir(parents = True, exist_ok = True)
                free_bytes = shutil.disk_usage(cache_dir).free

                total_gb = total_download_bytes / (1024**3)
                free_gb = free_bytes / (1024**3)

                logger.info(
                    f"GGUF download: {total_gb:.1f} GB needed, "
                    f"{free_gb:.1f} GB free on disk"
                )

                if total_download_bytes > free_bytes:
                    smaller = self._find_smallest_fitting_variant(
                        hf_repo,
                        free_bytes,
                        hf_token,
                    )
                    if not smaller:
                        raise RuntimeError(
                            f"Not enough disk space to download any variant. "
                            f"Only {free_gb:.1f} GB free in {cache_dir}"
                        )
                    fallback_file, fallback_size = smaller
                    logger.info(
                        f"Selected variant too large ({total_gb:.1f} GB), "
                        f"falling back to {fallback_file} ({fallback_size / (1024**3):.1f} GB)"
                    )
                    fallback_shards: list[str] = []
                    if shard_pat.match(fallback_file):
                        # The fallback's shards must come from the repo
                        # listing: all_gguf_files only holds the files of the
                        # originally selected (too large) variant.
                        if repo_files is None:
                            from huggingface_hub import list_repo_files

                            try:
                                repo_files = list(
                                    list_repo_files(hf_repo, token = hf_token)
                                )
                            except Exception as e:
                                raise RuntimeError(
                                    f"Could not list shards for fallback variant "
                                    f"'{fallback_file}' in {hf_repo}: {e}"
                                ) from e
                        fallback_shards = [
                            f
                            for f in _sibling_shards(fallback_file, repo_files)
                            if "mmproj" not in f.lower()
                        ]
                    gguf_filename = fallback_file
                    gguf_extra_shards = fallback_shards
        except RuntimeError:
            raise
        except Exception as e:
            # Best effort: an unavailable size/space check must not block the load.
            logger.warning(f"Could not check disk space: {e}")

        gguf_label = f"{hf_repo}/{gguf_filename}" + (
            f" (+{len(gguf_extra_shards)} shards)" if gguf_extra_shards else ""
        )
        logger.info(f"Resolving GGUF: {gguf_label}")
        try:
            if self._cancel_event.is_set():
                raise RuntimeError("Cancelled")
            dl_start = time.monotonic()
            local_path = hf_hub_download(
                repo_id = hf_repo,
                filename = gguf_filename,
                token = hf_token,
            )
            for shard in gguf_extra_shards:
                if self._cancel_event.is_set():
                    raise RuntimeError("Cancelled")
                logger.info(f"Resolving GGUF shard: {shard}")
                hf_hub_download(
                    repo_id = hf_repo,
                    filename = shard,
                    token = hf_token,
                )
        except RuntimeError as e:
            # Cancellation is signalled with a bare "Cancelled" RuntimeError
            # and must reach the caller unchanged.
            if "Cancelled" in str(e):
                raise
            raise RuntimeError(
                f"Failed to download GGUF file '{gguf_filename}' from {hf_repo}: {e}"
            ) from e
        except Exception as e:
            raise RuntimeError(
                f"Failed to download GGUF file '{gguf_filename}' from {hf_repo}: {e}"
            ) from e

        # A very short elapsed time is taken to mean a cache hit (log wording only).
        dl_elapsed = time.monotonic() - dl_start
        if dl_elapsed < 2.0:
            logger.info(f"GGUF resolved from cache: {local_path}")
        else:
            logger.info(f"GGUF downloaded in {dl_elapsed:.1f}s: {local_path}")
        return local_path

    def _download_mmproj(
        self,
        *,
        hf_repo: str,
        hf_token: Optional[str] = None,
    ) -> Optional[str]:
        """Download the mmproj (vision projection) file from a GGUF repo.

        Prefers an F16 mmproj (e.g. mmproj-F16.gguf), falls back to the
        first mmproj*.gguf file in sorted order.

        Args:
            hf_repo: HuggingFace repo id to search.
            hf_token: Optional HuggingFace access token.

        Returns:
            The local path, or None if no mmproj file exists or the
            listing/download fails (failures are logged, not raised).
        """
        try:
            import re

            from huggingface_hub import hf_hub_download, list_repo_files

            files = list_repo_files(hf_repo, token = hf_token)
            mmproj_files = sorted(
                f for f in files if f.endswith(".gguf") and "mmproj" in f.lower()
            )
            if not mmproj_files:
                return None

            # Prefer the F16 variant. Match "f16" as a standalone token so
            # that "bf16" (which sorts before "f16") is not mistaken for it.
            f16_pat = re.compile(r"(?<![a-z0-9])f16(?![a-z0-9])")
            target = next(
                (f for f in mmproj_files if f16_pat.search(f.lower())),
                mmproj_files[0],
            )

            logger.info(f"Downloading mmproj: {hf_repo}/{target}")
            return hf_hub_download(
                repo_id = hf_repo,
                filename = target,
                token = hf_token,
            )
        except Exception as e:
            # Vision support is optional: report and continue without mmproj.
            logger.warning(f"Could not download mmproj: {e}")
            return None

    # ── Lifecycle ─────────────────────────────────────────────────

    def load_model(
        self,
        *,
        # Local mode: pass a path to a .gguf file
        gguf_path: Optional[str] = None,
        # Vision projection (mmproj) for local vision models
        mmproj_path: Optional[str] = None,
        # HF mode: GGUF is downloaded via huggingface_hub, then served with -m
        hf_repo: Optional[str] = None,
        hf_variant: Optional[str] = None,
        hf_token: Optional[str] = None,
        # Common
        model_identifier: str,
        is_vision: bool = False,
        n_ctx: int = 4096,
        chat_template_override: Optional[str] = None,
        cache_type_kv: Optional[str] = None,
        n_threads: Optional[int] = None,
        n_gpu_layers: Optional[int] = None,  # Accepted for caller compat, unused
    ) -> bool:
        """
        Start llama-server with a GGUF model.

        Two modes:
        - Local: ``gguf_path="/path/to/model.gguf"``
        - HF:    ``hf_repo="unsloth/gemma-3-4b-it-GGUF", hf_variant="Q4_K_M"``

        In HF mode the GGUF (and, for vision models without an explicit
        ``mmproj_path``, the mmproj file) is downloaded first via
        ``_download_gguf`` / ``_download_mmproj``. In both modes the
        resolved local file is passed to llama-server with ``-m``.

        The work is split into three phases: stop the previous server
        (under lock), download (no lock, so unload_model() can cancel),
        and start the server plus health check (under lock).

        Args:
            gguf_path: Path to a local GGUF file (local mode).
            mmproj_path: Optional vision projection file.
            hf_repo: HuggingFace repo id (HF mode); takes precedence over
                ``gguf_path`` when both are given.
            hf_variant: Quantization label used to pick the file in HF mode.
            hf_token: Optional HuggingFace access token.
            model_identifier: Name stored for this model; also used for the
                Qwen3.5 thinking-default heuristic.
            is_vision: Whether to auto-download an mmproj file in HF mode.
            n_ctx: Not forwarded here; the server is started with ``-c 0``.
            chat_template_override: Jinja template text written to a temp
                file and passed via ``--chat-template-file``.
            cache_type_kv: KV cache data type; ignored (with a warning) if
                not one of the supported values.
            n_threads: Optional ``--threads`` value.
            n_gpu_layers: Unused.

        Returns:
            True if server started and health check passed, False if the
            load was cancelled.

        Raises:
            RuntimeError: binary not found, download failure, or the server
                did not become healthy.
            FileNotFoundError: ``gguf_path`` does not exist.
            ValueError: neither ``gguf_path`` nor ``hf_repo`` was given.
        """
        import os
        import platform
        import re
        import sys
        import tempfile

        self._cancel_event.clear()

        # ── Phase 1: kill old process (under lock, fast) ──────────
        with self._lock:
            self._kill_process()

        binary = self._find_llama_server_binary()
        if not binary:
            raise RuntimeError(
                "llama-server binary not found. "
                "Run setup.sh to build it, install llama.cpp, "
                "or set LLAMA_SERVER_PATH environment variable."
            )

        # ── Phase 2: download (NO lock held, so cancel can proceed) ──
        if hf_repo:
            model_path = self._download_gguf(
                hf_repo = hf_repo,
                hf_variant = hf_variant,
                hf_token = hf_token,
            )
            # Auto-download mmproj for vision models
            if is_vision and not mmproj_path:
                mmproj_path = self._download_mmproj(
                    hf_repo = hf_repo,
                    hf_token = hf_token,
                )
        elif gguf_path:
            if not Path(gguf_path).is_file():
                raise FileNotFoundError(f"GGUF file not found: {gguf_path}")
            model_path = gguf_path
        else:
            raise ValueError("Either gguf_path or hf_repo must be provided")

        # Set identifier early so _read_gguf_metadata can use it for DeepSeek detection
        self._model_identifier = model_identifier

        # Read GGUF metadata (context_length, chat_template) -- fast, header only
        self._read_gguf_metadata(model_path)

        # Check cancel after download
        if self._cancel_event.is_set():
            logger.info("Load cancelled after download phase")
            return False

        # ── Phase 3: start llama-server (under lock) ──────────────
        with self._lock:
            # Re-check cancel inside lock
            if self._cancel_event.is_set():
                logger.info("Load cancelled before server start")
                return False

            self._port = self._find_free_port()

            # Select GPU(s) based on model size and free memory
            try:
                model_size = self._get_gguf_size_bytes(model_path)
                gpus = self._get_gpu_free_memory()
                gpu_indices, use_fit = self._select_gpus(model_size, gpus)
                logger.info(
                    f"GGUF size: {model_size / (1024**3):.1f} GB, "
                    f"GPUs free: {gpus}, selected: {gpu_indices}, fit: {use_fit}"
                )
            except Exception as e:
                logger.warning(f"GPU selection failed ({e}), using --fit on")
                gpu_indices, use_fit = None, True

            cmd = [
                binary,
                "-m",
                model_path,
                "--port",
                str(self._port),
                "-c",
                "0",  # 0 = use model's native context size
                "--parallel",
                "1",  # Single-user studio, saves VRAM
                "--flash-attn",
                "on",  # Force flash attention for speed
            ]

            if use_fit:
                cmd.extend(["--fit", "on"])

            if n_threads is not None:
                cmd.extend(["--threads", str(n_threads)])

            # Always enable Jinja chat template rendering for proper template support
            cmd.extend(["--jinja"])

            # KV cache data type
            _valid_cache_types = {
                "f16",
                "bf16",
                "q8_0",
                "q4_0",
                "q4_1",
                "q5_0",
                "q5_1",
                "iq4_nl",
                "f32",
            }
            if cache_type_kv and cache_type_kv in _valid_cache_types:
                cmd.extend(
                    ["--cache-type-k", cache_type_kv, "--cache-type-v", cache_type_kv]
                )
                self._cache_type_kv = cache_type_kv
                logger.info(f"KV cache type: {cache_type_kv}")
            else:
                if cache_type_kv:
                    logger.warning(
                        f"Ignoring unsupported KV cache type: {cache_type_kv}"
                    )
                self._cache_type_kv = None

            # The previous server was stopped in phase 1, so a template file
            # left over from an earlier load is no longer in use: remove it
            # rather than leaking one temp file per reload.
            stale_template = getattr(self, "_chat_template_file", None)
            if stale_template is not None:
                try:
                    os.unlink(stale_template.name)
                except OSError:
                    pass
                self._chat_template_file = None

            # Apply custom chat template override if provided
            if chat_template_override:
                # Write as UTF-8 explicitly: templates may contain non-ASCII
                # special tokens that the platform default encoding
                # (e.g. cp1252 on Windows) cannot represent.
                with tempfile.NamedTemporaryFile(
                    mode = "w",
                    suffix = ".jinja",
                    delete = False,
                    prefix = "unsloth_chat_template_",
                    encoding = "utf-8",
                ) as template_file:
                    template_file.write(chat_template_override)
                # Keep the (closed) file object; unload_model() unlinks it by name.
                self._chat_template_file = template_file
                cmd.extend(["--chat-template-file", self._chat_template_file.name])
                logger.info(
                    f"Using custom chat template file: {self._chat_template_file.name}"
                )

            # For reasoning models, set default thinking mode.
            # Qwen3.5 models below 9B (0.8B, 2B, 4B) disable thinking by default.
            # Only 9B and larger enable thinking.
            if self._supports_reasoning:
                thinking_default = True
                mid = (model_identifier or "").lower()
                if "qwen3.5" in mid:
                    # Extract size like "0.8b", "4b", "35b" etc.
                    size_match = re.search(r"(\d+\.?\d*)\s*b", mid)
                    if size_match:
                        size_val = float(size_match.group(1))
                        if size_val < 9:
                            thinking_default = False
                self._reasoning_default = thinking_default
                cmd.extend(
                    [
                        "--chat-template-kwargs",
                        json.dumps({"enable_thinking": thinking_default}),
                    ]
                )
                logger.info(
                    f"Reasoning model: enable_thinking={thinking_default} by default"
                )

            if mmproj_path:
                if not Path(mmproj_path).is_file():
                    logger.warning(f"mmproj file not found: {mmproj_path}")
                else:
                    cmd.extend(["--mmproj", mmproj_path])
                    logger.info(f"Using mmproj for vision: {mmproj_path}")

            logger.info(f"Starting llama-server: {' '.join(cmd)}")

            # Set library paths so llama-server can find its shared libs and CUDA DLLs
            env = os.environ.copy()
            binary_dir = str(Path(binary).parent)

            if sys.platform == "win32":
                # On Windows, CUDA DLLs (cublas64_12.dll, cudart64_12.dll, etc.)
                # must be on PATH. Add CUDA_PATH\bin if available.
                path_dirs = [binary_dir]
                cuda_path = os.environ.get("CUDA_PATH", "")
                if cuda_path:
                    cuda_bin = os.path.join(cuda_path, "bin")
                    if os.path.isdir(cuda_bin):
                        path_dirs.append(cuda_bin)
                    # Some CUDA installs put DLLs in bin\x64
                    cuda_bin_x64 = os.path.join(cuda_path, "bin", "x64")
                    if os.path.isdir(cuda_bin_x64):
                        path_dirs.append(cuda_bin_x64)
                existing_path = env.get("PATH", "")
                env["PATH"] = ";".join(path_dirs) + ";" + existing_path
            else:
                # Linux: set LD_LIBRARY_PATH for shared libs next to the binary
                # and CUDA runtime libs (libcudart, libcublas, etc.)
                lib_dirs = [binary_dir]
                _arch = platform.machine()  # x86_64, aarch64, etc.
                for cuda_lib in [
                    "/usr/local/cuda/lib64",
                    f"/usr/local/cuda/targets/{_arch}-linux/lib",
                    # Fallback CUDA compat paths (e.g. binary built with
                    # CUDA 12 on a system where default /usr/local/cuda
                    # points to CUDA 13+).
                    "/usr/local/cuda-12/lib64",
                    "/usr/local/cuda-12.8/lib64",
                    f"/usr/local/cuda-12/targets/{_arch}-linux/lib",
                    f"/usr/local/cuda-12.8/targets/{_arch}-linux/lib",
                ]:
                    if os.path.isdir(cuda_lib):
                        lib_dirs.append(cuda_lib)
                existing_ld = env.get("LD_LIBRARY_PATH", "")
                new_ld = ":".join(lib_dirs)
                env["LD_LIBRARY_PATH"] = (
                    f"{new_ld}:{existing_ld}" if existing_ld else new_ld
                )

            # Pin to selected GPU(s) via CUDA_VISIBLE_DEVICES
            if gpu_indices is not None:
                env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_indices)

            self._stdout_lines = []
            self._process = subprocess.Popen(
                cmd,
                stdout = subprocess.PIPE,
                stderr = subprocess.STDOUT,
                text = True,
                # Decode server output as UTF-8 and never raise on bad bytes:
                # a decode error in the drain thread would stop it and let
                # the pipe fill up.
                encoding = "utf-8",
                errors = "replace",
                env = env,
            )

            # Start background thread to drain stdout and prevent pipe deadlock
            self._stdout_thread = threading.Thread(
                target = self._drain_stdout, daemon = True, name = "llama-stdout"
            )
            self._stdout_thread.start()

            self._gguf_path = gguf_path
            self._hf_repo = hf_repo
            self._hf_variant = hf_variant
            self._is_vision = is_vision
            self._model_identifier = model_identifier

            # Wait for llama-server to become healthy
            if not self._wait_for_health(timeout = 120.0):
                self._kill_process()
                raise RuntimeError(
                    "llama-server failed to start. "
                    "Check that the GGUF file is valid and you have enough memory."
                )

            self._healthy = True

            logger.info(
                f"llama-server ready on port {self._port} "
                f"for model '{model_identifier}'"
            )
            return True

    def unload_model(self) -> bool:
        """Stop llama-server, abort any in-flight download and reset state.

        The cancel event is set before the lock is taken so that a load
        running its download phase observes it. Always returns True.
        """
        self._cancel_event.set()
        with self._lock:
            self._kill_process()
            logger.info(f"Unloaded GGUF model: {self._model_identifier}")

            # Reset every per-model field to its "nothing loaded" value.
            for cleared in (
                "_model_identifier",
                "_gguf_path",
                "_hf_repo",
                "_hf_variant",
                "_audio_type",
                "_port",
                "_context_length",
                "_chat_template",
                "_cache_type_kv",
            ):
                setattr(self, cleared, None)
            for flag in (
                "_is_vision",
                "_is_audio",
                "_healthy",
                "_supports_reasoning",
                "_supports_tools",
            ):
                setattr(self, flag, False)

            # Remove the temporary chat template file, if one was created.
            template_file = getattr(self, "_chat_template_file", None)
            if template_file:
                # Best effort: any failure to import or unlink is ignored.
                with contextlib.suppress(Exception):
                    import os

                    os.unlink(template_file.name)
                self._chat_template_file = None

            # Release the audio codec and the GPU memory it holds.
            codec_mgr = LlamaCppBackend._codec_mgr
            if codec_mgr is not None:
                codec_mgr.unload()
                LlamaCppBackend._codec_mgr = None
                import torch

                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            return True

    def _kill_process(self):
        """Terminate the subprocess if running."""
        if self._process is None:
            return
        try:
            self._process.terminate()
            self._process.wait(timeout = 5)
        except subprocess.TimeoutExpired:
            logger.warning("llama-server did not exit on SIGTERM, sending SIGKILL")
            self._process.kill()
            self._process.wait(timeout = 5)
        except Exception as e:
            logger.warning(f"Error killing llama-server process: {e}")
        finally:
            self._process = None
            if self._stdout_thread is not None:
                self._stdout_thread.join(timeout = 2)
                self._stdout_thread = None

    @staticmethod
    def _kill_orphaned_servers():
        """Kill orphaned llama-server processes started by studio.

        Candidates are found with ``pgrep -a -f llama-server``. A process
        is killed only when the part of its command line up to and
        including the ``llama-server`` binary name contains "unsloth"
        (e.g. a binary under ~/.unsloth/llama.cpp/). Arguments after the
        binary name are ignored on purpose: an unrelated llama-server that
        merely serves a model whose path or repo id contains "unsloth"
        must not be terminated.

        Best effort: every failure (pgrep missing, permissions, platform
        without SIGKILL) is swallowed and the method returns None.
        """
        import os
        import signal

        marker = "llama-server"
        try:
            # Use pgrep with full command match to identify candidate servers
            result = subprocess.run(
                ["pgrep", "-a", "-f", marker],
                capture_output = True,
                text = True,
                timeout = 5,
            )
            if result.returncode != 0:
                return
            own_pid = os.getpid()
            for line in result.stdout.strip().splitlines():
                parts = line.strip().split(None, 1)
                if len(parts) < 2:
                    continue
                try:
                    pid = int(parts[0])
                except ValueError:
                    # Unexpected pgrep line; skip it instead of aborting the scan.
                    continue
                if pid == own_pid:
                    continue
                cmdline = parts[1]
                marker_pos = cmdline.find(marker)
                if marker_pos < 0:
                    continue
                # Only the path leading to the binary decides ownership.
                binary_part = cmdline[: marker_pos + len(marker)]
                if "unsloth" not in binary_part.lower():
                    continue
                try:
                    os.kill(pid, signal.SIGKILL)
                    logger.info(f"Killed orphaned llama-server process (pid={pid})")
                except (ProcessLookupError, PermissionError):
                    # Already gone, or owned by another user: nothing to do.
                    pass
        except Exception as e:
            logger.debug(f"Orphaned llama-server cleanup skipped: {e}")

    def _cleanup(self):
        """atexit handler to ensure llama-server is terminated.

        Delegates to ``_kill_process()``, which is a no-op when no
        subprocess is running. NOTE(review): the atexit registration itself
        is not visible in this part of the file.
        """
        self._kill_process()

    def _wait_for_health(self, timeout: float = 120.0, interval: float = 0.5) -> bool:
        """
        Poll llama-server's /health endpoint until it responds 200.

        Also monitors subprocess for early exit/crash.

        Args:
            timeout: Maximum number of seconds to keep polling.
            interval: Sleep between polls, in seconds.

        Returns:
            True once /health answers 200; False if the process exits
            first (its last output lines are logged) or the timeout elapses.
        """
        deadline = time.monotonic() + timeout
        url = f"http://127.0.0.1:{self._port}/health"

        while time.monotonic() < deadline:
            # Check if process crashed
            if self._process.poll() is not None:
                # Give the drain thread a moment to collect final output
                if self._stdout_thread is not None:
                    self._stdout_thread.join(timeout = 2)
                output = "\n".join(self._stdout_lines[-50:])
                logger.error(
                    f"llama-server exited with code {self._process.returncode}. "
                    f"Output: {output[:2000]}"
                )
                return False

            try:
                resp = httpx.get(url, timeout = 2.0)
                if resp.status_code == 200:
                    return True
            except httpx.TransportError:
                # Server not ready yet. TransportError covers connect and
                # timeout errors as well as read/protocol errors (e.g. a
                # connection reset while the server is still starting), so
                # none of them abort the wait.
                pass

            time.sleep(interval)

        logger.error(f"llama-server health check timed out after {timeout}s")
        return False

    # ── Message building (OpenAI format) ──────────────────────────

    @staticmethod
    def _parse_tool_calls_from_text(content: str) -> list[dict]:
        """
        Parse tool calls from XML markup in content text.

        Handles formats like:
          <tool_call>{"name":"web_search","arguments":{"query":"..."}}</tool_call>
          <tool_call><function=web_search><parameter=query>...</parameter></function></tool_call>
        Closing tags (</tool_call>, </function>, </parameter>) are all optional
        since models frequently omit them.
        """
        import re

        tool_calls = []

        # Pattern 1: JSON inside <tool_call> tags.
        # Use balanced-brace extraction that skips braces inside JSON strings.
        for m in re.finditer(r"<tool_call>\s*\{", content):
            brace_start = m.end() - 1  # position of the opening {
            depth, i = 0, brace_start
            in_string = False
            while i < len(content):
                ch = content[i]
                if in_string:
                    if ch == "\\" and i + 1 < len(content):
                        i += 2  # skip escaped character
                        continue
                    if ch == '"':
                        in_string = False
                elif ch == '"':
                    in_string = True
                elif ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        break
                i += 1
            if depth == 0:
                json_str = content[brace_start : i + 1]
                try:
                    obj = json.loads(json_str)
                    tc = {
                        "id": f"call_{len(tool_calls)}",
                        "type": "function",
                        "function": {
                            "name": obj.get("name", ""),
                            "arguments": obj.get("arguments", {}),
                        },
                    }
                    if isinstance(tc["function"]["arguments"], dict):
                        tc["function"]["arguments"] = json.dumps(
                            tc["function"]["arguments"]
                        )
                    tool_calls.append(tc)
                except (json.JSONDecodeError, ValueError):
                    pass

        # Pattern 2: XML-style <function=name><parameter=key>value</parameter></function>
        # All closing tags optional -- models frequently omit </parameter>,
        # </function>, and/or </tool_call>.
        if not tool_calls:
            # Step 1: Find all <function=name> positions and extract their bodies.
            # Body boundary: use only </tool_call> or next <function= as hard
            # boundaries.  We avoid using </function> as a boundary because
            # code parameter values can contain that literal string.
            # After extracting, we trim a trailing </function> if present.
            func_starts = list(re.finditer(r"<function=(\w+)>\s*", content))
            for idx, fm in enumerate(func_starts):
                func_name = fm.group(1)
                body_start = fm.end()
                # Hard boundaries: next <function= tag or </tool_call>
                next_func = (
                    func_starts[idx + 1].start()
                    if idx + 1 < len(func_starts)
                    else len(content)
                )
                end_tag = re.search(r"</tool_call>", content[body_start:])
                if end_tag:
                    body_end = body_start + end_tag.start()
                else:
                    body_end = len(content)
                body_end = min(body_end, next_func)
                body = content[body_start:body_end]
                # Trim trailing </function> if present (it's the real closing tag)
                body = re.sub(r"\s*</function>\s*$", "", body)

                # Step 2: Extract parameters from body.
                # For single-parameter functions (the common case: code, command,
                # query), use body end as the only boundary to avoid false matches
                # on </parameter> inside code strings.
                arguments = {}
                param_starts = list(re.finditer(r"<parameter=(\w+)>\s*", body))
                if len(param_starts) == 1:
                    # Single parameter: value is everything from after the tag
                    # to end of body, trimming any trailing </parameter>.
                    pm = param_starts[0]
                    val = body[pm.end() :]
                    val = re.sub(r"\s*</parameter>\s*$", "", val)
                    arguments[pm.group(1)] = val.strip()
                else:
                    for pidx, pm in enumerate(param_starts):
                        param_name = pm.group(1)
                        val_start = pm.end()
                        # Value ends at next <parameter= or end of body
                        next_param = (
                            param_starts[pidx + 1].start()
                            if pidx + 1 < len(param_starts)
                            else len(body)
                        )
                        val = body[val_start:next_param]
                        # Trim trailing </parameter> if present
                        val = re.sub(r"\s*</parameter>\s*$", "", val)
                        arguments[param_name] = val.strip()

                tc = {
                    "id": f"call_{len(tool_calls)}",
                    "type": "function",
                    "function": {
                        "name": func_name,
                        "arguments": json.dumps(arguments),
                    },
                }
                tool_calls.append(tc)

        return tool_calls

    @staticmethod
    def _build_openai_messages(
        messages: list[dict],
        image_b64: Optional[str] = None,
    ) -> list[dict]:
        """
        Build OpenAI-format messages, optionally injecting an image_url
        content part into the last user message for vision models.

        If no image is provided, returns messages as-is.
        """
        if not image_b64:
            return messages

        # Find the last user message and convert to multimodal content parts
        result = [msg.copy() for msg in messages]
        last_user_idx = None
        for i, msg in enumerate(result):
            if msg["role"] == "user":
                last_user_idx = i

        if last_user_idx is not None:
            text_content = result[last_user_idx].get("content", "")
            result[last_user_idx]["content"] = [
                {"type": "text", "text": text_content},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{image_b64}",
                    },
                },
            ]

        return result

    # ── Generation (proxy to llama-server) ────────────────────────

    @staticmethod
    def _iter_text_cancellable(
        response: "httpx.Response",
        cancel_event: Optional[threading.Event] = None,
    ) -> Generator[str, None, None]:
        """Iterate over an httpx streaming response with cancel support.

        Checks cancel_event between chunks and on ReadTimeout.  The
        cancel watcher in _stream_with_retry also calls response.close()
        on cancel, which unblocks iter_text() once the response exists.
        During normal streaming llama-server sends tokens frequently,
        so the cancel check between chunks is the primary mechanism.
        """
        text_iter = response.iter_text()
        while True:
            if cancel_event is not None and cancel_event.is_set():
                response.close()
                return
            try:
                chunk = next(text_iter)
                yield chunk
            except StopIteration:
                return
            except httpx.ReadTimeout:
                # No data within the timeout window -- just loop back
                # and re-check cancel_event.
                continue

    @staticmethod
    @contextlib.contextmanager
    def _stream_with_retry(
        client: "httpx.Client",
        url: str,
        payload: dict,
        cancel_event: Optional[threading.Event] = None,
    ):
        """Open an httpx streaming POST with cancel support.

        Despite the name, the request is sent exactly once.  It uses a long
        read timeout (120 s) so prompt processing (prefill) can finish
        without triggering a retry storm.  The previous 0.5 s timeout caused
        duplicate POST requests every half second, forcing llama-server to
        restart processing each time.

        A background watcher thread provides cancel by closing the
        response when cancel_event is set.  Limitation: httpx does not
        allow interrupting a blocked read from another thread before
        the response object exists, so cancel during the initial
        header wait (prefill phase) only takes effect once headers
        arrive.  After that, response.close() unblocks reads promptly.
        In practice llama-server prefill is 1-5 s for typical prompts,
        during which cancel is deferred -- still much better than the
        old retry storm which made prefill slower.

        Args:
            client: httpx client used to issue the request.  The per-request
                timeout passed to ``client.stream`` below is used for this
                request instead of the client's own timeout.
            url: Endpoint to POST to.
            payload: JSON-serialisable request body.
            cancel_event: Optional event signalling that the caller wants
                to abort.  When None, no watcher thread is started.

        Yields:
            The open streaming ``httpx.Response``; it is closed when the
            ``with`` block of the caller exits.

        Raises:
            GeneratorExit: if cancel_event is already set on entry, is set
                by the time the response headers arrive, or is set when an
                httpx ReadError / RemoteProtocolError / CloseError surfaces
                (i.e. the watcher closed the response).
            httpx.ReadError, httpx.RemoteProtocolError, httpx.CloseError:
                re-raised unchanged when no cancel was requested.
        """
        # Fast path: never send the request if cancel was already requested.
        if cancel_event is not None and cancel_event.is_set():
            raise GeneratorExit

        # Background watcher: close the response if cancel is requested.
        # Only effective after response headers arrive (httpx limitation).
        # _cancel_closed tells the watcher that the main thread is done;
        # _response_ref is a one-slot box the main thread fills once the
        # response object exists.
        _cancel_closed = threading.Event()
        _response_ref: list = [None]

        def _cancel_watcher():
            # Poll cancel_event in 0.3 s slices so the thread also notices
            # _cancel_closed and exits when the request finishes normally.
            while not _cancel_closed.is_set():
                if cancel_event.wait(timeout = 0.3):
                    # Cancel requested. Keep polling until the response object
                    # exists so we can close it, or until the main thread
                    # finishes on its own (_cancel_closed is set in finally).
                    while not _cancel_closed.is_set():
                        r = _response_ref[0]
                        if r is not None:
                            try:
                                r.close()
                                return
                            except Exception as e:
                                logger.debug(
                                    f"Error closing response in cancel watcher: {e}"
                                )
                        # Response not created yet (or close() failed) --
                        # wait briefly and retry
                        _cancel_closed.wait(timeout = 0.1)
                    return

        # The watcher is a daemon thread and is never joined; it stops on its
        # own once _cancel_closed is set in the finally block below.
        watcher = None
        if cancel_event is not None:
            watcher = threading.Thread(
                target = _cancel_watcher, daemon = True, name = "prefill-cancel"
            )
            watcher.start()

        try:
            # Long read timeout so prefill (prompt processing) can finish
            # without triggering a retry storm.  Cancel is handled by the
            # watcher thread, which closes the response and thereby unblocks
            # any httpx read -- but only once the response object exists,
            # i.e. after the headers have arrived (see docstring).
            prefill_timeout = httpx.Timeout(
                connect = 30,
                read = 120.0,
                write = 10,
                pool = 10,
            )
            with client.stream(
                "POST", url, json = payload, timeout = prefill_timeout
            ) as response:
                # Publish the response so the watcher can close it.
                _response_ref[0] = response
                # Cancel may have been requested while waiting for headers.
                if cancel_event is not None and cancel_event.is_set():
                    raise GeneratorExit
                yield response
                return
        except (httpx.ReadError, httpx.RemoteProtocolError, httpx.CloseError):
            # Response was closed by the cancel watcher.  Exceptions raised
            # inside the caller's ``with`` body are re-raised at the yield
            # above by contextlib, so they are handled here as well.
            if cancel_event is not None and cancel_event.is_set():
                raise GeneratorExit
            raise
        finally:
            # Always release the watcher thread, whatever the outcome.
            _cancel_closed.set()

    def generate_chat_completion(
        self,
        messages: list[dict],
        image_b64: Optional[str] = None,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20,
        min_p: float = 0.01,
        max_tokens: Optional[int] = None,
        repetition_penalty: float = 1.0,
        presence_penalty: float = 0.0,
        stop: Optional[list[str]] = None,
        cancel_event: Optional[threading.Event] = None,
        enable_thinking: Optional[bool] = None,
    ) -> Generator[str, None, None]:
        """
        Stream a chat completion from llama-server as cumulative text.

        The request goes to /v1/chat/completions with ``stream`` enabled;
        messages are first passed through ``_build_openai_messages`` so an
        optional image can be attached.

        After every reasoning or content delta the generator yields the
        full text produced so far (not just the delta).  Deltas carried in
        ``reasoning_content`` are wrapped in ``<think>...</think>``.  If the
        stream finishes while a think block is still open and no content
        delta was ever seen, the last value yielded is the bare reasoning
        text without the tags.

        Raises:
            RuntimeError: when the server is not loaded, answers with a
                non-200 status, or the connection cannot be established.

        When ``cancel_event`` is set, any other exception is swallowed and
        the generator simply ends.
        """
        if not self.is_loaded:
            raise RuntimeError("llama-server is not loaded")

        request_body = {
            "messages": self._build_openai_messages(messages, image_b64),
            "stream": True,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k if top_k >= 0 else 0,
            "min_p": min_p,
            "repeat_penalty": repetition_penalty,
            "presence_penalty": presence_penalty,
        }
        # Reasoning models accept a per-request thinking toggle.
        if self._supports_reasoning and enable_thinking is not None:
            request_body["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
        if max_tokens is not None:
            request_body["max_tokens"] = max_tokens
        if stop:
            request_body["stop"] = stop

        endpoint = f"{self.base_url}/v1/chat/completions"
        emitted = ""
        think_open = False

        try:
            # The request itself is issued by _stream_with_retry, which
            # passes its own per-request timeout; cancellation while
            # streaming relies on its watcher thread closing the response.
            client_timeout = httpx.Timeout(connect = 10, read = 0.5, write = 10, pool = 10)
            with httpx.Client(timeout = client_timeout) as client, self._stream_with_retry(
                client, endpoint, request_body, cancel_event
            ) as response:
                if response.status_code != 200:
                    error_body = response.read().decode()
                    raise RuntimeError(
                        f"llama-server returned {response.status_code}: {error_body}"
                    )

                pending = ""
                saw_content = False
                thoughts = ""
                for piece in self._iter_text_cancellable(response, cancel_event):
                    pending += piece
                    # Consume every complete line currently buffered.
                    while "\n" in pending:
                        raw_line, pending = pending.split("\n", 1)
                        event = raw_line.strip()

                        if not event:
                            continue
                        if event == "data: [DONE]":
                            if think_open and saw_content:
                                # Thinking followed content: close the tag.
                                emitted += "</think>"
                                yield emitted
                            elif think_open:
                                # The whole reply arrived as reasoning
                                # (e.g. Qwen3 always-think mode): present it
                                # as the answer rather than a thinking block.
                                emitted = thoughts
                                yield emitted
                            return
                        if not event.startswith("data: "):
                            continue

                        try:
                            packet = json.loads(event[6:])
                            choice_list = packet.get("choices", [])
                            if not choice_list:
                                continue
                            delta = choice_list[0].get("delta", {})

                            # llama-server reports thinking tokens under
                            # "reasoning_content"; wrap them in <think> so
                            # the frontend parser can recognise them.
                            thought = delta.get("reasoning_content", "")
                            if thought:
                                thoughts += thought
                                if not think_open:
                                    emitted += "<think>"
                                    think_open = True
                                emitted += thought
                                yield emitted

                            text_piece = delta.get("content", "")
                            if text_piece:
                                saw_content = True
                                if think_open:
                                    emitted += "</think>"
                                    think_open = False
                                emitted += text_piece
                                yield emitted
                        except json.JSONDecodeError:
                            logger.debug(
                                f"Skipping malformed SSE line: {event[:100]}"
                            )

        except httpx.ConnectError:
            raise RuntimeError("Lost connection to llama-server")
        except Exception:
            # Errors triggered by a cancel are not reported to the caller.
            if cancel_event is not None and cancel_event.is_set():
                return
            raise

    # ── Tool-calling agentic loop ──────────────────────────────

    def generate_chat_completion_with_tools(
        self,
        messages: list[dict],
        tools: list[dict],
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20,
        min_p: float = 0.01,
        max_tokens: Optional[int] = None,
        repetition_penalty: float = 1.0,
        presence_penalty: float = 0.0,
        stop: Optional[list[str]] = None,
        cancel_event: Optional[threading.Event] = None,
        enable_thinking: Optional[bool] = None,
        max_tool_iterations: int = 10,
        auto_heal_tool_calls: bool = True,
        tool_call_timeout: int = 300,
        session_id: Optional[str] = None,
    ) -> Generator[dict, None, None]:
        """
        Agentic loop: let the model call tools, execute them, and continue.

        Up to ``max_tool_iterations`` non-streaming requests are sent with
        ``tools`` attached.  While the model keeps asking for tools, each
        call is executed via ``execute_tool`` and its result appended to the
        conversation.  If the very first response is a plain answer it is
        yielded directly; otherwise a final streaming request (without
        tools) lets the model answer using the tool results.

        With ``auto_heal_tool_calls`` enabled, tool calls written as
        ``<tool_call>`` / ``<function=...>`` text are parsed from the
        content, unparseable argument strings are passed as
        ``{"query": ...}``, and tool markup is stripped from streamed text.

        Yields dicts with:
          {"type": "status", "text": "Searching: ..."}   -- tool status updates ("" clears it)
          {"type": "tool_start", "tool_name": ..., "tool_call_id": ..., "arguments": {...}}
          {"type": "tool_end", "tool_name": ..., "tool_call_id": ..., "result": ...}
          {"type": "content", "text": "..."}              -- cumulative text; reasoning is
                                                             wrapped in <think>...</think>

        Raises:
            RuntimeError: when the server is not loaded, answers with a
                non-200 status, or the connection cannot be established.
        """
        # Fail fast before importing the tool machinery.
        if not self.is_loaded:
            raise RuntimeError("llama-server is not loaded")

        import re

        from core.inference.tools import execute_tool

        # Tool-call markup some models emit as plain text.  The closed forms
        # are non-greedy so each block is removed on its own; the open forms
        # strip from an unterminated opening tag to end-of-string.
        closed_tool_call = re.compile(r"<tool_call>.*?</tool_call>", re.DOTALL)
        open_tool_call = re.compile(r"<tool_call>.*$", re.DOTALL)
        closed_function = re.compile(r"<function=\w+>.*?</function>", re.DOTALL)
        open_function = re.compile(r"<function=\w+>.*$", re.DOTALL)
        # Order used on the complete (non-streamed) detection response.
        first_pass_patterns = (
            closed_tool_call,
            open_tool_call,
            closed_function,
            open_function,
        )
        # Closed blocks only -- safe to strip mid-stream without shrinking later.
        stream_closed_patterns = (closed_tool_call, closed_function)
        # Open-ended patterns are only applied on the final flush to avoid
        # non-monotonic shrinking while text is still arriving.
        stream_final_patterns = stream_closed_patterns + (
            open_tool_call,
            open_function,
        )

        conversation = list(messages)
        url = f"{self.base_url}/v1/chat/completions"

        def _build_payload(*, stream: bool, with_tools: bool) -> dict:
            """Build the request body shared by the detection and final passes."""
            body = {
                "messages": conversation,
                "stream": stream,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k if top_k >= 0 else 0,
                "min_p": min_p,
                "repeat_penalty": repetition_penalty,
                "presence_penalty": presence_penalty,
            }
            if with_tools:
                body["tools"] = tools
                body["tool_choice"] = "auto"
            if self._supports_reasoning and enable_thinking is not None:
                body["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
            if max_tokens is not None:
                body["max_tokens"] = max_tokens
            if stop:
                body["stop"] = stop
            return body

        def _coerce_arguments(raw_args) -> dict:
            """Turn a tool call's raw ``arguments`` into a dict."""
            fallback_key = "query" if auto_heal_tool_calls else "raw"
            if isinstance(raw_args, str):
                try:
                    parsed = json.loads(raw_args)
                except (json.JSONDecodeError, ValueError):
                    return {fallback_key: raw_args}
            else:
                parsed = raw_args
            if isinstance(parsed, dict):
                return parsed
            if parsed is None:
                return {}
            # Valid JSON that is not an object (string, list, number...)
            # would crash the ``.get`` lookups below; wrap it the same way
            # as an unparseable argument string.
            return {fallback_key: raw_args}

        def _strip_tool_markup(text: str, *, final: bool = False) -> str:
            """Remove tool markup; ``final`` also drops unterminated blocks."""
            if not auto_heal_tool_calls:
                return text
            for pattern in stream_final_patterns if final else stream_closed_patterns:
                text = pattern.sub("", text)
            return text.strip() if final else text

        for iteration in range(max_tool_iterations):
            if cancel_event is not None and cancel_event.is_set():
                return

            # Non-streaming tool detection pass
            payload = _build_payload(stream = False, with_tools = True)

            try:
                with httpx.Client(timeout = None) as client:
                    resp = client.post(url, json = payload)
                    if resp.status_code != 200:
                        raise RuntimeError(
                            f"llama-server returned {resp.status_code}: {resp.text}"
                        )
                    data = resp.json()
            except httpx.ConnectError as exc:
                raise RuntimeError("Lost connection to llama-server") from exc

            choices = data.get("choices", [])
            if not choices:
                return

            choice = choices[0]
            finish_reason = choice.get("finish_reason", "")
            message = choice.get("message", {})

            # If model wants to call tools
            tool_calls = message.get("tool_calls")

            # Fallback: detect tool calls embedded as XML/text in content
            # Some models output <tool_call> XML instead of structured tool_calls,
            # or bare <function=...> tags without <tool_call> wrapper.
            content_text = message.get("content", "") or ""
            if (
                auto_heal_tool_calls
                and not tool_calls
                and ("<tool_call>" in content_text or "<function=" in content_text)
            ):
                tool_calls = self._parse_tool_calls_from_text(content_text)
                if tool_calls:
                    # Strip the tool call markup (closed and unterminated
                    # blocks) from the visible content.
                    for pattern in first_pass_patterns:
                        content_text = pattern.sub("", content_text)
                    content_text = content_text.strip()
                    logger.info(
                        f"Parsed {len(tool_calls)} tool call(s) from content text"
                    )

            if finish_reason == "tool_calls" or tool_calls:
                # Append the assistant message with tool_calls to conversation
                assistant_msg = {"role": "assistant", "content": content_text}
                if tool_calls:
                    assistant_msg["tool_calls"] = tool_calls
                conversation.append(assistant_msg)

                # Execute each tool call
                for tc in tool_calls or []:
                    func = tc.get("function", {})
                    tool_name = func.get("name", "")
                    arguments = _coerce_arguments(func.get("arguments", {}))

                    # Yield status update
                    if tool_name == "web_search":
                        status_text = f"Searching: {arguments.get('query', '')}"
                    elif tool_name == "python":
                        preview = (
                            (arguments.get("code") or "").strip().split("\n")[0][:60]
                        )
                        status_text = (
                            f"Running Python: {preview}"
                            if preview
                            else "Running Python..."
                        )
                    elif tool_name == "terminal":
                        cmd_preview = (arguments.get("command") or "")[:60]
                        status_text = (
                            f"Running: {cmd_preview}"
                            if cmd_preview
                            else "Running command..."
                        )
                    else:
                        status_text = f"Calling: {tool_name}"
                    yield {"type": "status", "text": status_text}

                    # Emit tool_start so the frontend can record inputs
                    yield {
                        "type": "tool_start",
                        "tool_name": tool_name,
                        "tool_call_id": tc.get("id", ""),
                        "arguments": arguments,
                    }

                    # Execute the tool; a timeout of 9999 or more means
                    # "no timeout".
                    _effective_timeout = (
                        None if tool_call_timeout >= 9999 else tool_call_timeout
                    )
                    result = execute_tool(
                        tool_name,
                        arguments,
                        cancel_event = cancel_event,
                        timeout = _effective_timeout,
                        session_id = session_id,
                    )

                    # Emit tool_end so the frontend can record outputs
                    yield {
                        "type": "tool_end",
                        "tool_name": tool_name,
                        "tool_call_id": tc.get("id", ""),
                        "result": result,
                    }

                    # Append tool result to conversation
                    tool_msg = {
                        "role": "tool",
                        "name": tool_name,
                        "content": result,
                    }
                    tool_call_id = tc.get("id")
                    if tool_call_id:
                        tool_msg["tool_call_id"] = tool_call_id
                    conversation.append(tool_msg)

                # Continue the loop to let model respond with context
                continue

            # No tool calls -- model answered directly.
            # If no tools were executed at all, just yield the content
            # from this response instead of making a redundant second request.
            if iteration == 0 and content_text:
                yield {"type": "status", "text": ""}
                yield {"type": "content", "text": content_text}
                return

            # Tools were called in previous iterations; do a final
            # streaming pass so the model can synthesize a response
            # incorporating the tool results.
            break

        # Clear status
        yield {"type": "status", "text": ""}

        # Final streaming pass with the full conversation context
        stream_payload = _build_payload(stream = True, with_tools = False)

        cumulative = ""
        _last_emitted = ""
        in_thinking = False
        has_content_tokens = False
        reasoning_text = ""

        try:
            stream_timeout = httpx.Timeout(connect = 10, read = 0.5, write = 10, pool = 10)
            with httpx.Client(timeout = stream_timeout) as client:
                with self._stream_with_retry(
                    client, url, stream_payload, cancel_event
                ) as response:
                    if response.status_code != 200:
                        error_body = response.read().decode()
                        raise RuntimeError(
                            f"llama-server returned {response.status_code}: {error_body}"
                        )

                    buffer = ""
                    for raw_chunk in self._iter_text_cancellable(
                        response, cancel_event
                    ):
                        buffer += raw_chunk
                        while "\n" in buffer:
                            line, buffer = buffer.split("\n", 1)
                            line = line.strip()

                            if not line:
                                continue
                            if line == "data: [DONE]":
                                if in_thinking:
                                    if has_content_tokens:
                                        cumulative += "</think>"
                                        yield {
                                            "type": "content",
                                            "text": _strip_tool_markup(
                                                cumulative, final = True
                                            ),
                                        }
                                    else:
                                        # Only reasoning arrived: show it as
                                        # the answer, not a thinking block.
                                        cumulative = reasoning_text
                                        yield {"type": "content", "text": cumulative}
                                elif has_content_tokens:
                                    # Final flush: only now is it safe to drop
                                    # unterminated tool markup.  Emit only if
                                    # that changes what was last sent.
                                    final_text = _strip_tool_markup(
                                        cumulative, final = True
                                    )
                                    if final_text != _last_emitted:
                                        yield {"type": "content", "text": final_text}
                                return
                            if not line.startswith("data: "):
                                continue

                            try:
                                chunk_data = json.loads(line[6:])
                                choices = chunk_data.get("choices", [])
                                if choices:
                                    delta = choices[0].get("delta", {})

                                    reasoning = delta.get("reasoning_content", "")
                                    if reasoning:
                                        reasoning_text += reasoning
                                        if not in_thinking:
                                            cumulative += "<think>"
                                            in_thinking = True
                                        cumulative += reasoning
                                        yield {"type": "content", "text": cumulative}

                                    token = delta.get("content", "")
                                    if token:
                                        has_content_tokens = True
                                        if in_thinking:
                                            cumulative += "</think>"
                                            in_thinking = False
                                        cumulative += token
                                        cleaned = _strip_tool_markup(cumulative)
                                        # Only emit when cleaned text grows (monotonic).
                                        if len(cleaned) > len(_last_emitted):
                                            _last_emitted = cleaned
                                            yield {"type": "content", "text": cleaned}
                            except json.JSONDecodeError:
                                logger.debug(
                                    f"Skipping malformed SSE line: {line[:100]}"
                                )

        except httpx.ConnectError as exc:
            raise RuntimeError("Lost connection to llama-server") from exc
        except Exception:
            # Errors triggered by a cancel are not reported to the caller.
            if cancel_event is not None and cancel_event.is_set():
                return
            raise

    # ── TTS support ────────────────────────────────────────────

    def detect_audio_type(self) -> Optional[str]:
        """Guess the audio/TTS codec of the loaded model from its vocabulary.

        The server's ``/detokenize`` and ``/tokenize`` endpoints are probed
        for codec-specific tokens.  The first matching codec name is
        returned, checked in this order: "snac", "csm", "whisper",
        "bicodec", "dac".  Returns None when no model is loaded, nothing
        matches, or any error occurs while probing (logged at debug level).
        """
        if not self.is_loaded:
            return None
        try:
            with httpx.Client(timeout = 10) as client:

                def token_text(token_id: int) -> str:
                    # A non-200 answer counts as "no text".
                    reply = client.post(
                        f"{self.base_url}/detokenize", json = {"tokens": [token_id]}
                    )
                    if reply.status_code != 200:
                        return ""
                    return reply.json().get("content", "")

                def token_ids(text: str) -> list[int]:
                    # A non-200 answer counts as "no tokens".
                    reply = client.post(
                        f"{self.base_url}/tokenize",
                        json = {"content": text, "add_special": False},
                    )
                    if reply.status_code != 200:
                        return []
                    return reply.json().get("tokens", [])

                def is_single_token(text: str) -> bool:
                    return len(token_ids(text)) == 1

                # Check codec-specific tokens (not generic ones that may
                # exist in non-audio models).  all() stops at the first
                # failing probe, so no extra requests are made.
                if all(
                    "<custom_token_" in token_text(tid) for tid in (128258, 128259)
                ):
                    return "snac"

                marker_table = (
                    ("csm", ("<|AUDIO|>", "<|audio_eos|>")),
                    ("whisper", ("<|startoftranscript|>",)),
                    ("bicodec", ("<|bicodec_semantic_0|>", "<|bicodec_global_0|>")),
                    ("dac", ("<|c1_0|>", "<|c2_0|>")),
                )
                for codec_name, markers in marker_table:
                    if all(is_single_token(marker) for marker in markers):
                        return codec_name
        except Exception as e:
            logger.debug(f"Audio type detection failed: {e}")
        return None

    # Prompt format per codec: (template, stop_tokens, needs_token_ids)
    #   template        -- prompt string; generate_audio_response fills the
    #                      ``{text}`` placeholder via str.format.
    #   stop_tokens     -- sent as the request's "stop" list.
    #   needs_token_ids -- when True the request asks for ``n_probs = 1`` and
    #                      the generated token ids are read back from
    #                      "completion_probabilities" for codec decoding.
    # Matches prompts in InferenceBackend._generate_snac/bicodec/dac
    # Codecs without an entry here (detect_audio_type can also report "csm"
    # and "whisper") are rejected by generate_audio_response.
    _TTS_PROMPTS = {
        "snac": (
            "<custom_token_3>{text}<|eot_id|><custom_token_4>",
            ["<custom_token_2>"],
            True,
        ),
        "bicodec": (
            "<|task_tts|><|start_content|>{text}<|end_content|><|start_global_token|>",
            ["<|im_end|>", "</s>"],
            False,
        ),
        "dac": (
            "<|im_start|>\n<|text_start|>{text}<|text_end|>\n<|audio_start|><|global_features_start|>\n",
            ["<|im_end|>", "<|audio_end|>"],
            False,
        ),
    }

    # Shared AudioCodecManager instance: created lazily by init_audio_codec
    # and stored on the class, so it stays None until that method has run.
    _codec_mgr = None

    def init_audio_codec(self, audio_type: str) -> None:
        """Create the shared codec manager if needed and load ``audio_type``.

        The manager is stored on ``LlamaCppBackend`` itself, so it is shared
        by every backend instance.  The codec is loaded on "cuda" when torch
        reports CUDA as available, otherwise on "cpu".  For the "bicodec"
        codec the ``unsloth/Spark-TTS-0.5B`` repository is downloaded first
        (into a local ``Spark-TTS-0.5B`` directory) and its absolute path is
        handed to the codec loader.
        """
        import torch
        from core.inference.audio_codecs import AudioCodecManager

        backend_cls = LlamaCppBackend
        if backend_cls._codec_mgr is None:
            backend_cls._codec_mgr = AudioCodecManager()

        if torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"

        if audio_type != "bicodec":
            weights_dir = None
        else:
            # BiCodec needs a repo with BiCodec/ weights — download
            # canonical SparkTTS
            from huggingface_hub import snapshot_download
            import os

            downloaded = snapshot_download(
                "unsloth/Spark-TTS-0.5B", local_dir = "Spark-TTS-0.5B"
            )
            weights_dir = os.path.abspath(downloaded)

        backend_cls._codec_mgr.load_codec(
            audio_type, device, model_repo_path = weights_dir
        )
        logger.info(f"Loaded audio codec for GGUF TTS: {audio_type}")

    def generate_audio_response(
        self,
        text: str,
        audio_type: str,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 50,
        min_p: float = 0.0,
        max_new_tokens: int = 2048,
        repetition_penalty: float = 1.1,
    ) -> tuple:
        """
        Generate TTS audio via llama-server /completion + codec decoding.
        Returns (wav_bytes, sample_rate).
        """
        if audio_type not in self._TTS_PROMPTS:
            raise RuntimeError(f"GGUF TTS does not support '{audio_type}' codec.")

        tpl, stop, need_ids = self._TTS_PROMPTS[audio_type]

        payload: dict = {
            "prompt": tpl.format(text = text),
            "stream": False,
            "n_predict": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k if top_k >= 0 else 0,
            "min_p": min_p,
            "repeat_penalty": repetition_penalty,
        }
        if stop:
            payload["stop"] = stop
        if need_ids:
            payload["n_probs"] = 1

        with httpx.Client(timeout = httpx.Timeout(300, connect = 10)) as client:
            resp = client.post(f"{self.base_url}/completion", json = payload)
            if resp.status_code != 200:
                raise RuntimeError(
                    f"llama-server returned {resp.status_code}: {resp.text}"
                )

        data = resp.json()
        token_ids = (
            [p["id"] for p in data.get("completion_probabilities", []) if "id" in p]
            if need_ids
            else None
        )

        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"
        return LlamaCppBackend._codec_mgr.decode(
            audio_type, device, token_ids = token_ids, text = data.get("content", "")
        )


================================================
FILE: studio/backend/core/inference/orchestrator.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Inference orchestrator — subprocess-based.

Provides the same API as InferenceBackend, but delegates all ML work
to a persistent subprocess. The subprocess is spawned on first model load
and stays alive for subsequent requests.

When switching between models that need different transformers versions
(e.g. GLM-4.7-Flash needs 5.x, Qwen needs 4.57.x), the old subprocess
is killed and a new one is spawned with the correct version.

Pattern follows core/training/training.py.
"""

import atexit
import base64
import structlog
from loggers import get_logger
import multiprocessing as mp
import queue
import threading
import time
import uuid
from io import BytesIO
from pathlib import Path
from typing import Any, Generator, Optional, Tuple, Union

# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)

# Multiprocessing context using the "spawn" start method. The orchestrator
# creates its queues, cancel event and worker process from this context.
_CTX = mp.get_context("spawn")

# Dispatcher timeout constants (seconds)
# How long a dispatched request blocks on its mailbox before re-checking
# that the subprocess is still alive.
_DISPATCH_READ_TIMEOUT = 30.0
# Poll interval used by the dispatcher loop's resp_queue reads and as the
# per-read cap when a mailbox is drained after a cancel.
_DISPATCH_POLL_INTERVAL = 0.5
# Maximum time to wait for the dispatcher thread to join when stopping it.
_DISPATCH_STOP_TIMEOUT = 5.0
# Maximum time to wait for in-flight dispatched requests to finish before
# the lock-based generation path proceeds.
_DISPATCH_IDLE_TIMEOUT = 30.0
# NOTE(review): not referenced in the visible part of this module; the drain
# call sites shown here pass a literal 5.0 instead — confirm whether this
# constant is used further down the file.
_DISPATCH_DRAIN_TIMEOUT = 5.0


class InferenceOrchestrator:
    """
    Inference backend orchestrator — subprocess-based.

    Exposes the same API surface as InferenceBackend so routes/inference.py
    needs minimal changes. Internally, all heavy ML operations happen in
    a persistent subprocess.
    """

    def __init__(self):
        """Initialise orchestrator state without spawning a subprocess.

        Every subprocess handle starts out as ``None``; they are filled in by
        ``_spawn_subprocess``. The constructor also registers ``_cleanup``
        with ``atexit`` and starts a daemon thread running
        ``_fetch_top_models``.
        """
        # --- Worker subprocess and its IPC handles ---
        self._proc: Optional[mp.Process] = None
        self._cmd_queue: Any = None
        self._resp_queue: Any = None
        # mp.Event; setting it cancels the running generation instantly
        self._cancel_event: Any = None
        self._lock = threading.Lock()
        # Serializes generation on the non-dispatched path: one request
        # at a time.
        self._gen_lock = threading.Lock()

        # --- Dispatcher state for compare mode ---
        # Adapter-controlled requests skip _gen_lock: they send commands
        # straight to the subprocess and read from a private mailbox, while
        # a dispatcher thread routes resp_queue events by request_id.
        self._mailboxes: dict[str, queue.Queue] = {}
        self._mailbox_lock = threading.Lock()  # guards the _mailboxes dict
        self._dispatcher_thread: Optional[threading.Thread] = None
        self._dispatcher_stop = threading.Event()

        # --- Local mirrors of subprocess state (updated from responses) ---
        self.active_model_name: Optional[str] = None
        self.models: dict = {}
        self.loading_models: set = set()
        self.loaded_local_models: list = []

        # --- Default-model suggestions ---
        from core.inference.defaults import get_default_models

        self._static_models = get_default_models()
        self._top_gguf_cache: Optional[list[str]] = None
        self._top_hub_cache: Optional[list[str]] = None
        self._top_models_ready = threading.Event()

        # Transformers major version ("4" or "5") of the current subprocess
        self._current_transformers_major: Optional[str] = None

        atexit.register(self._cleanup)
        logger.info("InferenceOrchestrator initialized (subprocess mode)")

        # Fetch the top models from HF in the background so construction
        # does not wait on the network.
        top_models_fetcher = threading.Thread(
            target = self._fetch_top_models,
            daemon = True,
            name = "top-models",
        )
        top_models_fetcher.start()

    # ------------------------------------------------------------------
    # Default models (top GGUFs fetched dynamically from HF)
    # ------------------------------------------------------------------

    @property
    def default_models(self) -> list[str]:
        # Wait up to 5s for background HF fetch to finish
        self._top_models_ready.wait(timeout = 5)
        top_gguf = self._top_gguf_cache or []
        top_hub = self._top_hub_cache or []
        # GGUFs first, then hub models, then static fallbacks.
        # Send extras so the frontend still has 4 per category
        # after removing already-downloaded models.
        result: list[str] = []
        seen: set[str] = set()
        for m in top_gguf + top_hub + self._static_models:
            if m not in seen:
                result.append(m)
                seen.add(m)
        return result

    def _fetch_top_models(self) -> None:
        """Fetch top GGUF and non-GGUF repos from unsloth by downloads.

        Requests up to 80 ``unsloth`` repos from the Hugging Face models API,
        sorted by downloads, and stores at most 40 ids per category in
        ``_top_gguf_cache`` and ``_top_hub_cache``. A category that comes back
        empty leaves its cache untouched. Any failure (network error, non-200
        status, unexpected payload) is logged and swallowed.
        ``_top_models_ready`` is always set on exit so ``default_models``
        stops waiting.
        """
        try:
            import httpx

            resp = httpx.get(
                "https://huggingface.co/api/models",
                params = {
                    "author": "unsloth",
                    "sort": "downloads",
                    "direction": "-1",
                    "limit": "80",
                },
                timeout = 15,
            )
            if resp.status_code != 200:
                logger.warning(
                    "Top models request returned HTTP %s", resp.status_code
                )
                return

            payload = resp.json()
            entries = payload if isinstance(payload, list) else []

            # Single pass over the response. Entries without a usable string
            # id are skipped so one malformed record cannot discard the
            # whole result.
            gguf_ids: list[str] = []
            hub_ids: list[str] = []
            for entry in entries:
                model_id = entry.get("id") if isinstance(entry, dict) else None
                if not isinstance(model_id, str) or not model_id:
                    continue
                if model_id.upper().endswith("-GGUF"):
                    gguf_ids.append(model_id)
                else:
                    hub_ids.append(model_id)

            # Top 40 per category - the frontend pages through them on-demand
            # via infinite scroll, so we send a deep pool.
            gguf_ids = gguf_ids[:40]
            hub_ids = hub_ids[:40]

            if gguf_ids:
                self._top_gguf_cache = gguf_ids
                logger.info("Top GGUF models: %s", gguf_ids)
            if hub_ids:
                self._top_hub_cache = hub_ids
                logger.info("Top hub models: %s", hub_ids)
        except Exception as e:
            # Best-effort: callers fall back to the static model list.
            logger.warning("Failed to fetch top models: %s", e)
        finally:
            self._top_models_ready.set()

    # ------------------------------------------------------------------
    # Subprocess lifecycle
    # ------------------------------------------------------------------

    def _spawn_subprocess(self, config: dict) -> None:
        """Create fresh IPC primitives and start a new worker process.

        The command queue, response queue and cancel event are created from
        the module's multiprocessing context and handed to
        ``run_inference_process`` together with ``config``. The process is
        started as a daemon.
        """
        from .worker import run_inference_process

        self._cmd_queue = commands = _CTX.Queue()
        self._resp_queue = responses = _CTX.Queue()
        self._cancel_event = cancel_flag = _CTX.Event()

        worker_kwargs = {
            "cmd_queue": commands,
            "resp_queue": responses,
            "cancel_event": cancel_flag,
            "config": config,
        }
        self._proc = worker = _CTX.Process(
            target = run_inference_process,
            kwargs = worker_kwargs,
            daemon = True,
        )
        worker.start()
        logger.info("Inference subprocess started (pid=%s)", worker.pid)

    def _cancel_generation(self) -> None:
        """Cancel any ongoing generation in the subprocess (instant)."""
        if self._cancel_event is not None:
            self._cancel_event.set()

    def _shutdown_subprocess(self, timeout: float = 10.0) -> None:
        """Gracefully shut down the inference subprocess."""
        self._stop_dispatcher()  # Stop dispatcher before killing subprocess
        if self._proc is None or not self._proc.is_alive():
            self._proc = None
            return

        # 1. Cancel any ongoing generation first (instant via mp.Event)
        self._cancel_generation()
        time.sleep(0.5)  # Brief wait for generation to stop

        # 2. Drain stale responses from queue
        self._drain_queue()

        # 3. Send shutdown command
        try:
            self._cmd_queue.put({"type": "shutdown"})
        except (OSError, ValueError):
            pass

        # 4. Wait for graceful shutdown
        try:
            self._proc.join(timeout = timeout)
        except Exception:
            pass

        # 5. Force kill if still alive
        if self._proc is not None and self._proc.is_alive():
            logger.warning("Inference subprocess did not exit gracefully, terminating")
            try:
                self._proc.terminate()
                self._proc.join(timeout = 5)
            except Exception:
                pass
            if self._proc is not None and self._proc.is_alive():
                logger.warning("Subprocess still alive after terminate, killing")
                try:
                    self._proc.kill()
                    self._proc.join(timeout = 3)
                except Exception:
                    pass

        self._proc = None
        self._cmd_queue = None
        self._resp_queue = None
        self._cancel_event = None
        logger.info("Inference subprocess shut down")

    def _cleanup(self):
        """atexit handler."""
        self._shutdown_subprocess(timeout = 5.0)

    def _ensure_subprocess_alive(self) -> bool:
        """Check if subprocess is alive."""
        return self._proc is not None and self._proc.is_alive()

    # ------------------------------------------------------------------
    # Queue helpers
    # ------------------------------------------------------------------

    def _send_cmd(self, cmd: dict) -> None:
        """Send a command to the subprocess."""
        if self._cmd_queue is None:
            raise RuntimeError("No inference subprocess running")
        try:
            self._cmd_queue.put(cmd)
        except (OSError, ValueError) as exc:
            raise RuntimeError(f"Failed to send command to subprocess: {exc}")

    def _read_resp(self, timeout: float = 1.0) -> Optional[dict]:
        """Read a response from the subprocess (non-blocking with timeout)."""
        if self._resp_queue is None:
            return None
        try:
            return self._resp_queue.get(timeout = timeout)
        except queue.Empty:
            return None
        except (EOFError, OSError, ValueError):
            return None

    def _wait_response(self, expected_type: str, timeout: float = 120.0) -> dict:
        """Block until a response of the expected type arrives.

        Also handles 'status' and 'error' events during the wait.
        Returns the matching response dict.
        Raises RuntimeError on timeout or subprocess crash.
        """
        deadline = time.monotonic() + timeout

        while time.monotonic() < deadline:
            remaining = max(0.1, deadline - time.monotonic())
            resp = self._read_resp(timeout = min(remaining, 1.0))

            if resp is None:
                # Check subprocess health
                if not self._ensure_subprocess_alive():
                    raise RuntimeError("Inference subprocess crashed during wait")
                continue

            rtype = resp.get("type", "")

            if rtype == expected_type:
                return resp

            if rtype == "error":
                error_msg = resp.get("error", "Unknown error")
                raise RuntimeError(f"Subprocess error: {error_msg}")

            if rtype == "status":
                logger.info("Subprocess status: %s", resp.get("message", ""))
                continue

            # Other response types during wait — skip
            logger.debug(
                "Skipping response type '%s' while waiting for '%s'",
                rtype,
                expected_type,
            )

        raise RuntimeError(
            f"Timeout waiting for '{expected_type}' response after {timeout}s"
        )

    def _drain_queue(self) -> list:
        """Drain all pending responses."""
        events = []
        if self._resp_queue is None:
            return events
        while True:
            try:
                events.append(self._resp_queue.get_nowait())
            except queue.Empty:
                return events
            except (EOFError, OSError, ValueError):
                return events

    def _drain_until_gen_done(self, timeout: float = 5.0) -> None:
        """Consume resp_queue events until gen_done/gen_error, discarding them.

        Called after cancel to ensure stale tokens from the cancelled
        generation don't leak into the next request.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            resp = self._read_resp(timeout = min(0.5, deadline - time.monotonic()))
            if resp is None:
                if not self._ensure_subprocess_alive():
                    return
                continue
            rtype = resp.get("type", "")
            if rtype in ("gen_done", "gen_error"):
                return
        logger.warning("Timed out waiting for gen_done after cancel")

    # ------------------------------------------------------------------
    # Dispatcher — per-request mailbox routing for compare mode
    # ------------------------------------------------------------------

    def _start_dispatcher(self) -> None:
        """Start the dispatcher thread if not already running.

        The dispatcher reads from the shared resp_queue and routes
        responses to per-request mailbox queues. This allows multiple
        adapter-controlled (compare) requests to be in-flight without
        holding _gen_lock.
        """
        if self._dispatcher_thread is not None and self._dispatcher_thread.is_alive():
            return

        self._dispatcher_stop.clear()
        self._dispatcher_thread = threading.Thread(
            target = self._dispatcher_loop,
            daemon = True,
            name = "inference-dispatcher",
        )
        self._dispatcher_thread.start()
        logger.debug("Dispatcher thread started")

    def _stop_dispatcher(self) -> None:
        """Signal the dispatcher to stop and wait for it."""
        if self._dispatcher_thread is None:
            return
        self._dispatcher_stop.set()
        self._dispatcher_thread.join(timeout = _DISPATCH_STOP_TIMEOUT)
        self._dispatcher_thread = None
        logger.debug("Dispatcher thread stopped")

    def _dispatcher_loop(self) -> None:
        """Background loop: read resp_queue → route to mailboxes by request_id."""
        while not self._dispatcher_stop.is_set():
            if self._resp_queue is None:
                break

            try:
                resp = self._resp_queue.get(timeout = _DISPATCH_POLL_INTERVAL)
            except queue.Empty:
                continue
            except (EOFError, OSError, ValueError):
                break

            rid = resp.get("request_id")
            rtype = resp.get("type", "")

            # Status messages — log and skip
            if rtype == "status":
                logger.info("Subprocess status: %s", resp.get("message", ""))
                continue

            # Route to mailbox if a matching request_id exists
            if rid:
                with self._mailbox_lock:
                    mbox = self._mailboxes.get(rid)
                if mbox is not None:
                    mbox.put(resp)
                    continue

            # No matching mailbox — might be for a _gen_lock reader or orphaned
            # Push it back so _read_resp can pick it up. But we can't un-get
            # from mp.Queue, so log a warning.
            if rtype not in ("status",):
                logger.debug(
                    "Dispatcher: no mailbox for request_id=%s type=%s, dropping",
                    rid,
                    rtype,
                )

    def _generate_dispatched(
        self,
        messages: list = None,
        system_prompt: str = "",
        image = None,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
        use_adapter = None,
    ) -> Generator[str, None, None]:
        """Dispatched generation — sends command without holding _gen_lock.

        Uses a per-request mailbox to receive tokens. This allows two
        compare-mode requests to be queued in the subprocess simultaneously,
        eliminating the inter-generation round-trip overhead.

        The subprocess processes commands sequentially from its cmd_queue,
        so generation is still serialized at the GPU level — we just avoid
        the orchestrator-level lock contention.
        """
        if not self._ensure_subprocess_alive():
            yield "Error: Inference subprocess is not running"
            return

        if not self.active_model_name:
            yield "Error: No active model"
            return

        # Ensure dispatcher is running
        self._start_dispatcher()

        request_id = str(uuid.uuid4())

        # Convert PIL Image to base64 if needed
        image_b64 = None
        if image is not None:
            image_b64 = self._pil_to_base64(image)

        cmd = {
            "type": "generate",
            "request_id": request_id,
            "messages": messages or [],
            "system_prompt": system_prompt,
            "image_base64": image_b64,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "min_p": min_p,
            "max_new_tokens": max_new_tokens,
            "repetition_penalty": repetition_penalty,
        }

        if use_adapter is not None:
            cmd["use_adapter"] = use_adapter

        # Create mailbox BEFORE sending command
        mailbox: queue.Queue = queue.Queue()
        with self._mailbox_lock:
            self._mailboxes[request_id] = mailbox

        try:
            self._send_cmd(cmd)
        except RuntimeError as exc:
            with self._mailbox_lock:
                self._mailboxes.pop(request_id, None)
            yield f"Error: {exc}"
            return

        # Read tokens from our private mailbox
        try:
            while True:
                try:
                    resp = mailbox.get(timeout = _DISPATCH_READ_TIMEOUT)
                except queue.Empty:
                    # Timeout — check subprocess health
                    if not self._ensure_subprocess_alive():
                        yield "Error: Inference subprocess crashed during generation"
                        return
                    continue

                rtype = resp.get("type", "")

                if rtype == "token":
                    # Check cancel from route (e.g. SSE connection closed)
                    if cancel_event is not None and cancel_event.is_set():
                        self._cancel_generation()
                        # Drain remaining events for this request
                        self._drain_mailbox(mailbox, timeout = 5.0)
                        return
                    yield resp.get("text", "")

                elif rtype == "gen_done":
                    return

                elif rtype == "gen_error":
                    yield f"Error: {resp.get('error', 'Unknown error')}"
                    return
        finally:
            with self._mailbox_lock:
                self._mailboxes.pop(request_id, None)

    def _drain_mailbox(self, mailbox: queue.Queue, timeout: float = 5.0) -> None:
        """Drain a mailbox until gen_done/gen_error, discarding tokens."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                resp = mailbox.get(
                    timeout = min(_DISPATCH_POLL_INTERVAL, deadline - time.monotonic())
                )
            except queue.Empty:
                continue
            rtype = resp.get("type", "")
            if rtype in ("gen_done", "gen_error"):
                return
        logger.warning("Timed out draining mailbox after cancel")

    def _wait_dispatcher_idle(self) -> None:
        """Wait for all dispatched requests to complete, then stop dispatcher.

        Called by _generate_inner before using the _gen_lock path, to ensure
        the dispatcher thread isn't competing for resp_queue reads.
        """
        if self._dispatcher_thread is None or not self._dispatcher_thread.is_alive():
            return

        # Wait for all mailboxes to be emptied (dispatched requests complete)
        deadline = time.monotonic() + _DISPATCH_IDLE_TIMEOUT
        while time.monotonic() < deadline:
            with self._mailbox_lock:
                if not self._mailboxes:
                    break
            time.sleep(0.1)

        # Only stop dispatcher if all mailboxes drained.  If compare
        # requests are still active, leave the dispatcher running so
        # their token routing isn't killed mid-stream.
        with self._mailbox_lock:
            still_active = bool(self._mailboxes)
        if still_active:
            logger.warning(
                "Dispatcher still has %d active mailbox(es); "
                "leaving dispatcher running for compare requests",
                len(self._mailboxes),
            )
        else:
            self._stop_dispatcher()

    # ------------------------------------------------------------------
    # Public API — same interface as InferenceBackend
    # ------------------------------------------------------------------

    def load_model(
        self,
        config,  # ModelConfig
        max_seq_length: int = 2048,
        dtype = None,
        load_in_4bit: bool = True,
        hf_token: Optional[str] = None,
        trust_remote_code: bool = False,
    ) -> bool:
        """Load a model for inference in a freshly spawned subprocess.

        Any existing subprocess is shut down first, so each load starts from
        a clean Python interpreter with no stale unsloth patches,
        torch.compile caches or inspect.getsource() failures carried over
        from a previous model.

        Returns:
            ``True`` when the subprocess reports a successful load.

        Raises:
            Exception: When the subprocess reports a failure, or when
                spawning or waiting for the ``loaded`` response fails. In
                both cases the local model state is cleared before the error
                propagates.
        """
        from utils.transformers_version import needs_transformers_5

        model_name = config.identifier
        self.loading_models.add(model_name)

        try:
            needed_major = "5" if needs_transformers_5(model_name) else "4"

            # Settings handed to the worker process
            sub_config = {
                "model_name": model_name,
                "max_seq_length": max_seq_length,
                "load_in_4bit": load_in_4bit,
                "hf_token": hf_token or "",
                "gguf_variant": getattr(config, "gguf_variant", None),
                "trust_remote_code": trust_remote_code,
            }

            # Never reuse a subprocess: once unsloth has patched torch
            # internals, the next model load hits inspect.getsource()
            # failures.
            if self._ensure_subprocess_alive():
                self._cancel_generation()
                time.sleep(0.3)
                self._shutdown_subprocess()
            elif self._proc is not None:
                # A dead process handle is still around - tidy it up.
                self._shutdown_subprocess(timeout = 2)

            logger.info(
                "Spawning fresh inference subprocess for '%s' (transformers %s.x)",
                model_name,
                needed_major,
            )
            self._spawn_subprocess(sub_config)
            reply = self._wait_response("loaded", timeout = 180)

            if not reply.get("success"):
                # State cleanup happens in the handler below.
                raise Exception(reply.get("error", "Failed to load model"))

            # Mirror the subprocess state locally
            self._current_transformers_major = needed_major
            info = reply.get("model_info", {})
            loaded_id = info.get("identifier", model_name)
            self.active_model_name = loaded_id
            self.models[loaded_id] = {
                "is_vision": info.get("is_vision", False),
                "is_lora": info.get("is_lora", False),
                "display_name": info.get("display_name", model_name),
                "is_audio": info.get("is_audio", False),
                "audio_type": info.get("audio_type"),
                "has_audio_input": info.get("has_audio_input", False),
            }
            self.loading_models.discard(model_name)
            logger.info("Model '%s' loaded successfully in subprocess", model_name)
            return True

        except Exception:
            self.loading_models.discard(model_name)
            self.active_model_name = None
            self.models.clear()
            raise

    def unload_model(self, model_name: str) -> bool:
        """Unload a model from the subprocess."""
        if not self._ensure_subprocess_alive():
            # No subprocess — just clear local state
            self.models.pop(model_name, None)
            if self.active_model_name == model_name:
                self.active_model_name = None
            return True

        try:
            self._send_cmd(
                {
                    "type": "unload",
                    "model_name": model_name,
                }
            )
            resp = self._wait_response("unloaded", timeout = 30)

            # Update local state
            self.models.pop(model_name, None)
            if self.active_model_name == model_name:
                self.active_model_name = None

            logger.info("Model '%s' unloaded from subprocess", model_name)
            return True

        except Exception as exc:
            logger.error("Error unloading model '%s': %s", model_name, exc)
            # Clear local state anyway
            self.models.pop(model_name, None)
            if self.active_model_name == model_name:
                self.active_model_name = None
            return False

    def generate_chat_response(
        self,
        messages: list,
        system_prompt: str = "",
        image = None,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """Generate response, streaming tokens from subprocess."""
        yield from self._generate_inner(
            messages = messages,
            system_prompt = system_prompt,
            image = image,
            temperature = temperature,
            top_p = top_p,
            top_k = top_k,
            min_p = min_p,
            max_new_tokens = max_new_tokens,
            repetition_penalty = repetition_penalty,
            cancel_event = cancel_event,
            use_adapter = None,
        )

    def generate_with_adapter_control(
        self,
        use_adapter: Optional[Union[bool, str]] = None,
        cancel_event = None,
        **gen_kwargs,
    ) -> Generator[str, None, None]:
        """Generate with adapter control, streaming tokens from subprocess.

        Uses the dispatcher path (no _gen_lock) so that compare-mode
        requests don't block each other. The subprocess naturally
        serializes them via its sequential command loop.
        """
        yield from self._generate_dispatched(
            use_adapter = use_adapter,
            cancel_event = cancel_event,
            **gen_kwargs,
        )

    def _generate_inner(
        self,
        messages: list = None,
        system_prompt: str = "",
        image = None,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
        use_adapter = None,
    ) -> Generator[str, None, None]:
        """Inner generation logic — sends command to subprocess, yields tokens.

        Serialized by _gen_lock: only one generation runs at a time.
        This prevents concurrent readers from consuming each other's
        tokens off the shared resp_queue.
        """
        if not self._ensure_subprocess_alive():
            yield "Error: Inference subprocess is not running"
            return

        if not self.active_model_name:
            yield "Error: No active model"
            return

        # If the dispatcher is running (from a previous compare-mode request),
        # wait for all dispatched requests to finish, then stop the dispatcher
        # so we can safely read from resp_queue directly.
        self._wait_dispatcher_idle()

        # Serialize generation — single GPU, one generation at a time.
        # Without this lock, two concurrent readers on the same resp_queue
        # can consume and drop each other's token events.
        with self._gen_lock:
            yield from self._generate_locked(
                messages = messages,
                system_prompt = system_prompt,
                image = image,
                temperature = temperature,
                top_p = top_p,
                top_k = top_k,
                min_p = min_p,
                max_new_tokens = max_new_tokens,
                repetition_penalty = repetition_penalty,
                cancel_event = cancel_event,
                use_adapter = use_adapter,
            )

    def _generate_locked(
        self,
        messages: list = None,
        system_prompt: str = "",
        image = None,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 256,
        repetition_penalty: float = 1.0,
        cancel_event = None,
        use_adapter = None,
    ) -> Generator[str, None, None]:
        """Actual generation logic — must be called under _gen_lock."""
        request_id = str(uuid.uuid4())

        # Convert PIL Image to base64 if needed
        image_b64 = None
        if image is not None:
            image_b64 = self._pil_to_base64(image)

        cmd = {
            "type": "generate",
            "request_id": request_id,
            "messages": messages or [],
            "system_prompt": system_prompt,
            "image_base64": image_b64,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "min_p": min_p,
            "max_new_tokens": max_new_tokens,
            "repetition_penalty": repetition_penalty,
        }

        if use_adapter is not None:
            cmd["use_adapter"] = use_adapter

        try:
            self._send_cmd(cmd)
        except RuntimeError as exc:
            yield f"Error: {exc}"
            return

        # Yield tokens from response queue — we are the only reader
        # because _gen_lock is held.
        while True:
            resp = self._read_resp(timeout = 30.0)

            if resp is None:
                # Check subprocess health
                if not self._ensure_subprocess_alive():
                    yield "Error: Inference subprocess crashed during generation"
                    return
                continue

            rtype = resp.get("type", "")

            # Status messages — skip
            if rtype == "status":
                continue

            # Error without request_id = subprocess-level error
            resp_rid = resp.get("request_id")
            if rtype == "error" and not resp_rid:
                yield f"Error: {resp.get('error', 'Unknown error')}"
                return

            if rtype == "token":
                # Check cancel from route (e.g. SSE connection closed)
                if cancel_event is not None and cancel_event.is_set():
                    self._cancel_generation()
                    # Wait for the subprocess to acknowledge cancellation
                    # (gen_done/gen_error) so stale events don't leak into
                    # the next generation request.
                    self._drain_until_gen_done(timeout = 5.0)
                    return
                yield resp.get("text", "")

            elif rtype == "gen_done":
                return

            elif rtype == "gen_error":
                yield f"Error: {resp.get('error', 'Unknown error')}"
                return

    def reset_generation_state(self):
        """Cancel any ongoing generation and reset state."""
        self._cancel_generation()
        if not self._ensure_subprocess_alive():
            return
        try:
            self._send_cmd({"type": "reset"})
        except RuntimeError:
            pass

    # ------------------------------------------------------------------
    # Audio generation — TTS, ASR, audio input
    # ------------------------------------------------------------------

    def generate_audio_response(
        self,
        text: str,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 50,
        min_p: float = 0.0,
        max_new_tokens: int = 2048,
        repetition_penalty: float = 1.0,
        use_adapter: Optional[Union[bool, str]] = None,
    ) -> Tuple[bytes, int]:
        """Synthesize speech for ``text`` and return ``(wav_bytes, sample_rate)``.

        This call blocks: it sends one ``generate_audio`` command and then
        polls the response channel (at most 1 s per poll) for up to 120 s.

        Raises:
            RuntimeError: if the subprocess is not running, no model is
                active, the subprocess dies while waiting, the worker replies
                with ``audio_error``/``error``, or the 120 s deadline passes.
        """
        if not self._ensure_subprocess_alive():
            raise RuntimeError("Inference subprocess is not running")
        if not self.active_model_name:
            raise RuntimeError("No active model")

        import uuid

        payload = {
            "type": "generate_audio",
            "request_id": str(uuid.uuid4()),
            "text": text,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "min_p": min_p,
            "max_new_tokens": max_new_tokens,
            "repetition_penalty": repetition_penalty,
        }
        # Only forward adapter control when the caller asked for it explicitly.
        if use_adapter is not None:
            payload["use_adapter"] = use_adapter

        self._send_cmd(payload)

        give_up_at = time.monotonic() + 120.0
        while time.monotonic() < give_up_at:
            seconds_left = max(0.1, give_up_at - time.monotonic())
            message = self._read_resp(timeout = min(seconds_left, 1.0))

            if message is None:
                # Nothing arrived in this poll window: keep waiting only
                # while the subprocess is still alive.
                if self._ensure_subprocess_alive():
                    continue
                raise RuntimeError(
                    "Inference subprocess crashed during audio generation"
                )

            kind = message.get("type", "")

            if kind == "audio_done":
                return base64.b64decode(message["wav_base64"]), message["sample_rate"]

            if kind == "audio_error":
                raise RuntimeError(message.get("error", "Audio generation failed"))

            if kind == "error":
                raise RuntimeError(message.get("error", "Unknown error"))

            # Any other message type (e.g. "status") is ignored; keep polling.

        raise RuntimeError("Timeout waiting for audio generation (120s)")

    def generate_whisper_response(
        self,
        audio_array,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """Stream text for ``audio_array`` through the Whisper path.

        Delegates to ``_generate_audio_input_inner`` with
        ``audio_type="whisper"``, no chat messages and an empty system prompt.
        """
        whisper_stream = self._generate_audio_input_inner(
            audio_array = audio_array,
            audio_type = "whisper",
            messages = [],
            system_prompt = "",
            cancel_event = cancel_event,
        )
        yield from whisper_stream

    def generate_audio_input_response(
        self,
        messages,
        system_prompt,
        audio_array,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 512,
        repetition_penalty: float = 1.0,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """Stream text generated from a chat plus an audio clip (e.g. Gemma 3n).

        Delegates to ``_generate_audio_input_inner`` with ``audio_type=None``
        and forwards every sampling parameter unchanged.
        """
        forwarded = {
            "audio_array": audio_array,
            # None = not the Whisper path; the worker picks the handler.
            "audio_type": None,
            "messages": messages,
            "system_prompt": system_prompt,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "min_p": min_p,
            "max_new_tokens": max_new_tokens,
            "repetition_penalty": repetition_penalty,
            "cancel_event": cancel_event,
        }
        yield from self._generate_audio_input_inner(**forwarded)

    def _generate_audio_input_inner(
        self,
        audio_array,
        audio_type: Optional[str] = None,
        messages: list = None,
        system_prompt: str = "",
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 40,
        min_p: float = 0.0,
        max_new_tokens: int = 512,
        repetition_penalty: float = 1.0,
        cancel_event = None,
    ) -> Generator[str, None, None]:
        """Streaming loop shared by the Whisper and audio-input entry points.

        Sends one ``generate_audio_input`` command while holding
        ``self._gen_lock`` and yields the ``text`` of each ``token`` response
        until ``gen_done``. Failures are not raised; they are yielded as a
        single ``"Error: ..."`` string, after which the generator ends.
        """
        if not self._ensure_subprocess_alive():
            yield "Error: Inference subprocess is not running"
            return
        if not self.active_model_name:
            yield "Error: No active model"
            return

        with self._gen_lock:
            import uuid

            request_id = str(uuid.uuid4())

            # Flatten array-like input to a plain list before it is put on
            # the command queue (objects exposing .tolist() use that).
            if hasattr(audio_array, "tolist"):
                samples = audio_array.tolist()
            else:
                samples = list(audio_array)

            request = {
                "type": "generate_audio_input",
                "request_id": request_id,
                "audio_data": samples,
                "audio_type": audio_type,
                "messages": messages or [],
                "system_prompt": system_prompt,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
                "min_p": min_p,
                "max_new_tokens": max_new_tokens,
                "repetition_penalty": repetition_penalty,
            }

            try:
                self._send_cmd(request)
            except RuntimeError as send_error:
                yield f"Error: {send_error}"
                return

            while True:
                event = self._read_resp(timeout = 30.0)

                if event is None:
                    # Read timed out: keep waiting only while the worker lives.
                    if self._ensure_subprocess_alive():
                        continue
                    yield "Error: Inference subprocess crashed during audio input generation"
                    return

                kind = event.get("type", "")

                if kind == "status":
                    continue

                if kind == "gen_done":
                    return

                # "gen_error", or an "error" that carries no request_id,
                # ends the stream with an error string.
                if kind == "gen_error" or (
                    kind == "error" and not event.get("request_id")
                ):
                    yield f"Error: {event.get('error', 'Unknown error')}"
                    return

                if kind == "token":
                    if cancel_event is not None and cancel_event.is_set():
                        # Cancel, then drain leftover responses before exiting.
                        self._cancel_generation()
                        self._drain_until_gen_done(timeout = 5.0)
                        return
                    yield event.get("text", "")

    # ------------------------------------------------------------------
    # Local helpers (no subprocess needed)
    # ------------------------------------------------------------------

    def resize_image(self, img, max_size: int = 800):
        """Downscale ``img`` so neither side exceeds ``max_size`` pixels.

        The aspect ratio is preserved. Images that already fit are returned
        unchanged (same object), and ``None`` is passed through. PIL is only
        imported when a resize is actually needed.

        Args:
            img: An image object exposing ``size`` as ``(width, height)`` and
                a PIL-style ``resize(size, resample)`` method, or ``None``.
            max_size: Largest allowed width/height in pixels.

        Returns:
            The original image, a resized copy, or ``None``.
        """
        if img is None:
            return None

        width, height = img.size[0], img.size[1]
        if width <= max_size and height <= max_size:
            return img

        from PIL import Image

        ratio = min(max_size / width, max_size / height)
        # int() truncates, so an extreme aspect ratio (e.g. 1600x1) would
        # produce a 0-pixel edge, which PIL's resize rejects. Keep >= 1 px.
        new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
        return img.resize(new_size, Image.Resampling.LANCZOS)

    @staticmethod
    def _pil_to_base64(img) -> str:
        """Convert a PIL Image to base64 string for IPC."""
        buf = BytesIO()
        img.save(buf, format = "PNG")
        return base64.b64encode(buf.getvalue()).decode("ascii")

    def get_current_model(self) -> Optional[str]:
        """Return ``self.active_model_name`` (may be ``None`` per the annotation)."""
        current = self.active_model_name
        return current

    def is_model_loading(self) -> bool:
        """Return ``True`` when ``self.loading_models`` has at least one entry."""
        pending = len(self.loading_models)
        return pending > 0

    def get_loading_model(self) -> Optional[str]:
        """Return the first entry of ``self.loading_models``, or ``None`` if it is empty."""
        if not self.loading_models:
            return None
        return next(iter(self.loading_models))

    def check_vision_model_compatibility(self) -> bool:
        """Return the active model's ``is_vision`` flag.

        Yields ``False`` when no model is active, when the active model has no
        entry in ``self.models``, or when its entry lacks ``is_vision``.
        """
        if not self.active_model_name:
            return False
        if self.active_model_name not in self.models:
            return False
        model_entry = self.models[self.active_model_name]
        return model_entry.get("is_vision", False)


# ========== GLOBAL INSTANCE ==========
# Module-wide orchestrator, created on the first get_inference_backend() call.
_inference_backend = None


def get_inference_backend() -> InferenceOrchestrator:
    """Return the shared ``InferenceOrchestrator``, constructing it on first use."""
    global _inference_backend
    if _inference_backend is not None:
        return _inference_backend
    _inference_backend = InferenceOrchestrator()
    return _inference_backend


================================================
FILE: studio/backend/core/inference/tools.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Tool definitions and executors for LLM tool calling.

Supports web search (DuckDuckGo), Python code execution, and terminal commands.
"""

import ast
import os

os.environ["UNSLOTH_IS_PRESENT"] = "1"

import subprocess
import sys
import tempfile
import threading

from loggers import get_logger

logger = get_logger(__name__)

# Default time limit, in seconds, used when a tool call does not pass its own timeout.
_EXEC_TIMEOUT = 300  # 5 minutes
# Default character cap used by _truncate() for tool output.
_MAX_OUTPUT_CHARS = 8000  # truncate long output
# Whitespace-separated command tokens that make _bash_exec() refuse a command.
_BASH_BLOCKED_WORDS = {"rm", "sudo", "dd", "chmod", "mkfs", "shutdown", "reboot"}

# Per-session working directories so each chat thread gets its own sandbox.
# Falls back to a shared ~/studio_sandbox/ for API callers without a session_id.
_workdirs: dict[str, str] = {}


def _get_workdir(session_id: str | None = None) -> str:
    """Return (and lazily create) a persistent working directory for tool execution."""
    global _workdirs
    key = session_id or "_default"
    if key not in _workdirs or not os.path.isdir(_workdirs[key]):
        home = os.path.expanduser("~")
        sandbox_root = os.path.join(home, "studio_sandbox")
        if session_id:
            # Sanitize: strip path separators and parent-dir references
            safe_id = os.path.basename(session_id.replace("..", ""))
            if not safe_id:
                safe_id = "_invalid"
            workdir = os.path.join(sandbox_root, safe_id)
            # Verify resolved path stays under sandbox root
            if not os.path.realpath(workdir).startswith(os.path.realpath(sandbox_root)):
                workdir = os.path.join(sandbox_root, "_invalid")
        else:
            workdir = sandbox_root
        os.makedirs(workdir, exist_ok = True)
        _workdirs[key] = workdir
    return _workdirs[key]


# Function-calling schema for the "web_search" tool: one required string
# argument, ``query``. The name matches the dispatch in execute_tool().
WEB_SEARCH_TOOL = {
    "type": "function",
    "function": {
        "name": "web_search",
        "description": "Search the web for current information, recent events, or facts you are uncertain about.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query",
                }
            },
            "required": ["query"],
        },
    },
}

# Function-calling schema for the "python" tool: one required string
# argument, ``code``.
PYTHON_TOOL = {
    "type": "function",
    "function": {
        "name": "python",
        "description": "Execute Python code in a sandbox and return stdout/stderr.",
        "parameters": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "The Python code to run",
                }
            },
            "required": ["code"],
        },
    },
}

# Function-calling schema for the "terminal" tool: one required string
# argument, ``command``.
TERMINAL_TOOL = {
    "type": "function",
    "function": {
        "name": "terminal",
        "description": "Execute a terminal command and return stdout/stderr.",
        "parameters": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The command to run",
                }
            },
            "required": ["command"],
        },
    },
}

# Every tool schema defined above, in one list.
ALL_TOOLS = [WEB_SEARCH_TOOL, PYTHON_TOOL, TERMINAL_TOOL]


_TIMEOUT_UNSET = object()


def execute_tool(
    name: str,
    arguments: dict,
    cancel_event = None,
    timeout: int | None = _TIMEOUT_UNSET,
    session_id: str | None = None,
) -> str:
    """Execute a tool by name with the given arguments. Returns result as a string.

    ``timeout``: int sets per-call limit in seconds, ``None`` means no limit,
    unset (default) uses ``_EXEC_TIMEOUT`` (300 s).
    ``session_id``: optional thread/session ID for per-conversation sandbox isolation.
    """
    logger.info(
        f"execute_tool: name={name}, session_id={session_id}, timeout={timeout}"
    )
    effective_timeout = _EXEC_TIMEOUT if timeout is _TIMEOUT_UNSET else timeout
    if name == "web_search":
        return _web_search(arguments.get("query", ""), timeout = effective_timeout)
    if name == "python":
        return _python_exec(
            arguments.get("code", ""), cancel_event, effective_timeout, session_id
        )
    if name == "terminal":
        return _bash_exec(
            arguments.get("command", ""), cancel_event, effective_timeout, session_id
        )
    return f"Unknown tool: {name}"


def _web_search(query: str, max_results: int = 5, timeout: int = _EXEC_TIMEOUT) -> str:
    """Run a DuckDuckGo text search and return the hits as one formatted string.

    Each hit is rendered as ``Title`` / ``URL`` / ``Snippet`` lines and hits
    are separated by a ``---`` divider. Any exception (including a missing
    ``ddgs`` package) is reported as a ``"Search failed: ..."`` string rather
    than raised.
    """
    if not query.strip():
        return "No query provided."
    try:
        from ddgs import DDGS

        hits = DDGS(timeout = timeout).text(query, max_results = max_results)
        if not hits:
            return "No results found."
        formatted = [
            f"Title: {hit.get('title', '')}\n"
            f"URL: {hit.get('href', '')}\n"
            f"Snippet: {hit.get('body', '')}"
            for hit in hits
        ]
        return "\n\n---\n\n".join(formatted)
    except Exception as e:
        return f"Search failed: {e}"


def _check_signal_escape_patterns(code: str):
    """
    Check if code contains patterns that could escape signal-based timeouts.

    Vendored from unsloth_zoo.rl_environments to avoid importing unsloth_zoo
    (which requires GPU drivers and fails on Mac/Apple Silicon).

    The code is parsed with ``ast`` and walked once. Two kinds of findings are
    collected, each as a dict with ``type``, ``line`` and ``description``:

    * ``signal_tampering``: calls that override the SIGALRM handler, touch
      ITIMER_REAL, call ``alarm`` or modify the signal mask.
    * ``exception_catching``: ``except`` clauses inside a ``for``/``while``
      loop that are bare or name TimeoutError, BaseException or Exception.

    Code that fails to parse is reported as unsafe, with the SyntaxError text
    under the ``error`` key of the details dict.

    Returns (safe: bool, details: dict)
    """
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        # Unparseable code cannot be analysed; treat it as unsafe.
        return False, {
            "error": f"SyntaxError: {e}",
            "signal_tampering": [],
            "exception_catching": [],
            "warnings": [],
        }

    # Result accumulators, appended to by the visitor below via closure.
    signal_tampering = []
    exception_catching = []
    warnings = []

    def _ast_name_matches(node, names):
        """Return True if ``node`` is a bare or dotted name contained in ``names``."""
        if isinstance(node, ast.Name):
            return node.id in names
        elif isinstance(node, ast.Attribute):
            # Rebuild the dotted path (e.g. "signal.SIGALRM") from the
            # attribute chain, innermost attribute first.
            full_name = []
            current = node
            while isinstance(current, ast.Attribute):
                full_name.append(current.attr)
                current = current.value
            if isinstance(current, ast.Name):
                full_name.append(current.id)
            full_name = ".".join(reversed(full_name))
            return full_name in names
        return False

    class SignalEscapeVisitor(ast.NodeVisitor):
        def __init__(self):
            self.imports_signal = False
            # Names treated as referring to the signal module in visit_Call.
            self.signal_aliases = {"signal"}
            # Nesting depth of enclosing for/while loops.
            self.loop_depth = 0

        def visit_Import(self, node):
            # ``import signal`` / ``import signal as sg``
            for alias in node.names:
                if alias.name == "signal":
                    self.imports_signal = True
                    if alias.asname:
                        self.signal_aliases.add(alias.asname)
            self.generic_visit(node)

        def visit_ImportFrom(self, node):
            # ``from signal import ...``: the listed names (or their aliases)
            # are added to the same alias set used for module aliases.
            if node.module == "signal":
                self.imports_signal = True
                for alias in node.names:
                    if alias.name in (
                        "signal",
                        "SIGALRM",
                        "SIG_IGN",
                        "setitimer",
                        "ITIMER_REAL",
                        "pthread_sigmask",
                        "SIG_BLOCK",
                        "alarm",
                    ):
                        self.signal_aliases.add(alias.asname or alias.name)
            self.generic_visit(node)

        def visit_While(self, node):
            self.loop_depth += 1
            self.generic_visit(node)
            self.loop_depth -= 1

        def visit_For(self, node):
            # NOTE(review): there is no visit_AsyncFor, so ``async for`` loops
            # do not increase loop_depth — confirm whether that is intended.
            self.loop_depth += 1
            self.generic_visit(node)
            self.loop_depth -= 1

        def visit_Call(self, node):
            # Normalise the callee to "signal.<attr>" (attribute call on a
            # known alias) or to a bare function name from the fixed list.
            func = node.func
            func_name = None
            if isinstance(func, ast.Attribute):
                if isinstance(func.value, ast.Name):
                    if func.value.id in self.signal_aliases:
                        func_name = f"signal.{func.attr}"
            elif isinstance(func, ast.Name):
                # NOTE(review): only these literal names are recognised here,
                # so a renamed from-import (``from signal import alarm as a``)
                # called as ``a()`` is not matched — confirm against upstream.
                if func.id in ("signal", "setitimer", "alarm", "pthread_sigmask"):
                    func_name = func.id

            if func_name:
                if func_name in ("signal.signal", "signal"):
                    # Only flagged when the first positional argument is
                    # SIGALRM. NOTE(review): the match is against the literal
                    # strings below, so ``sg.SIGALRM`` under
                    # ``import signal as sg`` would not match.
                    if len(node.args) >= 1:
                        if _ast_name_matches(
                            node.args[0], ("SIGALRM", "signal.SIGALRM")
                        ):
                            signal_tampering.append(
                                {
                                    "type": "signal_handler_override",
                                    "line": node.lineno,
                                    "description": "Overrides SIGALRM handler",
                                }
                            )
                elif func_name in ("signal.setitimer", "setitimer"):
                    # Only flagged when the first positional argument is ITIMER_REAL.
                    if len(node.args) >= 1:
                        if _ast_name_matches(
                            node.args[0], ("ITIMER_REAL", "signal.ITIMER_REAL")
                        ):
                            signal_tampering.append(
                                {
                                    "type": "timer_manipulation",
                                    "line": node.lineno,
                                    "description": "Manipulates ITIMER_REAL timer",
                                }
                            )
                elif func_name in ("signal.alarm", "alarm"):
                    # Any alarm() call is flagged, regardless of arguments.
                    signal_tampering.append(
                        {
                            "type": "alarm_manipulation",
                            "line": node.lineno,
                            "description": "Manipulates alarm timer",
                        }
                    )
                elif func_name in ("signal.pthread_sigmask", "pthread_sigmask"):
                    # Any pthread_sigmask() call is flagged, regardless of arguments.
                    signal_tampering.append(
                        {
                            "type": "signal_mask",
                            "line": node.lineno,
                            "description": "Modifies signal mask (may block SIGALRM)",
                        }
                    )
            self.generic_visit(node)

        def visit_ExceptHandler(self, node):
            # Handlers outside any loop are not flagged.
            if self.loop_depth == 0:
                self.generic_visit(node)
                return
            if node.type is None:
                exception_catching.append(
                    {
                        "type": "bare_except_in_loop",
                        "line": node.lineno,
                        "description": "Bare except in loop catches TimeoutError and continues looping",
                    }
                )
            elif isinstance(node.type, ast.Name):
                if node.type.id in ("TimeoutError", "BaseException", "Exception"):
                    exception_catching.append(
                        {
                            "type": f"catches_{node.type.id}_in_loop",
                            "line": node.lineno,
                            "description": f"Catches {node.type.id} in loop - may suppress timeout and continue",
                        }
                    )
            elif isinstance(node.type, ast.Tuple):
                # ``except (A, B):`` — one finding per broad type in the tuple.
                for elt in node.type.elts:
                    if isinstance(elt, ast.Name):
                        if elt.id in ("TimeoutError", "BaseException", "Exception"):
                            exception_catching.append(
                                {
                                    "type": f"catches_{elt.id}_in_loop",
                                    "line": node.lineno,
                                    "description": f"Catches {elt.id} in loop - may suppress timeout and continue",
                                }
                            )
            self.generic_visit(node)

    visitor = SignalEscapeVisitor()
    visitor.visit(tree)

    # Importing signal without any flagged call only produces a warning.
    if visitor.imports_signal and not signal_tampering:
        warnings.append("Code imports 'signal' module - review manually for safety")

    # Safe only when neither category produced a finding; warnings do not count.
    is_safe = len(signal_tampering) == 0 and len(exception_catching) == 0
    return is_safe, {
        "signal_tampering": signal_tampering,
        "exception_catching": exception_catching,
        "warnings": warnings,
    }


def _check_code_safety(code: str) -> str | None:
    """Validate code safety via static analysis.

    Returns an error message string if the code is unsafe, or None if OK.
    """
    safe, info = _check_signal_escape_patterns(code)
    if not safe:
        reasons = [
            item.get("description", "") for item in info.get("signal_tampering", [])
        ]
        return (
            f"Error: unsafe code detected ({'; '.join(reasons)}). "
            f"Please remove signal manipulation from your code."
        )

    return None


def _cancel_watcher(proc, cancel_event, poll_interval = 0.2):
    """Kill ``proc`` once ``cancel_event`` is set; meant to run in a daemon thread.

    Returns when the process has exited on its own, after it has been killed,
    or immediately when there is no event to watch.

    Args:
        proc: Object with ``poll()`` and ``kill()`` (e.g. ``subprocess.Popen``).
        cancel_event: Object with ``is_set()`` and ``wait(timeout)``, or ``None``.
        poll_interval: Seconds to wait between checks.
    """
    if cancel_event is None:
        # Without an event nothing can request cancellation. Return at once:
        # looping on proc.poll() with no sleep would spin at full CPU.
        return
    while proc.poll() is None:
        if cancel_event.is_set():
            proc.kill()
            return
        # Doubles as the sleep between polls; wakes early when the event is set.
        cancel_event.wait(poll_interval)


def _truncate(text: str, limit: int = _MAX_OUTPUT_CHARS) -> str:
    """Cut ``text`` to ``limit`` characters, appending a note with the original length.

    Text at or under the limit is returned unchanged.
    """
    if len(text) <= limit:
        return text
    kept = text[:limit]
    return kept + f"\n\n... (truncated, {len(text)} chars total)"


def _python_exec(
    code: str,
    cancel_event = None,
    timeout: int = _EXEC_TIMEOUT,
    session_id: str | None = None,
) -> str:
    """Run ``code`` in a child Python process and return its combined output.

    The code is written to a temporary ``.py`` file inside the session's
    working directory and executed with ``sys.executable``, with that
    directory as the cwd. The only isolation applied in this function is the
    working directory plus the static check in ``_check_code_safety``.

    Args:
        code: Python source to execute.
        cancel_event: Optional event; when set, a watcher thread kills the
            child and ``"Execution cancelled."`` is returned.
        timeout: Seconds to wait for the child (``None`` waits indefinitely).
        session_id: Selects the per-session working directory.

    Returns:
        stdout and stderr merged (truncated), prefixed with the exit code on
        failure, or a short status/error message. Exceptions raised while
        running are reported as ``"Execution error: ..."``.
    """
    if not code or not code.strip():
        return "No code provided."

    # Static safety screen before anything is executed.
    error = _check_code_safety(code)
    if error:
        return error

    tmp_path = None
    workdir = _get_workdir(session_id)
    try:
        fd, tmp_path = tempfile.mkstemp(
            suffix = ".py", prefix = "studio_exec_", dir = workdir
        )
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly rather than the locale encoding (which differs on e.g.
        # Windows and would corrupt or reject non-ASCII code).
        with os.fdopen(fd, "w", encoding = "utf-8") as f:
            f.write(code)

        proc = subprocess.Popen(
            [sys.executable, tmp_path],
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
            text = True,
            # Undecodable output bytes become U+FFFD, so communicate() does
            # not raise and throw away everything the program printed.
            errors = "replace",
            cwd = workdir,
        )

        # Spawn cancel watcher if we have a cancel event
        if cancel_event is not None:
            watcher = threading.Thread(
                target = _cancel_watcher, args = (proc, cancel_event), daemon = True
            )
            watcher.start()

        try:
            output, _ = proc.communicate(timeout = timeout)
        except subprocess.TimeoutExpired:
            # Kill, then reap the child and drain its pipe.
            proc.kill()
            proc.communicate()
            return _truncate(f"Execution timed out after {timeout} seconds.")

        if cancel_event is not None and cancel_event.is_set():
            return "Execution cancelled."

        result = output or ""
        if proc.returncode != 0:
            result = f"Exit code {proc.returncode}:\n{result}"
        return _truncate(result) if result.strip() else "(no output)"

    except Exception as e:
        return f"Execution error: {e}"
    finally:
        # Always remove the temporary script, ignoring failures.
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                pass


def _bash_exec(
    command: str,
    cancel_event = None,
    timeout: int = _EXEC_TIMEOUT,
    session_id: str | None = None,
) -> str:
    """Run ``command`` with ``bash -c`` in the session's working directory.

    The command is refused when any of its whitespace-separated, lower-cased
    tokens is in ``_BASH_BLOCKED_WORDS``. NOTE(review): this is a best-effort
    filter only — it matches whole tokens, so forms such as ``/bin/rm`` or
    ``ls;rm`` are not caught. It must not be relied on as a security boundary.

    Args:
        command: Shell command line.
        cancel_event: Optional event; when set, a watcher thread kills the
            process and ``"Execution cancelled."`` is returned.
        timeout: Seconds to wait for the process (``None`` waits indefinitely).
        session_id: Selects the per-session working directory.

    Returns:
        stdout and stderr merged (truncated), prefixed with the exit code on
        failure, or a short status/error message. Exceptions raised while
        running are reported as ``"Execution error: ..."``.
    """
    if not command or not command.strip():
        return "No command provided."

    # Block dangerous commands
    tokens = set(command.lower().split())
    blocked = tokens & _BASH_BLOCKED_WORDS
    if blocked:
        return f"Blocked command(s) for safety: {', '.join(sorted(blocked))}"

    try:
        workdir = _get_workdir(session_id)
        proc = subprocess.Popen(
            ["bash", "-c", command],
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
            text = True,
            # Commands can print arbitrary bytes (e.g. ``cat`` on a binary
            # file). Replace undecodable bytes so communicate() does not
            # raise and discard all output.
            errors = "replace",
            cwd = workdir,
        )

        if cancel_event is not None:
            watcher = threading.Thread(
                target = _cancel_watcher, args = (proc, cancel_event), daemon = True
            )
            watcher.start()

        try:
            output, _ = proc.communicate(timeout = timeout)
        except subprocess.TimeoutExpired:
            # Kill, then reap the process and drain its pipe.
            proc.kill()
            proc.communicate()
            return _truncate(f"Execution timed out after {timeout} seconds.")

        if cancel_event is not None and cancel_event.is_set():
            return "Execution cancelled."

        result = output or ""
        if proc.returncode != 0:
            result = f"Exit code {proc.returncode}:\n{result}"
        return _truncate(result) if result.strip() else "(no output)"

    except Exception as e:
        return f"Execution error: {e}"


================================================
FILE: studio/backend/core/inference/worker.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Inference subprocess entry point.

Each inference session runs in a persistent subprocess (mp.get_context("spawn")).
This gives us a clean Python interpreter with no stale module state —
solving the transformers version-switching problem completely.

The subprocess stays alive while a model is loaded, accepting commands
(generate, load, unload) via mp.Queue. It exits on shutdown or unload.

Pattern follows core/training/worker.py.
"""

from __future__ import annotations

import base64
import structlog
from loggers import get_logger
import os
import queue as _queue
import sys
import time
import traceback
from io import BytesIO
from pathlib import Path
from typing import Any

logger = get_logger(__name__)


def _activate_transformers_version(model_name: str) -> None:
    """Select the transformers version for ``model_name`` via ``sys.path``.

    Intended to run before any ML imports. When the resolved base model needs
    transformers 5.x, the ``.venv_t5`` directory is put at the front of
    ``sys.path`` and prepended to ``PYTHONPATH``. Otherwise nothing is changed
    and the default (4.57.x) install is used.

    Raises:
        RuntimeError: if 5.x is required but ``.venv_t5`` is unavailable.
    """
    # The backend root must be importable for the ``utils`` import below.
    backend_root = str(Path(__file__).resolve().parent.parent.parent)
    if backend_root not in sys.path:
        sys.path.insert(0, backend_root)

    from utils.transformers_version import (
        needs_transformers_5,
        _resolve_base_model,
        _ensure_venv_t5_exists,
        _VENV_T5_DIR,
    )

    base_model = _resolve_base_model(model_name)
    if not needs_transformers_5(base_model):
        logger.info("Using default transformers (4.57.x) for %s", model_name)
        return

    if not _ensure_venv_t5_exists():
        raise RuntimeError(
            f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
        )
    if _VENV_T5_DIR not in sys.path:
        sys.path.insert(0, _VENV_T5_DIR)
    logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR)

    # Child processes (e.g. the GGUF converter) inherit the choice via PYTHONPATH.
    inherited = os.environ.get("PYTHONPATH", "")
    if inherited:
        os.environ["PYTHONPATH"] = _VENV_T5_DIR + os.pathsep + inherited
    else:
        os.environ["PYTHONPATH"] = _VENV_T5_DIR


def _decode_image(image_base64: str):
    """Base64-decode ``image_base64`` and open the bytes with ``PIL.Image.open``."""
    from PIL import Image

    raw_bytes = base64.b64decode(image_base64)
    stream = BytesIO(raw_bytes)
    return Image.open(stream)


def _resize_image(img, max_size: int = 800):
    """Downscale ``img`` so neither side exceeds ``max_size`` pixels.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (same object), and ``None`` is passed through. PIL is only
    imported when a resize is actually needed.

    Args:
        img: An image object exposing ``size`` as ``(width, height)`` and a
            PIL-style ``resize(size, resample)`` method, or ``None``.
        max_size: Largest allowed width/height in pixels.

    Returns:
        The original image, a resized copy, or ``None``.
    """
    if img is None:
        return None

    width, height = img.size[0], img.size[1]
    if width <= max_size and height <= max_size:
        return img

    from PIL import Image

    ratio = min(max_size / width, max_size / height)
    # int() truncates, so an extreme aspect ratio (e.g. 1600x1) would produce
    # a 0-pixel edge, which PIL's resize rejects. Keep >= 1 px.
    new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
    return img.resize(new_size, Image.Resampling.LANCZOS)


def _send_response(resp_queue: Any, response: dict) -> None:
    """Put ``response`` on ``resp_queue``.

    An ``OSError`` or ``ValueError`` from the queue is logged and not
    re-raised, so a failed send does not abort the caller.
    """
    try:
        resp_queue.put(response)
    except (OSError, ValueError) as put_error:
        logger.error("Failed to send response: %s", put_error)


def _build_model_config(config: dict):
    """Create a ``ModelConfig`` from a load-command ``config`` dict.

    Reads ``model_name`` (required), ``hf_token`` and ``gguf_variant``. An
    empty or whitespace-only token is passed on as ``None``.

    Raises:
        KeyError: if ``model_name`` is missing.
        ValueError: if ``ModelConfig.from_identifier`` returns a falsy value.
    """
    from utils.models import ModelConfig

    model_name = config["model_name"]
    raw_token = config.get("hf_token")
    token = raw_token if raw_token and raw_token.strip() else None

    model_config = ModelConfig.from_identifier(
        model_id = model_name,
        hf_token = token,
        gguf_variant = config.get("gguf_variant"),
    )
    if model_config:
        return model_config
    raise ValueError(f"Invalid model identifier: {model_name}")


def _resolve_load_in_4bit(mc, load_in_4bit: bool) -> bool:
    """Reconcile the requested 4-bit flag with how a LoRA adapter was trained.

    Only applies to LoRA configs that have a local path containing
    ``adapter_config.json``:

    * ``unsloth_training_method == "lora"`` forces ``False``.
    * ``unsloth_training_method == "qlora"`` forces ``True``.
    * With no recorded method, a base model whose name lacks ``-bnb-4bit``
      forces ``False``.

    Any error while reading the file is logged and the requested value is
    returned unchanged.
    """
    if not (mc.is_lora and mc.path):
        return load_in_4bit

    adapter_cfg_path = Path(mc.path) / "adapter_config.json"
    if not adapter_cfg_path.exists():
        return load_in_4bit

    import json

    try:
        # JSON is UTF-8; do not depend on the platform's locale encoding.
        with open(adapter_cfg_path, encoding = "utf-8") as f:
            adapter_cfg = json.load(f)
        training_method = adapter_cfg.get("unsloth_training_method")
        if training_method == "lora" and load_in_4bit:
            logger.info("adapter_config.json says lora — setting load_in_4bit=False")
            return False
        if training_method == "qlora" and not load_in_4bit:
            logger.info("adapter_config.json says qlora — setting load_in_4bit=True")
            return True
        if (
            not training_method
            and mc.base_model
            and "-bnb-4bit" not in mc.base_model.lower()
            and load_in_4bit
        ):
            logger.info(
                "No training method, base model has no -bnb-4bit — setting load_in_4bit=False"
            )
            return False
    except Exception as e:
        logger.warning("Could not read adapter_config.json: %s", e)
    return load_in_4bit


def _handle_load(backend, config: dict, resp_queue: Any) -> None:
    """Handle a load command: load a model into the backend.

    Exactly one ``{"type": "loaded", ...}`` response is sent on
    ``resp_queue``. On success it carries ``model_info`` for the parent to
    mirror; on failure it carries ``error`` (plus ``stack`` when an exception
    was raised). Exceptions are reported this way rather than propagated.

    Args:
        backend: Object exposing ``load_model(config=..., max_seq_length=...,
            load_in_4bit=..., hf_token=..., trust_remote_code=...)``.
        config: Load command payload (``model_name``, optional ``hf_token``,
            ``load_in_4bit``, ``max_seq_length``, ``trust_remote_code``, ...).
        resp_queue: Queue the response is sent on.
    """
    try:
        mc = _build_model_config(config)

        # Treat an empty or whitespace-only token as "no token".
        hf_token = config.get("hf_token")
        hf_token = hf_token if hf_token and hf_token.strip() else None

        # Auto-detect quantization for LoRA adapters
        load_in_4bit = _resolve_load_in_4bit(mc, config.get("load_in_4bit", True))

        success = backend.load_model(
            config = mc,
            max_seq_length = config.get("max_seq_length", 2048),
            load_in_4bit = load_in_4bit,
            hf_token = hf_token,
            trust_remote_code = config.get("trust_remote_code", False),
        )

        if success:
            # Build model_info for the parent to mirror
            model_info = {
                "identifier": mc.identifier,
                "display_name": mc.display_name,
                "is_vision": mc.is_vision,
                "is_lora": mc.is_lora,
                "is_gguf": False,
                "is_audio": getattr(mc, "is_audio", False),
                "audio_type": getattr(mc, "audio_type", None),
                "has_audio_input": getattr(mc, "has_audio_input", False),
            }
            _send_response(
                resp_queue,
                {
                    "type": "loaded",
                    "success": True,
                    "model_info": model_info,
                    "ts": time.time(),
                },
            )
        else:
            _send_response(
                resp_queue,
                {
                    "type": "loaded",
                    "success": False,
                    "error": "Failed to load model",
                    "ts": time.time(),
                },
            )

    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "loaded",
                "success": False,
                "error": str(exc),
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )


def _handle_generate(
    backend,
    cmd: dict,
    resp_queue: Any,
    cancel_event,
) -> None:
    """Serve one text/chat generation request, streaming output to the parent.

    Each value yielded by the backend generator is forwarded as a ``token``
    message. A ``gen_done`` message follows once the generator is exhausted
    or ``cancel_event`` is found set. Any exception is logged and reported
    as a ``gen_error`` message instead of propagating.

    Args:
        backend: Inference backend providing ``generate_chat_response`` and
            ``generate_with_adapter_control``.
        cmd: Command dict. ``messages`` is required; ``request_id``,
            ``system_prompt``, ``image_base64``, ``use_adapter`` and the
            sampling options are optional.
        resp_queue: Queue the responses are written to.
        cancel_event: Event checked before every forwarded token; it is also
            handed to the backend generator.
    """
    request_id = cmd.get("request_id", "")

    def _reply(kind: str, **fields) -> None:
        # Every message for this request shares the same envelope.
        _send_response(
            resp_queue,
            {"type": kind, "request_id": request_id, **fields, "ts": time.time()},
        )

    try:
        # Optional image attachment: decode first, then resize.
        image = None
        image_b64 = cmd.get("image_base64")
        if image_b64:
            image = _resize_image(_decode_image(image_b64))

        # Assemble the generator arguments, falling back to defaults for
        # any sampling option the command does not carry.
        gen_kwargs = {
            "messages": cmd["messages"],
            "system_prompt": cmd.get("system_prompt", ""),
            "image": image,
        }
        for option, fallback in (
            ("temperature", 0.7),
            ("top_p", 0.9),
            ("top_k", 40),
            ("min_p", 0.0),
            ("max_new_tokens", 256),
            ("repetition_penalty", 1.0),
        ):
            gen_kwargs[option] = cmd.get(option, fallback)
        gen_kwargs["cancel_event"] = cancel_event

        # An explicit use_adapter value (True or False) selects the
        # adapter-controlled path; its absence selects the plain chat path.
        use_adapter = cmd.get("use_adapter")
        if use_adapter is None:
            generator = backend.generate_chat_response(**gen_kwargs)
        else:
            generator = backend.generate_with_adapter_control(
                use_adapter = use_adapter,
                **gen_kwargs,
            )

        logger.info("Starting text generation for request_id=%s", request_id)

        for cumulative_text in generator:
            # Stop forwarding as soon as the parent has flagged cancellation.
            if cancel_event.is_set():
                logger.info("Generation cancelled for request %s", request_id)
                break
            _reply("token", text = cumulative_text)

        _reply("gen_done")
        logger.info("Finished text generation for request_id=%s", request_id)

    except Exception as exc:
        logger.error("Generation error: %s", exc, exc_info = True)
        _reply(
            "gen_error",
            error = str(exc),
            stack = traceback.format_exc(limit = 20),
        )


def _handle_generate_audio(
    backend,
    cmd: dict,
    resp_queue: Any,
) -> None:
    """Serve one TTS request and return the synthesized audio to the parent.

    On success an ``audio_done`` message carrying the base64-encoded WAV data
    and its sample rate is sent. On failure the exception is logged and an
    ``audio_error`` message is sent instead.

    Args:
        backend: Inference backend providing ``generate_audio_response``.
        cmd: Command dict. ``text`` is required; ``request_id``,
            ``use_adapter`` and the sampling options are optional.
        resp_queue: Queue the response is written to.
    """
    request_id = cmd.get("request_id", "")
    try:
        logger.info("Starting audio generation for request_id=%s", request_id)

        synth_kwargs = {
            "text": cmd["text"],
            "temperature": cmd.get("temperature", 0.6),
            "top_p": cmd.get("top_p", 0.95),
            "top_k": cmd.get("top_k", 50),
            "min_p": cmd.get("min_p", 0.0),
            "max_new_tokens": cmd.get("max_new_tokens", 2048),
            "repetition_penalty": cmd.get("repetition_penalty", 1.0),
            "use_adapter": cmd.get("use_adapter"),
        }
        wav_bytes, sample_rate = backend.generate_audio_response(**synth_kwargs)

        # The WAV payload travels as an ASCII base64 string.
        encoded_wav = base64.b64encode(wav_bytes).decode("ascii")
        done_msg = {
            "type": "audio_done",
            "request_id": request_id,
            "wav_base64": encoded_wav,
            "sample_rate": sample_rate,
            "ts": time.time(),
        }
        _send_response(resp_queue, done_msg)
        logger.info("Finished audio generation for request_id=%s", request_id)

    except Exception as exc:
        logger.error("Audio generation error: %s", exc, exc_info = True)
        failure_msg = {
            "type": "audio_error",
            "request_id": request_id,
            "error": str(exc),
            "stack": traceback.format_exc(limit = 20),
            "ts": time.time(),
        }
        _send_response(resp_queue, failure_msg)


def _handle_generate_audio_input(
    backend,
    cmd: dict,
    resp_queue: Any,
    cancel_event,
) -> None:
    """Serve one audio-input request and stream the generated text back.

    ``audio_type == "whisper"`` routes to the backend's Whisper generator;
    any other value routes to the chat-style audio-input generator. Each
    yielded chunk is forwarded as a ``token`` message, followed by
    ``gen_done``. Exceptions are logged and reported as ``gen_error``.

    Args:
        backend: Inference backend providing ``generate_whisper_response``
            and ``generate_audio_input_response``.
        cmd: Command dict. ``audio_data`` is required; ``audio_type``,
            ``messages``, ``system_prompt``, ``request_id`` and the sampling
            options are optional.
        resp_queue: Queue the responses are written to.
        cancel_event: Event checked before every forwarded chunk; it is also
            handed to the backend generator.
    """
    request_id = cmd.get("request_id", "")

    def _reply(kind: str, **fields) -> None:
        # Shared envelope for every message belonging to this request.
        _send_response(
            resp_queue,
            {"type": kind, "request_id": request_id, **fields, "ts": time.time()},
        )

    try:
        import numpy as np

        # Rebuild the float32 sample array from the sequence in the command.
        audio_array = np.array(cmd["audio_data"], dtype = np.float32)

        if cmd.get("audio_type") == "whisper":
            generator = backend.generate_whisper_response(
                audio_array = audio_array,
                cancel_event = cancel_event,
            )
        else:
            chat_kwargs = {
                "messages": cmd.get("messages", []),
                "system_prompt": cmd.get("system_prompt", ""),
                "audio_array": audio_array,
                "temperature": cmd.get("temperature", 0.7),
                "top_p": cmd.get("top_p", 0.9),
                "top_k": cmd.get("top_k", 40),
                "min_p": cmd.get("min_p", 0.0),
                "max_new_tokens": cmd.get("max_new_tokens", 512),
                "repetition_penalty": cmd.get("repetition_penalty", 1.0),
                "cancel_event": cancel_event,
            }
            generator = backend.generate_audio_input_response(**chat_kwargs)

        logger.info("Starting audio input generation for request_id=%s", request_id)

        for text_chunk in generator:
            # Stop forwarding once the parent has flagged cancellation.
            if cancel_event.is_set():
                logger.info(
                    "Audio input generation cancelled for request %s", request_id
                )
                break
            _reply("token", text = text_chunk)

        _reply("gen_done")
        logger.info("Finished audio input generation for request_id=%s", request_id)

    except Exception as exc:
        logger.error("Audio input generation error: %s", exc, exc_info = True)
        _reply(
            "gen_error",
            error = str(exc),
            stack = traceback.format_exc(limit = 20),
        )


def _handle_unload(backend, cmd: dict, resp_queue: Any) -> None:
    """Unload a model and acknowledge with an ``unloaded`` message.

    The model named in ``cmd["model_name"]`` is unloaded when it is present
    in ``backend.models``. Otherwise the backend's active model (if any) is
    unloaded. The acknowledgement echoes the requested name and carries an
    ``error`` field when unloading raised.

    Args:
        backend: Inference backend exposing ``models``, ``active_model_name``
            and ``unload_model``.
        cmd: Command dict; ``model_name`` is optional.
        resp_queue: Queue the acknowledgement is written to.
    """
    model_name = cmd.get("model_name", "")

    def _ack(**extra) -> None:
        _send_response(
            resp_queue,
            {"type": "unloaded", "model_name": model_name, **extra, "ts": time.time()},
        )

    try:
        # Prefer the explicitly requested model; fall back to the active one.
        if model_name and model_name in backend.models:
            target = model_name
        else:
            target = backend.active_model_name
        if target:
            backend.unload_model(target)

        _ack()
    except Exception as exc:
        logger.error("Unload error: %s", exc)
        _ack(error = str(exc))


def run_inference_process(
    *,
    cmd_queue: Any,
    resp_queue: Any,
    cancel_event,
    config: dict,
) -> None:
    """Subprocess entrypoint. Persistent — runs command loop until shutdown.

    Startup runs in a fixed order, and each stage reports failure to the
    parent with an ``error`` message before returning:

    1. Configure environment variables, warnings and logging.
    2. Activate the transformers version for ``config["model_name"]``
       (before any ML import). On Windows, disable TorchDynamo when Triton
       cannot be imported.
    3. Import the inference backend.
    4. Create the backend and load the initial model described by ``config``.
    5. Enter the command loop.

    Commands handled by the loop (``cmd["type"]``): ``generate``, ``load``,
    ``generate_audio``, ``generate_audio_input``, ``unload``, ``cancel``,
    ``reset``, ``status`` and ``shutdown``. Unknown types are answered with
    an ``error`` message. An exception raised while handling a command is
    logged and reported, and the loop keeps running.

    Args:
        cmd_queue: mp.Queue for receiving commands from parent.
        resp_queue: mp.Queue for sending responses to parent.
        cancel_event: mp.Event shared with parent — set by parent to cancel generation.
        config: Initial configuration dict with model info.
    """
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["PYTHONWARNINGS"] = (
        "ignore"  # Suppress warnings at C-level before imports
    )

    import warnings
    from loggers.config import LogConfig

    if os.getenv("ENVIRONMENT_TYPE", "production") == "production":
        warnings.filterwarnings("ignore")

    LogConfig.setup_logging(
        service_name = "unsloth-studio-inference-worker",
        env = os.getenv("ENVIRONMENT_TYPE", "production"),
    )

    model_name = config["model_name"]

    # ── 1. Activate correct transformers version BEFORE any ML imports ──
    try:
        _activate_transformers_version(model_name)
    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "error",
                "error": f"Failed to activate transformers version: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )
        return

    # ── 1b. On Windows, check Triton availability (must be before import torch) ──
    if sys.platform == "win32":
        try:
            import triton  # noqa: F401

            logger.info("Triton available — torch.compile enabled")
        except ImportError:
            os.environ["TORCHDYNAMO_DISABLE"] = "1"
            logger.warning(
                "Triton not found on Windows — torch.compile disabled. "
                'Install for better performance: pip install "triton-windows<3.7"'
            )

    # ── 2. Import ML libraries (fresh in this clean process) ──
    try:
        _send_response(
            resp_queue,
            {
                "type": "status",
                "message": "Importing Unsloth...",
                "ts": time.time(),
            },
        )

        # Make the backend package root importable before pulling in `core`.
        backend_path = str(Path(__file__).resolve().parent.parent.parent)
        if backend_path not in sys.path:
            sys.path.insert(0, backend_path)

        from core.inference.inference import InferenceBackend

        import transformers

        logger.info("Subprocess loaded transformers %s", transformers.__version__)

    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "error",
                "error": f"Failed to import ML libraries: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )
        return

    # ── 3. Create inference backend and load initial model ──
    try:
        backend = InferenceBackend()

        _send_response(
            resp_queue,
            {
                "type": "status",
                "message": "Loading model...",
                "ts": time.time(),
            },
        )

        _handle_load(backend, config, resp_queue)

    except Exception as exc:
        _send_response(
            resp_queue,
            {
                "type": "error",
                "error": f"Failed to initialize inference backend: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            },
        )
        return

    # ── 4. Command loop — process commands until shutdown ──
    # cancel_event is an mp.Event shared with parent — parent can set it
    # at any time to cancel generation instantly (no queue polling needed).
    logger.info("Inference subprocess ready, entering command loop")

    while True:
        try:
            # Short timeout so a closed queue is noticed promptly.
            cmd = cmd_queue.get(timeout = 1.0)
        except _queue.Empty:
            continue
        except (EOFError, OSError):
            logger.info("Command queue closed, shutting down")
            return

        if cmd is None:
            continue

        cmd_type = cmd.get("type", "")
        logger.info("Received command: %s", cmd_type)

        try:
            if cmd_type == "generate":
                # Drop any stale cancellation left over from a previous request.
                cancel_event.clear()
                _handle_generate(backend, cmd, resp_queue, cancel_event)

            elif cmd_type == "load":
                # Load a new model (reusing this subprocess)
                # First unload current model
                if backend.active_model_name:
                    backend.unload_model(backend.active_model_name)
                _handle_load(backend, cmd, resp_queue)

            elif cmd_type == "generate_audio":
                cancel_event.clear()
                _handle_generate_audio(backend, cmd, resp_queue)

            elif cmd_type == "generate_audio_input":
                cancel_event.clear()
                _handle_generate_audio_input(backend, cmd, resp_queue, cancel_event)

            elif cmd_type == "unload":
                _handle_unload(backend, cmd, resp_queue)

            elif cmd_type == "cancel":
                # Redundant with mp.Event but handle gracefully
                cancel_event.set()
                logger.info("Cancel command received")

            elif cmd_type == "reset":
                cancel_event.set()
                backend.reset_generation_state()
                _send_response(
                    resp_queue,
                    {
                        "type": "reset_ack",
                        "ts": time.time(),
                    },
                )

            elif cmd_type == "status":
                # Return current status
                _send_response(
                    resp_queue,
                    {
                        "type": "status_response",
                        "active_model": backend.active_model_name,
                        "models": {
                            name: {
                                "is_vision": info.get("is_vision", False),
                                "is_lora": info.get("is_lora", False),
                            }
                            for name, info in backend.models.items()
                        },
                        "loading": list(backend.loading_models),
                        "ts": time.time(),
                    },
                )

            elif cmd_type == "shutdown":
                logger.info("Shutdown command received, exiting")
                # Unload all models. Iterate over a snapshot of the keys, since
                # unloading is expected to mutate backend.models.
                for loaded_name in list(backend.models.keys()):
                    try:
                        backend.unload_model(loaded_name)
                    except Exception as unload_exc:
                        # Best-effort cleanup: never block shutdown, but leave
                        # a trace instead of silently discarding the failure.
                        logger.warning(
                            "Failed to unload model '%s' during shutdown: %s",
                            loaded_name,
                            unload_exc,
                        )
                _send_response(
                    resp_queue,
                    {
                        "type": "shutdown_ack",
                        "ts": time.time(),
                    },
                )
                return

            else:
                logger.warning("Unknown command type: %s", cmd_type)
                _send_response(
                    resp_queue,
                    {
                        "type": "error",
                        "error": f"Unknown command type: {cmd_type}",
                        "ts": time.time(),
                    },
                )

        except Exception as exc:
            # A failing command must not take the worker down; report and go on.
            logger.error(
                "Error handling command '%s': %s", cmd_type, exc, exc_info = True
            )
            _send_response(
                resp_queue,
                {
                    "type": "error",
                    "error": f"Command '{cmd_type}' failed: {exc}",
                    "stack": traceback.format_exc(limit = 20),
                    "ts": time.time(),
                },
            )


================================================
FILE: studio/backend/core/training/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Training submodule - Training backends and trainer classes
"""

from .training import TrainingBackend, TrainingProgress, get_training_backend

__all__ = [
    "TrainingProgress",
    "TrainingBackend",
    "get_training_backend",
]


================================================
FILE: studio/backend/core/training/trainer.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Unsloth Training Backend
Integrates Unsloth training capabilities with the FastAPI backend
"""

import os
import sys

# Prevent tokenizer parallelism deadlocks when datasets uses multiprocessing fork
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import torch
from utils.hardware import clear_gpu_cache, safe_num_proc

torch._dynamo.config.recompile_limit = 64
from unsloth import FastLanguageModel, FastVisionModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template

import json
import threading
import math
import structlog
from loggers import get_logger
import time
from pathlib import Path
from typing import Optional, Callable
from dataclasses import dataclass
import pandas as pd
from datasets import Dataset, load_dataset

from utils.models import is_vision_model, detect_audio_type
from utils.datasets import format_and_template_dataset
from utils.datasets import MODEL_TO_TEMPLATE_MAPPER, TEMPLATE_TO_RESPONSES_MAPPER
from utils.paths import (
    ensure_dir,
    resolve_dataset_path,
    resolve_output_dir,
    resolve_tensorboard_dir,
)
from trl import SFTTrainer, SFTConfig

logger = get_logger(__name__)


def _build_report_targets(training_args) -> list[str] | str:
    report_to: list[str] = []
    if training_args.get("enable_wandb", False):
        report_to.append("wandb")
    if training_args.get("enable_tensorboard", False):
        report_to.append("tensorboard")
    return report_to or "none"


@dataclass
class TrainingProgress:
    """Mutable snapshot of a training run's state.

    The trainer updates these fields in place and passes the same instance
    to every registered progress callback.
    """

    # Epoch as reported by the trainer state (fractional during an epoch).
    epoch: float = 0
    # Trainer's global step at the time of the last update.
    step: int = 0
    # Planned number of steps; the ETA estimate is skipped while this is 0.
    total_steps: int = 0
    # Most recently logged training loss.
    loss: float = 0.0
    # Most recently logged learning rate.
    learning_rate: float = 0.0
    # True while a run is in progress; cleared when training is finalized.
    is_training: bool = False
    # Set once a run finishes without being stopped.
    is_completed: bool = False
    # Error description, if any — presumably filled in when a run fails
    # (the code that sets it is not in this part of the file).
    error: Optional[str] = None
    status_message: str = "Ready to train"  # Current stage message
    # Seconds since the recorded training start time; None until known.
    elapsed_seconds: Optional[float] = None
    # Estimated seconds remaining; None when it cannot be estimated.
    eta_seconds: Optional[float] = None
    # Gradient norm from the latest log entry, when the trainer reports one.
    grad_norm: Optional[float] = None
    # Input tokens seen so far, when the trainer state exposes the counter.
    num_tokens: Optional[int] = None
    # Evaluation loss from the latest log entry, when present.
    eval_loss: Optional[float] = None


class UnslothTrainer:
    """
    Unsloth Training Backend
    """

    def __init__(self):
        """Create an idle trainer: no model loaded and no run in progress."""
        # Core objects, filled in once a model is loaded / a run is started.
        self.model = self.tokenizer = self.trainer = self.training_thread = None
        self.training_progress = TrainingProgress()
        self.progress_callbacks = []

        # Run control flags.
        self.is_training = False
        self.should_stop = False
        self.save_on_stop = True
        # Track quantization mode for metadata
        self.load_in_4bit = True

        # Model state tracking
        self.is_vlm = False
        self.is_audio = False
        # Multimodal model (e.g. Gemma 3N) trained on audio data
        self.is_audio_vlm = False
        # One of 'csm', 'whisper', 'snac', 'bicodec', 'dac'
        self._audio_type = None
        # Set once after audio CUDA preprocessing; never cleared
        self._cuda_audio_used = False
        # Path to downloaded Spark-TTS repo (for BiCodecTokenizer)
        self._spark_tts_repo_dir = None
        self.model_name = None

        # Training metrics tracking
        self.training_start_time: Optional[float] = None
        self.batch_size: Optional[int] = None
        self.max_seq_length: Optional[int] = None
        self.gradient_accumulation_steps: Optional[int] = None

        # Guards progress updates coming from different threads.
        self._lock = threading.Lock()

        # Training context kept for later transfer; LoRA is the default mode.
        self.training_context = dict(
            base_model_name = None,
            output_dir = None,
            is_lora = True,
        )

    def pre_detect_and_load_tokenizer(
        self,
        model_name: str,
        max_seq_length: int = 2048,
        hf_token: Optional[str] = None,
        is_dataset_image: bool = False,
        is_dataset_audio: bool = False,
        trust_remote_code: bool = False,
    ) -> None:
        """Detect the model's modality and load only its tokenizer/processor.

        Records ``model_name``, ``max_seq_length`` and ``trust_remote_code``,
        sets ``_audio_type``, ``is_audio``, ``is_audio_vlm`` and ``is_vlm``,
        then stores a tokenizer (an ``AutoProcessor`` for Whisper) in
        ``self.tokenizer``. No model weights are loaded here, so the dataset
        can be formatted before the training model occupies VRAM.

        ``load_model()`` may be called afterwards; it re-detects and replaces
        the lightweight tokenizer set here.

        Args:
            model_name: Model identifier passed to detection and to
                ``from_pretrained``.
            max_seq_length: Stored on the instance for later use.
            hf_token: Optional Hugging Face token; when truthy it is also
                exported as the ``HF_TOKEN`` environment variable.
            is_dataset_image: Whether the dataset contains images; a vision
                model is only treated as a VLM when this is true.
            is_dataset_audio: Whether the dataset contains audio; decides
                ``is_audio_vlm`` for models detected as ``"audio_vlm"``.
            trust_remote_code: Forwarded to ``from_pretrained``.
        """
        self.model_name = model_name
        self.max_seq_length = max_seq_length
        self.trust_remote_code = trust_remote_code

        if hf_token:
            os.environ["HF_TOKEN"] = hf_token

        # --- Audio detection ---
        detected = detect_audio_type(model_name, hf_token)
        if detected == "audio_vlm":
            # Multimodal model: audio handling depends on the dataset, and
            # no dedicated audio type is recorded.
            self._audio_type = None
            self.is_audio = False
            self.is_audio_vlm = is_dataset_audio
        else:
            self._audio_type = detected
            self.is_audio = detected is not None
            self.is_audio_vlm = False

        if not (self.is_audio or self.is_audio_vlm):
            self._cuda_audio_used = False

        # --- VLM detection (skipped for pure audio models) ---
        if self.is_audio:
            vision = False
        else:
            vision = is_vision_model(model_name)
        self.is_vlm = not self.is_audio_vlm and vision and is_dataset_image

        logger.info(
            "pre_detect: audio_type=%s, is_audio=%s, is_audio_vlm=%s, is_vlm=%s",
            self._audio_type,
            self.is_audio,
            self.is_audio_vlm,
            self.is_vlm,
        )

        # --- Lightweight tokenizer/processor ---
        # Whisper needs AutoProcessor (feature_extractor + tokenizer); every
        # other model uses AutoTokenizer (CSM loads its own processor inline).
        if self._audio_type == "whisper":
            from transformers import AutoProcessor as loader_cls
        else:
            from transformers import AutoTokenizer as loader_cls

        self.tokenizer = loader_cls.from_pretrained(
            model_name,
            trust_remote_code = trust_remote_code,
            token = hf_token,
        )

        logger.info("Pre-loaded tokenizer for %s", model_name)

    def add_progress_callback(self, callback: Callable[[TrainingProgress], None]):
        """Add callback for training progress updates"""
        self.progress_callbacks.append(callback)

    def _update_progress(self, **kwargs):
        """Update training progress and notify callbacks"""
        with self._lock:
            for key, value in kwargs.items():
                if hasattr(self.training_progress, key):
                    setattr(self.training_progress, key, value)

            # Notify all callbacks
            for callback in self.progress_callbacks:
                try:
                    callback(self.training_progress)
                except Exception as e:
                    logger.error(f"Error in progress callback: {e}")

    def _create_progress_callback(self):
        """Build the ``TrainerCallback`` that feeds progress into this trainer.

        The returned callback pushes metrics into ``_update_progress`` on
        every log event, refreshes epoch/step at the end of each epoch, and
        asks the training loop to stop once ``should_stop`` is set. It is
        shared by all training branches.
        """
        from transformers import TrainerCallback

        owner = self

        def _estimate_eta(elapsed, step_now):
            """Extrapolate remaining seconds from the average step time."""
            if elapsed is None or step_now <= 0:
                return None
            planned = owner.training_progress.total_steps
            if planned <= 0:
                return None
            remaining = planned - step_now
            if remaining <= 0:
                return None
            return (elapsed / step_now) * remaining

        class _ProgressCallback(TrainerCallback):
            def on_log(self, args, state, control, logs = None, **kwargs):
                if not logs:
                    return

                step_now = state.global_step

                started_at = owner.training_start_time
                elapsed = None if started_at is None else time.time() - started_at

                owner._update_progress(
                    step = step_now,
                    epoch = round(state.epoch, 2) if state.epoch else 0,
                    # Fall back to "train_loss", then 0.0, when "loss" is absent.
                    loss = logs.get("loss", logs.get("train_loss", 0.0)),
                    learning_rate = logs.get("learning_rate", 0.0),
                    elapsed_seconds = elapsed,
                    eta_seconds = _estimate_eta(elapsed, step_now),
                    grad_norm = logs.get("grad_norm", None),
                    num_tokens = getattr(state, "num_input_tokens_seen", None),
                    eval_loss = logs.get("eval_loss", None),
                    status_message = "",
                )

            def on_epoch_end(self, args, state, control, **kwargs):
                owner._update_progress(epoch = state.epoch, step = state.global_step)

            def on_step_end(self, args, state, control, **kwargs):
                if not owner.should_stop:
                    return None
                logger.info(f"Stop detected at step {state.global_step}\n")
                control.should_training_stop = True
                return control

        return _ProgressCallback()

    def _calculate_total_steps(
        self, num_samples, batch_size, grad_accum, num_epochs, max_steps
    ):
        """Calculate total training steps from dataset size and training params."""
        if max_steps and max_steps > 0:
            return max_steps
        len_dataloader = math.ceil(num_samples / batch_size)
        steps_per_epoch = max(
            len_dataloader // grad_accum + int(len_dataloader % grad_accum > 0), 1
        )
        return steps_per_epoch * num_epochs

    def _build_audio_training_args(self, training_args, output_dir, *, extra_args = None):
        """Assemble the trainer config dict shared by the audio branches.

        Args:
            training_args: User-supplied settings; missing keys fall back to
                the defaults used below.
            output_dir: Directory stored under ``output_dir`` in the config.
            extra_args: Optional per-branch overrides, applied last so they
                take precedence over everything computed here.

        Returns:
            dict of keyword arguments for the training configuration.
        """
        opts = training_args

        # An explicit None for warmup_steps is treated like a missing value.
        warmup = opts.get("warmup_steps", 5)
        if warmup is None:
            warmup = 5
        step_cap = opts.get("max_steps", 0)

        config = {
            "per_device_train_batch_size": opts.get("batch_size", 2),
            "gradient_accumulation_steps": opts.get(
                "gradient_accumulation_steps", 4
            ),
            "warmup_steps": warmup,
            "learning_rate": opts.get("learning_rate", 2e-4),
            # Exactly one precision flag is set, depending on bf16 support.
            "fp16": not is_bfloat16_supported(),
            "bf16": is_bfloat16_supported(),
            "logging_steps": 1,
            "optim": opts.get("optim", "adamw_8bit"),
            "weight_decay": opts.get("weight_decay", 0.001),
            "lr_scheduler_type": opts.get("lr_scheduler_type", "linear"),
            "seed": opts.get("random_seed", 3407),
            "output_dir": output_dir,
            "report_to": _build_report_targets(opts),
        }

        if opts.get("enable_tensorboard", False):
            tb_dir = resolve_tensorboard_dir(opts.get("tensorboard_dir"))
            config["logging_dir"] = str(tb_dir)

        # A positive step cap replaces epoch-based scheduling.
        if step_cap and step_cap > 0:
            config["max_steps"] = step_cap
        else:
            config["num_train_epochs"] = opts.get("num_epochs", 3)

        # Periodic checkpoints are only configured for a positive save_steps.
        checkpoint_every = opts.get("save_steps", 0)
        if checkpoint_every and checkpoint_every > 0:
            config.update(save_steps = checkpoint_every, save_strategy = "steps")

        if extra_args:
            config.update(extra_args)

        return config

    def _finalize_training(self, output_dir, label = ""):
        """Wrap up a run: save the result where appropriate and publish status.

        Three outcomes are possible:

        * stopped with ``save_on_stop`` set: model and tokenizer are saved
          and the run is reported as stopped;
        * stopped without ``save_on_stop``: nothing is saved and the run is
          reported as cancelled;
        * ran to the end: model and tokenizer are saved and the run is
          marked completed.

        Args:
            output_dir: Directory the tokenizer is saved to and the adapter
                config is patched in.
            label: Optional prefix for the log line (e.g. a branch name).
        """
        stopped_early = self.should_stop

        # Stop requested and the user does not want the partial result.
        if stopped_early and not self.save_on_stop:
            msg = f"{label} training cancelled" if label else "Training cancelled"
            logger.info(f"\n{msg}.\n")
            self._update_progress(
                is_training = False, status_message = "Training cancelled."
            )
            return

        # Both remaining outcomes persist the model the same way.
        self.trainer.save_model()
        self.tokenizer.save_pretrained(output_dir)
        self._patch_adapter_config(output_dir)

        if stopped_early:
            msg = f"{label} training stopped" if label else "Training stopped"
            logger.info(f"\n{msg}. Model saved to {output_dir}\n")
            self._update_progress(
                is_training = False,
                status_message = f"Training stopped. Model saved to {output_dir}",
            )
        else:
            msg = f"{label} training completed" if label else "Training completed"
            logger.info(f"\n{msg}! Model saved to {output_dir}\n")
            self._update_progress(
                is_training = False,
                is_completed = True,
                status_message = f"Training completed! Model saved to {output_dir}",
            )

    def _cleanup_audio_artifacts(self):
        """Drop leftovers of earlier audio preprocessing from the interpreter.

        Removes the cloned audio repo directories (OuteTTS, Spark-TTS) from
        ``sys.path`` and evicts modules whose names start with ``snac``,
        ``whisper``, ``sparktts`` or ``outetts`` from ``sys.modules``. This
        keeps a later ``dataset.map(num_proc=N)`` from forking workers that
        inherit the stale state, which was observed to deadlock.
        """
        import sys as _sys

        # Candidate sys.path entries added by audio preprocessing.
        here = os.path.dirname(os.path.abspath(__file__))
        candidates = [os.path.join(here, "inference", "OuteTTS")]  # DAC/OuteTTS
        if self._spark_tts_repo_dir:
            # The Spark-TTS code directory sits next to the downloaded repo.
            repo_parent = os.path.dirname(self._spark_tts_repo_dir)
            candidates.append(os.path.join(repo_parent, "Spark-TTS"))

        dropped_paths = []
        for entry in candidates:
            try:
                _sys.path.remove(entry)
            except ValueError:
                # Not on sys.path: nothing to remove.
                continue
            dropped_paths.append(entry)

        # Collect names first so sys.modules is not mutated while iterating.
        stale_prefixes = ("snac", "whisper", "sparktts", "outetts")
        dropped_modules = [
            name for name in _sys.modules if name.startswith(stale_prefixes)
        ]
        for name in dropped_modules:
            _sys.modules.pop(name)

        if dropped_paths or dropped_modules:
            logger.info(
                f"Cleaned up audio artifacts: {len(dropped_paths)} paths, "
                f"{len(dropped_modules)} modules\n"
            )

    def _resolve_audio_columns(self, dataset, custom_format_mapping: dict = None):
        """Resolve audio, text, and speaker columns from user mapping or hardcoded fallback.

        Returns:
            dict with keys: audio_col, text_col, speaker_col (speaker_col may be None)
        """
        cols = dataset.column_names

        if custom_format_mapping:
            audio_col = None
            text_col = None
            speaker_col = None
            for col, role in custom_format_mapping.items():
                if role == "audio":
                    audio_col = col
                elif role == "text":
                    text_col = col
                elif role == "speaker_id":
                    speaker_col = col
            # Use mapping if both required columns exist in the dataset
            if audio_col and audio_col in cols and text_col and text_col in cols:
                return {
                    "audio_col": audio_col,
                    "text_col": text_col,
                    "speaker_col": speaker_col,
                }

        # Hardcoded fallback (existing behavior)
        audio_col = next((c for c in cols if c.lower() in ("audio", "speech")), None)
        text_col = next(
            (
                c
                for c in cols
                if c.lower() in ("text", "sentence", "transcript", "transcription")
            ),
            None,
        )

        speaker_col = None
        if "source" in cols:
            speaker_col = "source"
        elif "speaker_id" in cols:
            speaker_col = "speaker_id"

        return {
            "audio_col": audio_col,
            "text_col": text_col,
            "speaker_col": speaker_col,
        }

    def load_model(
        self,
        model_name: str,
        max_seq_length: int = 2048,
        load_in_4bit: bool = True,
        hf_token: Optional[str] = None,
        is_dataset_image: bool = False,
        is_dataset_audio: bool = False,
        trust_remote_code: bool = False,
        full_finetuning: bool = False,
    ) -> bool:
        """Load a model and its tokenizer/processor for a new training run.

        Releases any previously held model, tokenizer and trainer, clears the
        GPU cache and the Unsloth compiled cache, classifies the model (audio
        codec family, audio VLM, vision or text) and loads it through the
        matching Unsloth loader. Status and errors are published through
        ``_update_progress``.

        Args:
            model_name: Hugging Face repo ID or local path. For Spark-TTS a
                local-style ``"<dir>/LLM"`` name is also accepted.
            max_seq_length: Maximum sequence length forwarded to the loader.
            load_in_4bit: Load 4-bit weights where the branch allows it (the
                CSM, Whisper, Spark-TTS and OuteTTS branches always pass False).
            hf_token: Optional Hugging Face token; exported as ``HF_TOKEN``
                when given.
            is_dataset_image: Whether the dataset contains images. The vision
                loader is used only for vision models with an image dataset.
            is_dataset_audio: Whether the dataset contains audio. The audio
                VLM path is used only when this is set.
            trust_remote_code: Forwarded to ``from_pretrained``.
            full_finetuning: Forwarded to ``from_pretrained``; the loaded model
                is additionally switched to training mode.

        Returns:
            True when the model was loaded; False when access was denied, a
            stop was requested, or loading raised an error (the error is
            reported through ``_update_progress``).
        """
        self.load_in_4bit = load_in_4bit  # Store for training_meta.json
        self.trust_remote_code = (
            trust_remote_code  # For AutoProcessor etc. used during training
        )
        try:
            # Drop references to the previous run so its memory can be freed.
            # Assign None rather than `del`: deleting the attribute would make
            # every later `self.model is None` style check raise
            # AttributeError if this load returns early or fails.
            self.model = None
            self.tokenizer = None
            self.trainer = None

            logger.info("\nClearing GPU memory before training...")
            clear_gpu_cache()

            # Clean up sys.path and sys.modules from previous audio preprocessing
            # to prevent deadlocks when forking worker processes in dataset.map()
            self._cleanup_audio_artifacts()

            # Reload Unsloth-patched transformers modeling modules before clearing
            # the compiled cache. unsloth_compile_transformers() sets __UNSLOTH_PATCHED__
            # on each modeling module and replaces methods with exec'd code.
            # clear_unsloth_compiled_cache() deletes the disk cache, but the flag
            # prevents re-compilation — leaving missing cache files. Reloading
            # restores original class definitions so Unsloth can re-compile cleanly.
            import sys as _sys
            import importlib

            for _key, _mod in list(_sys.modules.items()):
                if "transformers.models." in _key and ".modeling_" in _key:
                    if hasattr(_mod, "__UNSLOTH_PATCHED__"):
                        try:
                            importlib.reload(_mod)
                        except Exception:
                            pass  # Non-critical — Unsloth will handle stale modules

            # Remove stale compiled cache so the new model gets a fresh one
            from utils.cache_cleanup import clear_unsloth_compiled_cache

            clear_unsloth_compiled_cache()
            # Detect audio model type dynamically (config.json + tokenizer)
            self._audio_type = detect_audio_type(model_name, hf_token)
            # audio_vlm is detected as an audio_type now, handle it separately
            if self._audio_type == "audio_vlm":
                self.is_audio = False
                self.is_audio_vlm = (
                    is_dataset_audio  # Only use audio VLM path if dataset has audio
                )
                self._audio_type = None
            else:
                self.is_audio = self._audio_type is not None
                self.is_audio_vlm = False

            if not self.is_audio and not self.is_audio_vlm:
                self._cuda_audio_used = False

            # VLM: vision model with image dataset (mutually exclusive with audio paths)
            vision = is_vision_model(model_name) if not self.is_audio else False
            self.is_vlm = not self.is_audio_vlm and vision and is_dataset_image
            self.model_name = model_name
            self.max_seq_length = max_seq_length

            logger.info(
                f"Audio type: {self._audio_type}, is_audio: {self.is_audio}, is_audio_vlm: {self.is_audio_vlm}"
            )
            logger.info(
                f"Dataset has images: {is_dataset_image}, audio: {is_dataset_audio}"
            )
            logger.info(f"Using VLM path: {self.is_vlm}")

            # Reset training state for new run
            self._update_progress(
                is_training = True,
                is_completed = False,
                error = None,
                step = 0,
                loss = 0.0,
                epoch = 0,
            )

            # Update UI immediately with loading message
            model_display = (
                model_name.split("/")[-1] if "/" in model_name else model_name
            )
            model_type_label = (
                "audio" if self.is_audio else ("vision" if self.is_vlm else "text")
            )
            self._update_progress(
                status_message = f"Loading {model_type_label} model... {model_display}"
            )

            logger.info(f"\nLoading {model_type_label} model: {model_name}")

            # Set HF token if provided
            if hf_token:
                os.environ["HF_TOKEN"] = hf_token

            # Proactive gated-model check: verify access BEFORE from_pretrained.
            # Catches ALL gated/private models (text, vision, audio) globally.
            # Only HF repo IDs are checked. A name containing "/" that is an
            # existing local directory is a local model: looking it up on the
            # Hub would fail with RepositoryNotFoundError and be misreported
            # as "Access denied".
            if "/" in model_name and not os.path.isdir(model_name):
                try:
                    from huggingface_hub import model_info as hf_model_info

                    info = hf_model_info(model_name, token = hf_token or None)
                    # model_info succeeds even for gated repos (metadata is public),
                    # but info.gated tells us if files require acceptance/token.
                    if info.gated and not hf_token:
                        friendly = (
                            f"Access denied for '{model_name}'. This model is gated. "
                            f"Please add a Hugging Face token with access and try again."
                        )
                        logger.error(
                            f"Model '{model_name}' is gated (gated={info.gated}) and no HF token provided"
                        )
                        self._update_progress(error = friendly, is_training = False)
                        return False
                except Exception as gate_err:
                    from huggingface_hub.utils import (
                        GatedRepoError,
                        RepositoryNotFoundError,
                    )

                    # Only access errors abort here; anything else (network
                    # hiccup, invalid repo id, ...) is left for from_pretrained
                    # to surface.
                    if isinstance(gate_err, (GatedRepoError, RepositoryNotFoundError)):
                        friendly = (
                            f"Access denied for '{model_name}'. This model is gated or private. "
                            f"Please add a Hugging Face token with access and try again."
                        )
                        logger.error(f"Gated model check failed: {gate_err}")
                        self._update_progress(error = friendly, is_training = False)
                        return False

            # Branch based on model type
            if self._audio_type == "csm":
                # CSM: FastModel + auto_model=CsmForConditionalGeneration + load_in_4bit=False
                from unsloth import FastModel
                from transformers import CsmForConditionalGeneration

                self.model, self.tokenizer = FastModel.from_pretrained(
                    model_name = model_name,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    auto_model = CsmForConditionalGeneration,
                    load_in_4bit = False,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info("Loaded CSM audio model")

            elif self._audio_type == "whisper":
                # Whisper: FastModel + auto_model=WhisperForConditionalGeneration + load_in_4bit=False
                from unsloth import FastModel
                from transformers import WhisperForConditionalGeneration

                self.model, self.tokenizer = FastModel.from_pretrained(
                    model_name = model_name,
                    dtype = None,
                    load_in_4bit = False,
                    full_finetuning = full_finetuning,
                    auto_model = WhisperForConditionalGeneration,
                    whisper_language = "English",
                    whisper_task = "transcribe",
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                # Configure generation settings (notebook lines 100-105)
                self.model.generation_config.language = "<|en|>"
                self.model.generation_config.task = "transcribe"
                self.model.config.suppress_tokens = []
                self.model.generation_config.forced_decoder_ids = None
                logger.info("Loaded Whisper audio model (FastModel)")

            elif self._audio_type == "snac":
                # Orpheus: language model with audio codec tokens
                self.model, self.tokenizer = FastLanguageModel.from_pretrained(
                    model_name = model_name,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    load_in_4bit = load_in_4bit,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info(
                    f"Loaded {self._audio_type} audio model (FastLanguageModel)"
                )

            elif self._audio_type == "bicodec":
                # Spark-TTS: download full repo (contains sparktts package + BiCodec weights),
                # then load only the LLM subfolder with FastModel.
                # model_name may be:
                #   "Spark-TTS-0.5B/LLM"       (local-style, from YAML mapping)
                #   "unsloth/Spark-TTS-0.5B"    (HF repo ID)
                from unsloth import FastModel
                from huggingface_hub import snapshot_download

                if model_name.endswith("/LLM"):
                    # "Spark-TTS-0.5B/LLM" → parent="Spark-TTS-0.5B"
                    local_dir = model_name.rsplit("/", 1)[0]
                    hf_repo = f"unsloth/{local_dir}"
                else:
                    # "unsloth/Spark-TTS-0.5B" → local_dir="Spark-TTS-0.5B"
                    hf_repo = model_name
                    local_dir = model_name.split("/")[-1]

                repo_path = snapshot_download(hf_repo, local_dir = local_dir)
                self._spark_tts_repo_dir = os.path.abspath(
                    repo_path
                )  # Absolute path for sys.path
                # The LLM weights live in the "LLM" subfolder of the snapshot.
                llm_path = os.path.join(self._spark_tts_repo_dir, "LLM")

                self.model, self.tokenizer = FastModel.from_pretrained(
                    model_name = llm_path,
                    max_seq_length = max_seq_length,
                    dtype = torch.float32,  # Spark-TTS requires float32
                    load_in_4bit = False,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info("Loaded Spark-TTS (bicodec) model")

            elif self._audio_type == "dac":
                # OuteTTS: uses FastModel (not FastLanguageModel) with load_in_4bit=False
                from unsloth import FastModel

                self.model, self.tokenizer = FastModel.from_pretrained(
                    model_name,
                    max_seq_length = max_seq_length,
                    load_in_4bit = False,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info("Loaded OuteTTS (dac) model (FastModel)")

            elif self.is_audio_vlm:
                # Audio VLM: multimodal model trained on audio (e.g. Gemma 3N)
                # Uses FastModel (general loader) — returns (model, processor)
                from unsloth import FastModel

                self.model, self.tokenizer = FastModel.from_pretrained(
                    model_name = model_name,
                    max_seq_length = max_seq_length,
                    dtype = None,
                    load_in_4bit = load_in_4bit,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info("Loaded audio VLM model (FastModel)")

            elif self.is_vlm:
                # Load vision model - returns (model, tokenizer)
                self.model, self.tokenizer = FastVisionModel.from_pretrained(
                    model_name = model_name,
                    max_seq_length = max_seq_length,
                    dtype = None,  # Auto-detect
                    load_in_4bit = load_in_4bit,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info("Loaded vision model")

                # Diagnostic: check if FastVisionModel returned a real Processor or a raw tokenizer
                from transformers import ProcessorMixin

                tok = self.tokenizer
                has_image_proc = isinstance(tok, ProcessorMixin) or hasattr(
                    tok, "image_processor"
                )
                logger.info(
                    f"\n[VLM Diagnostic] FastVisionModel returned: {type(tok).__name__}"
                )
                logger.info(
                    f"[VLM Diagnostic] Is ProcessorMixin: {isinstance(tok, ProcessorMixin)}"
                )
                logger.info(
                    f"[VLM Diagnostic] Has image_processor: {hasattr(tok, 'image_processor')}"
                )
                logger.info(
                    f"[VLM Diagnostic] Usable as vision processor: {has_image_proc}\n"
                )
            else:
                # Load text model - returns (model, tokenizer)
                self.model, self.tokenizer = FastLanguageModel.from_pretrained(
                    model_name = model_name,
                    max_seq_length = max_seq_length,
                    dtype = None,  # Auto-detect
                    load_in_4bit = load_in_4bit,
                    full_finetuning = full_finetuning,
                    token = hf_token,
                    trust_remote_code = trust_remote_code,
                )
                logger.info("Loaded text model")

            # A stop may have been requested while the (slow) load was running.
            if self.should_stop:
                return False

            if full_finetuning:
                # Enable training mode for full fine-tuning
                # This ensures all model parameters are trainable; otherwise, they might be frozen.
                self.model.for_training()

            self._update_progress(status_message = "Model loaded successfully")
            logger.info("Model loaded successfully")
            return True

        except Exception as e:
            if (
                isinstance(e, OSError)
                and "could not get source code" in str(e)
                and not getattr(self, "_source_code_retried", False)
            ):
                # Unsloth's patching can leave stale state that makes
                # inspect.getsource() fail when switching model families
                # (e.g. gemma3 → gemma3n). The load always succeeds on a
                # second attempt because the failed first call's partial
                # imports clean up the stale state as a side effect.
                self._source_code_retried = True
                logger.info("\n'could not get source code' — retrying once...\n")
                return self.load_model(
                    model_name = model_name,
                    max_seq_length = max_seq_length,
                    load_in_4bit = load_in_4bit,
                    hf_token = hf_token,
                    is_dataset_image = is_dataset_image,
                    is_dataset_audio = is_dataset_audio,
                    trust_remote_code = trust_remote_code,
                    full_finetuning = full_finetuning,
                )
            error_msg = str(e)
            # Catch gated/auth errors and surface a friendly message
            error_lower = error_msg.lower()
            auth_markers = (
                "gated repo",
                "access to it at",
                "401",
                "403",
                "unauthorized",
                "forbidden",
            )
            if any(marker in error_lower for marker in auth_markers):
                error_msg = (
                    f"Access denied for '{model_name}'. This model is gated or private. "
                    f"Please add a Hugging Face token with access and try again."
                )
            logger.error(f"Error loading model: {e}")
            self._update_progress(error = error_msg, is_training = False)
            return False
        finally:
            # Re-arm the one-shot retry for the next load_model() call.
            self._source_code_retried = False

    def prepare_model_for_training(
        self,
        use_lora: bool = True,
        # Layer-selection LoRA parameters (used by the vision and audio-VLM paths)
        finetune_vision_layers: bool = True,
        finetune_language_layers: bool = True,
        finetune_attention_modules: bool = True,
        finetune_mlp_modules: bool = True,
        # Standard LoRA parameters
        target_modules: Optional[list] = None,
        lora_r: int = 16,
        lora_alpha: int = 16,
        lora_dropout: float = 0.0,
        use_gradient_checkpointing: str = "unsloth",
        use_rslora: bool = False,
        use_loftq: bool = False,
    ) -> bool:
        """
        Prepare model for training (with optional LoRA).

        Args:
            use_lora: When False, no adapters are attached (full finetuning)
                and the method returns True immediately.
            finetune_vision_layers, finetune_language_layers,
            finetune_attention_modules, finetune_mlp_modules: Layer selection
                forwarded to ``get_peft_model`` on the vision and audio-VLM
                paths only.
            target_modules: Module names to adapt. None or an empty list
                selects the standard attention/MLP projections; a list that
                is exactly ``["all-linear"]`` becomes the PEFT keyword
                ``"all-linear"``; in a longer list the keyword is dropped.
            lora_r, lora_alpha, lora_dropout, use_rslora: Forwarded to
                ``get_peft_model``.
            use_gradient_checkpointing: True, False or "unsloth". String
                spellings ("true"/"1"/"yes", "false"/"0"/"no", "") are
                normalised; anything else falls back to "unsloth" with a
                warning.
            use_loftq: When True, a 4-bit, 1-iteration LoftQ config is passed.

        Returns:
            True when the model is ready for training; False when a stop was
            requested or an error occurred (reported via ``_update_progress``).
        """
        try:
            # getattr: the attribute may be absent altogether if a previous
            # load released it, which should read as "not loaded".
            if getattr(self, "model", None) is None:
                raise ValueError("Model not loaded. Call load_model() first.")

            # Full finetuning mode - skip PEFT entirely
            if not use_lora:
                self._update_progress(
                    status_message = "Full finetuning mode - no LoRA adapters"
                )
                logger.info("Full finetuning mode - training all parameters\n")
                return True

            # LoRA/QLoRA mode - apply PEFT
            # "all-linear" is a PEFT keyword that targets every linear layer
            if isinstance(target_modules, list) and "all-linear" in target_modules:
                if len(target_modules) == 1:
                    target_modules = "all-linear"
                else:
                    target_modules = [m for m in target_modules if m != "all-linear"]
            elif target_modules is None or (
                isinstance(target_modules, list) and len(target_modules) == 0
            ):
                target_modules = [
                    "q_proj",
                    "k_proj",
                    "v_proj",
                    "o_proj",
                    "gate_proj",
                    "up_proj",
                    "down_proj",
                ]

            # Validate and normalize gradient_checkpointing
            # Must be one of: True, False, or "unsloth"
            if isinstance(use_gradient_checkpointing, str):
                use_gradient_checkpointing = use_gradient_checkpointing.strip().lower()
                if (
                    use_gradient_checkpointing == ""
                    or use_gradient_checkpointing == "unsloth"
                ):
                    use_gradient_checkpointing = "unsloth"
                elif use_gradient_checkpointing in ("true", "1", "yes"):
                    use_gradient_checkpointing = True
                elif use_gradient_checkpointing in ("false", "0", "no"):
                    use_gradient_checkpointing = False
                else:
                    # Invalid value, default to "unsloth"
                    logger.warning(
                        f"Invalid gradient_checkpointing value: {use_gradient_checkpointing}, defaulting to 'unsloth'"
                    )
                    use_gradient_checkpointing = "unsloth"
            elif use_gradient_checkpointing not in (True, False, "unsloth"):
                # Invalid type or value, default to "unsloth"
                logger.warning(
                    f"Invalid gradient_checkpointing type/value: {use_gradient_checkpointing}, defaulting to 'unsloth'"
                )
                use_gradient_checkpointing = "unsloth"

            # Check if model has the expected attributes
            if not hasattr(self.model, "config"):
                error_msg = "Model does not have config attribute - model may not be loaded correctly"
                logger.error(error_msg)
                self._update_progress(error = error_msg)
                return False

            logger.info(
                f"Configuring LoRA adapters (r={lora_r}, alpha={lora_alpha})...\n"
            )
            logger.info(
                f"Gradient checkpointing: {use_gradient_checkpointing} (type: {type(use_gradient_checkpointing).__name__})\n"
            )

            # Arguments shared by every get_peft_model call below.
            lora_kwargs = dict(
                r = lora_r,
                target_modules = target_modules,
                lora_alpha = lora_alpha,
                lora_dropout = lora_dropout,
                bias = "none",
                use_gradient_checkpointing = use_gradient_checkpointing,
                random_state = 3407,
                use_rslora = use_rslora,
                loftq_config = {"loftq_bits": 4, "loftq_iter": 1}
                if use_loftq
                else None,
            )
            # Layer selection, only passed on the vision / audio-VLM paths.
            layer_kwargs = dict(
                finetune_vision_layers = finetune_vision_layers,
                finetune_language_layers = finetune_language_layers,
                finetune_attention_modules = finetune_attention_modules,
                finetune_mlp_modules = finetune_mlp_modules,
            )

            # Branch based on model type: audio, audio_vlm, vision, or text
            if self._audio_type in ("csm", "bicodec", "dac") or self.is_audio_vlm:
                # Models using FastModel.get_peft_model (codec audio + audio VLM)
                from unsloth import FastModel

                label = self._audio_type or "audio_vlm"
                logger.info(f"{label} LoRA configuration:")
                logger.info(f"  - Target modules: {target_modules}")
                if self.is_audio_vlm:
                    logger.info(f"  - Finetune vision layers: {finetune_vision_layers}")
                    logger.info(
                        f"  - Finetune language layers: {finetune_language_layers}"
                    )
                    logger.info(
                        f"  - Finetune attention modules: {finetune_attention_modules}"
                    )
                    logger.info(f"  - Finetune MLP modules: {finetune_mlp_modules}")
                # Blank separator line. A message argument is required:
                # calling info() with no arguments raises TypeError on
                # stdlib-style loggers.
                logger.info("")

                peft_kwargs = dict(lora_kwargs)
                # Audio VLM models support VLM-style layer selection
                if self.is_audio_vlm:
                    peft_kwargs.update(layer_kwargs)

                self.model = FastModel.get_peft_model(self.model, **peft_kwargs)

            elif self._audio_type == "whisper":
                # Phase 2: Whisper uses FastModel.get_peft_model with task_type=None
                from unsloth import FastModel

                logger.info("Audio model (whisper) LoRA configuration:")
                logger.info(f"  - Target modules: {target_modules}\n")

                self.model = FastModel.get_peft_model(
                    self.model,
                    **lora_kwargs,
                    task_type = None,
                )

            elif self._audio_type == "snac":
                # Orpheus uses FastLanguageModel.get_peft_model
                logger.info(f"Audio model ({self._audio_type}) LoRA configuration:")
                logger.info(f"  - Target modules: {target_modules}\n")

                self.model = FastLanguageModel.get_peft_model(
                    self.model, **lora_kwargs
                )

            elif self.is_vlm:
                # Vision model LoRA
                logger.info("Vision model LoRA configuration:")
                logger.info(f"  - Finetune vision layers: {finetune_vision_layers}")
                logger.info(f"  - Finetune language layers: {finetune_language_layers}")
                logger.info(
                    f"  - Finetune attention modules: {finetune_attention_modules}"
                )
                logger.info(f"  - Finetune MLP modules: {finetune_mlp_modules}\n")

                self.model = FastVisionModel.get_peft_model(
                    self.model,
                    **layer_kwargs,
                    **lora_kwargs,
                )
            else:
                # Text model LoRA
                logger.info("Text model LoRA configuration:")
                logger.info(f"  - Target modules: {target_modules}\n")

                self.model = FastLanguageModel.get_peft_model(
                    self.model, **lora_kwargs
                )

            # Check if stopped during LoRA preparation
            if self.should_stop:
                logger.info("Stopped during LoRA configuration\n")
                return False

            self._update_progress(status_message = "LoRA adapters configured")
            logger.info("LoRA adapters configured successfully\n")
            return True

        except Exception as e:
            import traceback

            # Some exceptions carry no message; fall back to the type name.
            error_details = (
                f"{type(e).__name__}: {str(e)}"
                if str(e)
                else f"{type(e).__name__} (no message)"
            )
            full_traceback = traceback.format_exc()
            logger.error(f"Error preparing model: {error_details}")
            logger.error(f"Full traceback:\n{full_traceback}")
            logger.info(f"\n[ERROR] Error preparing model: {error_details}")
            logger.info(f"[ERROR] Full traceback:\n{full_traceback}")
            self._update_progress(error = error_details)
            return False

    def _apply_csm_forward_fix(self):
        """Monkey-patch CsmForConditionalGeneration.forward to fix depth decoder kwargs.

        The original transformers forward passes raw **kwargs (num_items_in_batch,
        causal_mask, etc.) from the Trainer/PEFT through to the depth decoder,
        causing depth_decoder_loss=None and 'Tensor + NoneType' crash.

        We patch at both instance AND class level for maximum reliability,
        and strip non-TransformersKwargs params that Unsloth/PEFT inject.

        Side effects:
            * ``self.model.base_model.model.forward`` is rebound to the
              replacement forward (instance-level patch).
            * ``CsmForConditionalGeneration.forward`` is overwritten on the
              class itself, so the change is visible to anything that resolves
              ``forward`` through the class. Nothing in this method restores
              the previous implementation.

        Returns:
            None.
        """
        import types
        # NOTE(review): `torch` is imported here but not referenced by name below.
        import torch
        import torch.nn as nn
        from transformers.models.csm.modeling_csm import (
            CsmForConditionalGeneration,
            CsmOutputWithPast,
        )

        base_csm = self.model.base_model.model  # CsmForConditionalGeneration

        # Save original forward (the @can_return_tuple wrapped version)
        # NOTE(review): `_original_forward` is captured but never used or
        # restored in this method.
        _original_forward = CsmForConditionalGeneration.forward

        # Keys that the depth decoder and its sub-layers actually understand
        # Used as a whitelist: any kwarg not in this set is dropped before the
        # backbone, loss function and depth decoder are called.
        _TRANSFORMERS_KWARGS = {
            "num_items_in_batch",
            "output_hidden_states",
            "output_attentions",
            "output_router_logits",
            "cu_seq_lens_q",
            "cu_seq_lens_k",
            "max_length_q",
            "max_length_k",
        }

        def _fixed_csm_forward(
            self,
            input_ids = None,
            input_values = None,
            attention_mask = None,
            input_values_cutoffs = None,
            position_ids = None,
            past_key_values = None,
            inputs_embeds = None,
            labels = None,
            use_cache = None,
            cache_position = None,
            logits_to_keep = 0,
            **kwargs,
        ):
            # NOTE: `self` here is the CSM model the forward is bound to, not the
            # object that owns `_apply_csm_forward_fix`.

            # Strip non-standard kwargs injected by Unsloth/PEFT (causal_mask,
            # num_logits_to_keep, task_ids, return_dict, etc.)
            # output_attentions / output_hidden_states are popped so they can be
            # passed explicitly below without colliding with **clean_kwargs.
            output_attentions = kwargs.pop("output_attentions", None)
            output_hidden_states = kwargs.pop("output_hidden_states", None)
            # The remaining pops are redundant with the whitelist filter below
            # (none of these names is in _TRANSFORMERS_KWARGS) but make the
            # intent explicit.
            kwargs.pop("return_dict", None)
            kwargs.pop("causal_mask", None)
            kwargs.pop("num_logits_to_keep", None)
            kwargs.pop("task_ids", None)

            # Only keep recognized TransformersKwargs
            clean_kwargs = {
                k: v for k, v in kwargs.items() if k in _TRANSFORMERS_KWARGS
            }

            # 2-D input_ids are merged with the audio inputs into embeddings
            # (and labels are replaced by the merged labels); input_ids is then
            # cleared so the backbone receives inputs_embeds only.
            if input_ids is not None and input_ids.ndim == 2:
                merged = self._merge_input_ids_with_input_values(
                    input_ids, input_values, input_values_cutoffs, labels
                )
                inputs_embeds = merged["inputs_embeds"]
                labels = merged["labels"]
                input_ids = None

            backbone_outputs = self.backbone_model(
                input_ids = input_ids,
                attention_mask = attention_mask,
                position_ids = position_ids,
                past_key_values = past_key_values,
                inputs_embeds = inputs_embeds,
                use_cache = use_cache,
                cache_position = cache_position,
                output_attentions = output_attentions,
                output_hidden_states = output_hidden_states,
                **clean_kwargs,
            )

            backbone_hidden_states = backbone_outputs[0]
            # An int N selects the last N positions; with the default 0 this is
            # slice(0, None), i.e. every position. Non-int values are used as
            # the index directly.
            slice_indices = (
                slice(-logits_to_keep, None)
                if isinstance(logits_to_keep, int)
                else logits_to_keep
            )
            backbone_logits = self.lm_head(backbone_hidden_states[:, slice_indices, :])

            loss = None
            backbone_loss = None
            depth_decoder_loss = None
            depth_decoder_outputs = None
            if labels is not None:
                # Index 0 of the last label axis supervises the backbone head.
                backbone_labels = labels[:, :, 0]
                backbone_loss = self.loss_function(
                    logits = backbone_logits,
                    labels = backbone_labels,
                    vocab_size = self.config.vocab_size,
                    **clean_kwargs,
                )

                # Positions where at least one of label entries 1.. is not the
                # ignore value (-100) are the ones the depth decoder trains on.
                train_mask = ~(labels[:, :, 1:] == -100).all(dim = -1)
                depth_decoder_input_ids = labels[train_mask][
                    ..., : self.config.num_codebooks - 1
                ]
                # Prepend a single 0 along the last axis.
                depth_decoder_input_ids = nn.functional.pad(
                    depth_decoder_input_ids, (1, 0), value = 0
                )

                train_idxs = train_mask.nonzero(as_tuple = True)
                # Use the backbone hidden state of the position *before* each
                # selected position (second index shifted by -1).
                backbone_last_hidden_states = backbone_hidden_states[
                    train_idxs[0], train_idxs[1] - 1, :
                ]
                depth_decoder_labels = labels[train_mask]

                # Build clean kwargs for depth decoder
                dd_kwargs = clean_kwargs.copy()
                # Scale num_items_in_batch for depth decoder by
                # (num_codebooks - 1); presumably 31 for the stock CSM config —
                # TODO confirm against the model config.
                if "num_items_in_batch" in dd_kwargs:
                    dd_kwargs["num_items_in_batch"] = dd_kwargs[
                        "num_items_in_batch"
                    ] * (self.config.num_codebooks - 1)

                depth_decoder_outputs = self.depth_decoder(
                    input_ids = depth_decoder_input_ids,
                    backbone_last_hidden_state = backbone_last_hidden_states,
                    use_cache = False,
                    return_dict = True,
                    labels = depth_decoder_labels,
                    output_attentions = output_attentions,
                    output_hidden_states = output_hidden_states,
                    **dd_kwargs,
                )

                depth_decoder_loss = depth_decoder_outputs.loss
                if depth_decoder_loss is None:
                    logger.warning(
                        "CSM depth_decoder_loss is None! "
                        f"labels shape={depth_decoder_labels.shape}, "
                        f"train_mask sum={train_mask.sum().item()}"
                    )
                    # Fallback: use only backbone loss instead of crashing
                    loss = backbone_loss
                else:
                    loss = backbone_loss + depth_decoder_loss

            # Depth decoder fields are None when no labels were given (the depth
            # decoder is only run inside the `labels is not None` branch).
            # NOTE(review): the guards below use truthiness of
            # `depth_decoder_outputs`, not an explicit `is not None` check.
            return CsmOutputWithPast(
                loss = loss,
                backbone_loss = backbone_loss,
                depth_decoder_loss = depth_decoder_loss,
                logits = backbone_logits,
                past_key_values = backbone_outputs.past_key_values,
                hidden_states = backbone_outputs.hidden_states,
                attentions = backbone_outputs.attentions,
                depth_decoder_logits = (
                    depth_decoder_outputs.logits if depth_decoder_outputs else None
                ),
                depth_decoder_past_key_values = (
                    depth_decoder_outputs.past_key_values
                    if depth_decoder_outputs
                    else None
                ),
                depth_decoder_hidden_states = (
                    depth_decoder_outputs.hidden_states
                    if depth_decoder_outputs
                    else None
                ),
                depth_decoder_attentions = (
                    depth_decoder_outputs.attentions if depth_decoder_outputs else None
                ),
            )

        # Patch at BOTH instance and class level for maximum reliability.
        # Instance-level: catches calls via BaseTuner.forward -> self.model.forward()
        base_csm.forward = types.MethodType(_fixed_csm_forward, base_csm)
        # Class-level: catches any path that resolves through the class dict
        # (this replaces the previously installed class attribute outright).
        CsmForConditionalGeneration.forward = _fixed_csm_forward
        logger.info("Applied CSM forward fix (class + instance level)\n")

    def _preprocess_csm_dataset(self, dataset, custom_format_mapping = None):
        """Build the tensor dataset used for CSM TTS training.

        Every row is rendered through the processor's chat template as a
        single-turn conversation (speaker as role, text + audio as content) and
        the first batch item of each required model input is kept. Rows that
        raise, or whose outputs are not all tensors, are counted as skipped.

        Args:
            dataset: Source dataset containing an audio and a text column, as
                resolved by ``_resolve_audio_columns``.
            custom_format_mapping: Optional column mapping forwarded to
                ``_resolve_audio_columns``.

        Returns:
            A ``Dataset`` built from the successfully processed rows.

        Raises:
            ValueError: If no audio or text column could be resolved, or if no
                row survives preprocessing.
        """
        from transformers import AutoProcessor
        from datasets import Audio
        import torch

        csm_processor = AutoProcessor.from_pretrained(
            self.model_name,
            trust_remote_code = getattr(self, "trust_remote_code", False),
        )

        # Fine-tuned checkpoints (e.g. keanteng/sesame-csm-elise) persist
        # pad_to_multiple_of in tokenizer_config.json; _merge_kwargs would leak
        # it into audio_kwargs, where EncodecFeatureExtractor rejects it.
        csm_processor.tokenizer.init_kwargs.pop("pad_to_multiple_of", None)

        # Column names come from the user mapping or the hardcoded fallback.
        columns = self._resolve_audio_columns(dataset, custom_format_mapping)
        audio_col = columns["audio_col"]
        text_col = columns["text_col"]
        speaker_key = columns["speaker_col"]

        if audio_col is None:
            raise ValueError(
                f"No audio column found in dataset. Columns: {dataset.column_names}"
            )
        if text_col is None:
            raise ValueError(
                f"No text column found in dataset. Columns: {dataset.column_names}"
            )
        if speaker_key is None:
            # Without a speaker column every row gets the same synthetic speaker.
            logger.info(
                "No speaker found, adding default 'source' of 0 for all examples\n"
            )
            dataset = dataset.add_column("source", ["0"] * len(dataset))
            speaker_key = "source"

        logger.info(
            f"CSM preprocessing: audio_col='{audio_col}', text_col='{text_col}', speaker_key='{speaker_key}'\n"
        )

        dataset = dataset.cast_column(audio_col, Audio(sampling_rate = 24000))

        required_keys = (
            "input_ids",
            "attention_mask",
            "labels",
            "input_values",
            "input_values_cutoffs",
        )

        self._update_progress(status_message = "Preprocessing CSM dataset...")
        total = len(dataset)
        kept_rows = []
        skipped = 0
        for row_idx in range(total):
            if self.should_stop:
                logger.info("Stopped during CSM preprocessing\n")
                break

            row = dataset[row_idx]
            try:
                role = str(row[speaker_key])
                text_part = {"type": "text", "text": row.get(text_col, "")}
                audio_part = {"type": "audio", "path": row[audio_col]["array"]}
                conversation = [{"role": role, "content": [text_part, audio_part]}]

                # NOTE: pad_to_multiple_of intentionally omitted from text_kwargs —
                # CsmProcessor._merge_kwargs leaks it to EncodecFeatureExtractor which rejects it.
                encoded = csm_processor.apply_chat_template(
                    conversation,
                    tokenize = True,
                    return_dict = True,
                    output_labels = True,
                    text_kwargs = {
                        "padding": "max_length",
                        "max_length": 256,
                        "padding_side": "right",
                    },
                    audio_kwargs = {
                        "sampling_rate": 24_000,
                        "max_length": 240001,
                        "padding": "max_length",
                    },
                    common_kwargs = {"return_tensors": "pt"},
                )

                # Keep only the first (and only) batch item of each input.
                tensors = {}
                for key in required_keys:
                    if key not in encoded:
                        raise KeyError(f"Missing required key '{key}' in model outputs")
                    tensors[key] = encoded[key][0]

                all_tensors = all(
                    isinstance(value, torch.Tensor) for value in tensors.values()
                )
            except Exception as e:
                logger.warning(f"Error processing CSM example {row_idx}: {e}")
                skipped += 1
                continue

            if not all_tensors:
                skipped += 1
                continue

            kept_rows.append(tensors)

            done = row_idx + 1
            if done % 100 == 0:
                self._update_progress(
                    status_message = f"Preprocessing CSM... {done}/{total}"
                )

        if not kept_rows:
            raise ValueError(
                f"No valid examples after CSM preprocessing (skipped {skipped})"
            )

        result_dataset = Dataset.from_list(kept_rows)
        logger.info(
            f"CSM preprocessing complete: {len(result_dataset)} examples "
            f"({skipped} skipped)\n"
        )
        return result_dataset

    def _format_audio_vlm_dataset(self, dataset, custom_format_mapping = None):
        """Convert an audio/text dataset into chat-style transcription samples.

        Used for multimodal models (e.g. Gemma 3N). The audio column is cast to
        16 kHz and a ``messages`` column is produced holding a
        system / user / assistant conversation per row, with the row's text as
        the assistant reply.

        Args:
            dataset: Source dataset with an audio and a text column, as
                resolved by ``_resolve_audio_columns``.
            custom_format_mapping: Optional column mapping forwarded to
                ``_resolve_audio_columns``.

        Returns:
            The mapped dataset including the ``messages`` column.

        Raises:
            ValueError: If either the audio or the text column is not resolved.
        """
        from datasets import Audio

        columns = self._resolve_audio_columns(dataset, custom_format_mapping)
        audio_col, text_col = columns["audio_col"], columns["text_col"]
        if not (audio_col and text_col):
            raise ValueError(
                f"Audio VLM dataset needs 'audio' and 'text' columns, got: {dataset.column_names}"
            )

        # Remembered on the instance so the collator closure can find the column.
        self._audio_vlm_audio_col = audio_col

        # 16 kHz is the standard rate for speech models.
        dataset = dataset.cast_column(audio_col, Audio(sampling_rate = 16000))

        def format_messages(samples):
            conversations = []
            for clip, transcript in zip(samples[audio_col], samples[text_col]):
                waveform = clip["array"]
                answer = str(transcript)
                system_turn = {
                    "role": "system",
                    "content": [
                        {
                            "type": "text",
                            "text": "You are an assistant that transcribes speech accurately.",
                        }
                    ],
                }
                user_turn = {
                    "role": "user",
                    "content": [
                        {"type": "audio", "audio": waveform},
                        {"type": "text", "text": "Please transcribe this audio."},
                    ],
                }
                assistant_turn = {
                    "role": "assistant",
                    "content": [{"type": "text", "text": answer}],
                }
                conversations.append([system_turn, user_turn, assistant_turn])
            return {"messages": conversations}

        self._update_progress(status_message = "Formatting audio VLM dataset...")
        dataset = dataset.map(
            format_messages, batched = True, batch_size = 4, num_proc = safe_num_proc(4)
        )
        logger.info(f"Audio VLM dataset formatted: {len(dataset)} examples\n")
        return dataset

    def _preprocess_snac_dataset(self, dataset, custom_format_mapping = None):
        """Preprocess dataset for Orpheus TTS training with SNAC codec.

        Mirrors Orpheus_(3B)-TTS.ipynb: encode audio with SNAC (24kHz, 3 hierarchical
        layers), interleave 7 codes per frame, wrap with Orpheus special tokens,
        train on full sequence (no label masking).

        Args:
            dataset: Source dataset with an audio and a text column, as
                resolved by ``_resolve_audio_columns``. A speaker column is
                optional; when present and non-empty it prefixes the text.
            custom_format_mapping: Optional column mapping forwarded to
                ``_resolve_audio_columns``.

        Returns:
            A ``Dataset`` with ``input_ids``, ``labels`` and ``attention_mask``
            lists per example, truncated to ``self.max_seq_length`` (2048 when
            that attribute is falsy).

        Raises:
            ValueError: If the audio or text column is not resolved, or if no
                example survives preprocessing.
        """
        import torch
        import torchaudio.transforms as T

        SNAC_MODEL_NAME = "hubertsiuzdak/snac_24khz"
        SNAC_SAMPLE_RATE = 24000
        device = "cuda" if torch.cuda.is_available() else "cpu"
        max_length = self.max_seq_length or 2048
        tokenizer = self.tokenizer

        # Orpheus special token IDs (hardcoded in tokenizer vocabulary)
        START_OF_HUMAN = 128259
        END_OF_HUMAN = 128260
        START_OF_AI = 128261
        END_OF_AI = 128262
        START_OF_SPEECH = 128257
        END_OF_SPEECH = 128258
        END_OF_TEXT = 128009
        AUDIO_OFFSET = 128266

        resolved = self._resolve_audio_columns(dataset, custom_format_mapping)
        audio_col = resolved["audio_col"]
        text_col = resolved["text_col"]
        speaker_col = resolved["speaker_col"]
        has_source = speaker_col is not None
        if not audio_col or not text_col:
            raise ValueError(
                f"SNAC dataset needs 'audio' and 'text' columns, got: {dataset.column_names}"
            )

        # Cast audio column so datasets 4.x AudioDecoder objects are decoded to dicts
        from datasets import Audio

        dataset = dataset.cast_column(audio_col, Audio(sampling_rate = SNAC_SAMPLE_RATE))

        # Get dataset sample rate from first example (after cast, always SNAC_SAMPLE_RATE)
        first_audio = dataset[0][audio_col]
        ds_sample_rate = (
            first_audio.get("sampling_rate", SNAC_SAMPLE_RATE)
            if isinstance(first_audio, dict)
            else SNAC_SAMPLE_RATE
        )

        # Load SNAC codec model
        self._update_progress(status_message = "Loading SNAC codec model...")
        logger.info("Loading SNAC codec model...\n")
        from snac import SNAC

        snac_model = SNAC.from_pretrained(SNAC_MODEL_NAME)
        snac_model = snac_model.to(device).eval()

        # Resample transform (created once)
        resample_transform = (
            T.Resample(orig_freq = ds_sample_rate, new_freq = SNAC_SAMPLE_RATE)
            if ds_sample_rate != SNAC_SAMPLE_RATE
            else None
        )

        self._update_progress(status_message = "Encoding audio with SNAC...")
        logger.info(
            f"SNAC preprocessing: audio_col='{audio_col}', text_col='{text_col}', "
            f"has_source={has_source}, ds_sample_rate={ds_sample_rate}\n"
        )

        total = len(dataset)
        processed_examples = []
        skipped = 0
        for idx in range(total):
            if self.should_stop:
                logger.info("Stopped during SNAC preprocessing\n")
                break

            example = dataset[idx]
            try:
                text = example.get(text_col)
                if not text:
                    skipped += 1
                    continue

                audio_data = example.get(audio_col)
                if audio_data is None or audio_data.get("array") is None:
                    skipped += 1
                    continue

                # --- Encode audio with SNAC (notebook lines 122-142) ---
                waveform = (
                    torch.from_numpy(audio_data["array"])
                    .unsqueeze(0)
                    .to(dtype = torch.float32)
                )
                if resample_transform is not None:
                    waveform = resample_transform(waveform)

                waveform = waveform.unsqueeze(0).to(device)
                with torch.inference_mode():
                    codes = snac_model.encode(waveform)

                # Pull each codebook layer to the host once. Calling .item() on
                # every element (7 per frame) forces a separate device sync per
                # call when the codes live on the GPU.
                layer0 = codes[0][0].tolist()
                layer1 = codes[1][0].tolist()
                layer2 = codes[2][0].tolist()

                # Interleave 7 codes per frame with layer offsets (notebook lines 134-142)
                all_codes = []
                for i in range(len(layer0)):
                    all_codes.extend(
                        (
                            layer0[i] + AUDIO_OFFSET,
                            layer1[2 * i] + AUDIO_OFFSET + 4096,
                            layer2[4 * i] + AUDIO_OFFSET + (2 * 4096),
                            layer2[(4 * i) + 1] + AUDIO_OFFSET + (3 * 4096),
                            layer1[(2 * i) + 1] + AUDIO_OFFSET + (4 * 4096),
                            layer2[(4 * i) + 2] + AUDIO_OFFSET + (5 * 4096),
                            layer2[(4 * i) + 3] + AUDIO_OFFSET + (6 * 4096),
                        )
                    )

                if len(all_codes) == 0:
                    skipped += 1
                    continue

                # Deduplicate consecutive frames with same first code (notebook lines 185-207)
                # A frame is dropped when its first code equals the first code
                # of the last frame that was kept.
                deduped = all_codes[:7]
                for i in range(7, len(all_codes), 7):
                    if all_codes[i] != deduped[-7]:
                        deduped.extend(all_codes[i : i + 7])
                all_codes = deduped

                # --- Build text tokens (notebook lines 217-224) ---
                text_prompt = (
                    f"{example[speaker_col]}: {text}"
                    if has_source and example.get(speaker_col)
                    else text
                )
                text_ids = tokenizer.encode(text_prompt, add_special_tokens = True)
                text_ids.append(END_OF_TEXT)

                # --- Build full input_ids (notebook lines 225-234) ---
                input_ids = (
                    [START_OF_HUMAN]
                    + text_ids
                    + [END_OF_HUMAN]
                    + [START_OF_AI]
                    + [START_OF_SPEECH]
                    + all_codes
                    + [END_OF_SPEECH]
                    + [END_OF_AI]
                )

                # Truncate to max_length
                input_ids = input_ids[:max_length]

                # Labels = input_ids (no masking — Orpheus trains on full sequence)
                labels = list(input_ids)
                attention_mask = [1] * len(input_ids)

                processed_examples.append(
                    {
                        "input_ids": input_ids,
                        "labels": labels,
                        "attention_mask": attention_mask,
                    }
                )

            except Exception as e:
                logger.warning(f"Error processing SNAC example {idx}: {e}")
                skipped += 1
                continue

            # Progress update every 100 examples
            if (idx + 1) % 100 == 0:
                self._update_progress(
                    status_message = f"Encoding audio... {idx + 1}/{total}"
                )

        # Free SNAC model from GPU
        logger.info("Freeing SNAC codec model from GPU...\n")
        snac_model.to("cpu")
        del snac_model
        import gc

        gc.collect()
        torch.cuda.empty_cache()
        self._cuda_audio_used = True

        if not processed_examples:
            raise ValueError(
                f"No valid examples after SNAC preprocessing (skipped {skipped})"
            )

        result_dataset = Dataset.from_list(processed_examples)
        logger.info(
            f"SNAC preprocessing complete: {len(result_dataset)} examples "
            f"({skipped} skipped)\n"
        )
        return result_dataset

    def _preprocess_bicodec_dataset(self, dataset, custom_format_mapping = None):
        """Preprocess dataset for Spark-TTS training with BiCodec tokenizer.

        Mirrors Spark_TTS_(0_5B).ipynb: encode audio with BiCodec (semantic + global tokens),
        format as special-token text strings for SFTTrainer with dataset_text_field="text".

        The SparkAudio/Spark-TTS code repo is cloned next to
        ``self._spark_tts_repo_dir`` (if its ``sparktts`` package is not already
        there) and added to ``sys.path`` so the tokenizer can be imported.

        Args:
            dataset: Source dataset with an audio and a text column, as
                resolved by ``_resolve_audio_columns``. A speaker column is
                optional; when present and non-empty it prefixes the text.
            custom_format_mapping: Optional column mapping forwarded to
                ``_resolve_audio_columns``.

        Returns:
            A ``Dataset`` with a single ``text`` column holding the formatted
            training strings.

        Raises:
            ValueError: If the audio or text column is not resolved, or if no
                example survives preprocessing.
            subprocess.CalledProcessError: If cloning the Spark-TTS repo fails.
        """
        import sys
        import torch
        import torchaudio.transforms as T

        import subprocess

        device = "cuda" if torch.cuda.is_available() else "cpu"

        # The sparktts Python package lives in the SparkAudio/Spark-TTS GitHub repo,
        # NOT in the unsloth/Spark-TTS-0.5B HF model repo. Clone it if needed.
        spark_code_dir = os.path.join(
            os.path.dirname(self._spark_tts_repo_dir), "Spark-TTS"
        )
        sparktts_pkg = os.path.join(spark_code_dir, "sparktts")
        if not os.path.isdir(sparktts_pkg):
            self._update_progress(status_message = "Cloning Spark-TTS code repo...")
            logger.info(f"Cloning SparkAudio/Spark-TTS to {spark_code_dir}...\n")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "--depth",
                    "1",
                    "https://github.com/SparkAudio/Spark-TTS",
                    spark_code_dir,
                ],
                check = True,
            )

        if spark_code_dir not in sys.path:
            sys.path.insert(0, spark_code_dir)

        from sparktts.models.audio_tokenizer import BiCodecTokenizer
        from sparktts.utils.audio import audio_volume_normalize

        # Resolve audio and text columns
        resolved = self._resolve_audio_columns(dataset, custom_format_mapping)
        audio_col = resolved["audio_col"]
        text_col = resolved["text_col"]
        speaker_col = resolved["speaker_col"]
        has_source = speaker_col is not None
        if not audio_col or not text_col:
            raise ValueError(
                f"BiCodec dataset needs 'audio' and 'text' columns, got: {dataset.column_names}"
            )

        # Cast audio column so datasets 4.x AudioDecoder objects are decoded to dicts.
        # Don't resample here — BiCodec's target_sr may differ; the loop handles resampling.
        from datasets import Audio

        dataset = dataset.cast_column(audio_col, Audio())

        # Load BiCodec tokenizer
        self._update_progress(status_message = "Loading BiCodec tokenizer...")
        logger.info("Loading BiCodec tokenizer...\n")
        audio_tokenizer = BiCodecTokenizer(self._spark_tts_repo_dir, device)

        target_sr = audio_tokenizer.config["sample_rate"]

        self._update_progress(status_message = "Encoding audio with BiCodec...")
        logger.info(
            f"BiCodec preprocessing: audio_col='{audio_col}', text_col='{text_col}', "
            f"has_source={has_source}, target_sr={target_sr}\n"
        )

        def extract_wav2vec2_features(wavs: torch.Tensor) -> torch.Tensor:
            """Extract wav2vec2 features (average of layers 11, 14, 16)."""
            if wavs.shape[0] != 1:
                raise ValueError(f"Expected batch size 1, but got shape {wavs.shape}")
            wav_np = wavs.squeeze(0).cpu().numpy()

            processed = audio_tokenizer.processor(
                wav_np,
                sampling_rate = 16000,
                return_tensors = "pt",
                padding = True,
            )
            input_values = processed.input_values.to(
                audio_tokenizer.feature_extractor.device
            )
            model_output = audio_tokenizer.feature_extractor(input_values)

            if model_output.hidden_states is None:
                raise ValueError("Wav2Vec2Model did not return hidden states.")

            feats_mix = (
                model_output.hidden_states[11]
                + model_output.hidden_states[14]
                + model_output.hidden_states[16]
            ) / 3
            return feats_mix

        # One Resample module per distinct source rate, built on first use
        # instead of once per example.
        resamplers = {}

        total = len(dataset)
        processed_examples = []
        skipped = 0
        for idx in range(total):
            if self.should_stop:
                logger.info("Stopped during BiCodec preprocessing\n")
                break

            example = dataset[idx]
            try:
                text = example.get(text_col)
                if not text:
                    skipped += 1
                    continue

                audio_data = example.get(audio_col)
                if audio_data is None or audio_data.get("array") is None:
                    skipped += 1
                    continue

                audio_array = audio_data["array"]
                sampling_rate = audio_data.get("sampling_rate", target_sr)

                # Resample if needed
                if sampling_rate != target_sr:
                    resampler = resamplers.get(sampling_rate)
                    if resampler is None:
                        resampler = T.Resample(
                            orig_freq = sampling_rate, new_freq = target_sr
                        )
                        resamplers[sampling_rate] = resampler
                    audio_tensor_temp = torch.from_numpy(audio_array).float()
                    audio_array = resampler(audio_tensor_temp).numpy()

                # Volume normalize if configured
                if audio_tokenizer.config.get("volume_normalize", False):
                    audio_array = audio_volume_normalize(audio_array)

                # Get reference clip
                ref_wav_np = audio_tokenizer.get_ref_clip(audio_array)

                # Prepare tensors
                audio_tensor = (
                    torch.from_numpy(audio_array).unsqueeze(0).float().to(device)
                )
                ref_wav_tensor = (
                    torch.from_numpy(ref_wav_np).unsqueeze(0).float().to(device)
                )

                # Only the discrete token ids are kept, so no autograd graph is
                # needed for the feature extractor or the codec.
                with torch.no_grad():
                    # Extract wav2vec2 features
                    feat = extract_wav2vec2_features(audio_tensor)

                    batch = {
                        "wav": audio_tensor,
                        "ref_wav": ref_wav_tensor,
                        "feat": feat.to(device),
                    }

                    # BiCodec tokenize
                    semantic_token_ids, global_token_ids = (
                        audio_tokenizer.model.tokenize(batch)
                    )

                global_tokens = "".join(
                    [
                        f"<|bicodec_global_{i}|>"
                        for i in global_token_ids.squeeze().cpu().numpy()
                    ]
                )
                semantic_tokens = "".join(
                    [
                        f"<|bicodec_semantic_{i}|>"
                        for i in semantic_token_ids.squeeze().cpu().numpy()
                    ]
                )

                # Format text with source prefix if available
                text_content = (
                    f"{example[speaker_col]}: {text}"
                    if has_source and example.get(speaker_col)
                    else text
                )

                formatted = "".join(
                    [
                        "<|task_tts|>",
                        "<|start_content|>",
                        text_content,
                        "<|end_content|>",
                        "<|start_global_token|>",
                        global_tokens,
                        "<|end_global_token|>",
                        "<|start_semantic_token|>",
                        semantic_tokens,
                        "<|end_semantic_token|>",
                        "<|im_end|>",
                    ]
                )

                processed_examples.append({"text": formatted})

            except Exception as e:
                logger.warning(f"Error processing BiCodec example {idx}: {e}")
                skipped += 1
                continue

            # Progress update every 100 examples
            if (idx + 1) % 100 == 0:
                self._update_progress(
                    status_message = f"Encoding audio with BiCodec... {idx + 1}/{total}"
                )

        # Free BiCodec model from GPU
        logger.info("Freeing BiCodec tokenizer from GPU...\n")
        audio_tokenizer.model.cpu()
        audio_tokenizer.feature_extractor.cpu()
        del audio_tokenizer
        import gc

        gc.collect()
        torch.cuda.empty_cache()
        self._cuda_audio_used = True

        if not processed_examples:
            raise ValueError(
                f"No valid examples after BiCodec preprocessing (skipped {skipped})"
            )

        result_dataset = Dataset.from_list(processed_examples)
        logger.info(
            f"BiCodec preprocessing complete: {len(result_dataset)} examples "
            f"({skipped} skipped)\n"
        )
        # Debug: show first example text (truncated)
        sample = result_dataset[0]["text"]
        logger.info(f"Sample text (first 200 chars): {sample[:200]}...\n")
        logger.info(f"Sample text length: {len(sample)} chars\n")
        return result_dataset

    def _preprocess_dac_dataset(self, dataset, custom_format_mapping = None):
        """Preprocess dataset for OuteTTS training with DAC codec.

        Mirrors Oute_TTS_(1B).ipynb DataCreationV3: uses Whisper for word timings,
        OuteTTS AudioProcessor for speaker representations, PromptProcessor for
        training prompts. Outputs text strings for SFTTrainer with dataset_text_field="text".

        Args:
            dataset: Dataset containing an audio column and a text column; the
                column names are resolved through ``_resolve_audio_columns``.
            custom_format_mapping: Optional mapping forwarded unchanged to
                ``_resolve_audio_columns``.

        Returns:
            A ``datasets.Dataset`` with a single ``"text"`` column, one training
            prompt per example that was processed successfully.

        Raises:
            ValueError: If the audio/text columns cannot be resolved, or if no
                example survives preprocessing.
            subprocess.CalledProcessError: If cloning the OuteTTS repo fails.
        """
        import sys
        import io
        import tempfile
        import torch
        import numpy as np
        import soundfile as sf
        from datasets import Dataset as HFDataset
        from utils.paths import ensure_dir, tmp_root

        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Clone OuteTTS repo (same as audio_codecs._load_dac)
        import subprocess

        base_dir = os.path.dirname(os.path.abspath(__file__))
        outetts_code_dir = os.path.join(base_dir, "inference", "OuteTTS")
        outetts_pkg = os.path.join(outetts_code_dir, "outetts")
        if not os.path.isdir(outetts_pkg):
            self._update_progress(status_message = "Cloning OuteTTS code repo...")
            logger.info(f"Cloning edwko/OuteTTS to {outetts_code_dir}...\n")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "--depth",
                    "1",
                    "https://github.com/edwko/OuteTTS",
                    outetts_code_dir,
                ],
                check = True,
            )
            # These modules are deleted right after cloning; only the v3
            # processors imported below are used from the checkout.
            for fpath in [
                os.path.join(outetts_pkg, "models", "gguf_model.py"),
                os.path.join(outetts_pkg, "interface.py"),
                os.path.join(outetts_pkg, "__init__.py"),
            ]:
                if os.path.exists(fpath):
                    os.remove(fpath)
                    logger.info(f"Removed {fpath}\n")

        if outetts_code_dir not in sys.path:
            sys.path.insert(0, outetts_code_dir)

        from outetts.version.v3.audio_processor import AudioProcessor
        from outetts.version.v3.prompt_processor import PromptProcessor
        from outetts.models.config import ModelConfig as OuteTTSModelConfig
        from outetts.utils.preprocessing import text_normalizations

        # Resolve audio and text columns
        resolved = self._resolve_audio_columns(dataset, custom_format_mapping)
        audio_col = resolved["audio_col"]
        text_col = resolved["text_col"]
        if not audio_col or not text_col:
            raise ValueError(
                f"DAC dataset needs 'audio' and 'text' columns, got: {dataset.column_names}"
            )

        # Cast audio to 24kHz (notebook: dataset.cast_column("audio", Audio(sampling_rate=24000)))
        from datasets import Audio

        dataset = dataset.cast_column(audio_col, Audio(sampling_rate = 24000))
        logger.info("Cast audio column to 24kHz\n")

        # Load Whisper for word timings
        self._update_progress(
            status_message = "Loading Whisper model for word timings..."
        )
        logger.info("Loading Whisper model for word timings...\n")
        import whisper

        whisper_model = whisper.load_model("turbo", device = device)

        # Load OuteTTS AudioProcessor + PromptProcessor
        self._update_progress(status_message = "Loading OuteTTS AudioProcessor...")
        logger.info("Loading OuteTTS AudioProcessor...\n")
        model_tokenizer_path = "OuteAI/Llama-OuteTTS-1.0-1B"
        dummy_config = OuteTTSModelConfig(
            tokenizer_path = model_tokenizer_path,
            device = device,
            audio_codec_path = None,
        )
        audio_processor = AudioProcessor(config = dummy_config)
        prompt_processor = PromptProcessor(model_tokenizer_path)

        self._update_progress(status_message = "Preprocessing audio with OuteTTS...")
        logger.info(
            f"DAC preprocessing: audio_col='{audio_col}', text_col='{text_col}'\n"
        )

        def build_prompt(example):
            """Return the training prompt for one row, or None if it is unusable."""
            text = example.get(text_col)
            if not text or not isinstance(text, str):
                return None

            audio_data = example.get(audio_col)
            if audio_data is None or audio_data.get("array") is None:
                return None

            audio_array = np.array(audio_data["array"], dtype = np.float32)
            sampling_rate = audio_data.get("sampling_rate", 24000)

            # Encode as WAV bytes: written to a temp file for Whisper and also
            # handed to the OuteTTS AudioProcessor below.
            buf = io.BytesIO()
            sf.write(buf, audio_array, sampling_rate, format = "WAV", subtype = "FLOAT")
            audio_bytes = buf.getvalue()

            # 1. Get word timings from Whisper (it is given a file path).
            tmp = tempfile.NamedTemporaryFile(
                suffix = ".wav",
                delete = False,
                dir = str(ensure_dir(tmp_root())),
            )
            tmp_path = tmp.name
            try:
                # Close the handle before transcribing; the outer finally
                # removes the file even when the write itself fails.
                with tmp:
                    tmp.write(audio_bytes)
                whisper_result = whisper_model.transcribe(
                    tmp_path, word_timestamps = True
                )
            finally:
                Path(tmp_path).unlink(missing_ok = True)

            normalized_transcript = text_normalizations(text)
            words_with_timings = []
            if whisper_result and "segments" in whisper_result:
                for segment in whisper_result["segments"]:
                    for word_info in segment.get("words", []):
                        cleaned = word_info["word"].strip()
                        if cleaned:
                            words_with_timings.append(
                                {
                                    "word": cleaned,
                                    "start": float(word_info["start"]),
                                    "end": float(word_info["end"]),
                                }
                            )

            if not words_with_timings:
                return None

            # 2. Create speaker representation with AudioProcessor
            speaker_data_dict = {
                "audio": {"bytes": audio_bytes},
                "text": normalized_transcript,
                "words": words_with_timings,
            }
            speaker = audio_processor.create_speaker_from_dict(speaker_data_dict)
            if speaker is None:
                return None

            # 3. Get training prompt from PromptProcessor
            return prompt_processor.get_training_prompt(speaker)

        processed_examples = []
        skipped = 0
        total = len(dataset)
        for idx in range(total):
            if self.should_stop:
                logger.info("Stopped during DAC preprocessing\n")
                break

            try:
                # The row fetch is inside the try so that a row that fails to
                # load/decode is skipped like any other per-example error.
                prompt = build_prompt(dataset[idx])
            except Exception as e:
                logger.warning(f"Error processing DAC example {idx}: {e}")
                prompt = None

            if prompt:
                processed_examples.append({"text": prompt})
            else:
                # Every row that yields no prompt counts as skipped, including
                # an empty prompt returned by the PromptProcessor.
                skipped += 1

            # Reported for every 100th row, whether or not it was skipped.
            if (idx + 1) % 100 == 0:
                self._update_progress(
                    status_message = f"Preprocessing audio with OuteTTS... {idx + 1}/{total}"
                )

        # Free Whisper from GPU (notebook: data_processor.whisper_model.to('cpu'))
        logger.info("Moving Whisper model to CPU...\n")
        whisper_model.to("cpu")
        del whisper_model
        del audio_processor
        del prompt_processor
        # build_prompt closes over the models just deleted; drop it as well so
        # nothing keeps them reachable before collecting.
        del build_prompt
        import gc

        gc.collect()
        torch.cuda.empty_cache()
        self._cuda_audio_used = True

        if not processed_examples:
            raise ValueError(
                f"No valid examples after DAC preprocessing (skipped {skipped})"
            )

        result_dataset = HFDataset.from_list(processed_examples)
        logger.info(
            f"DAC preprocessing complete: {len(result_dataset)} examples "
            f"({skipped} skipped)\n"
        )
        sample = result_dataset[0]["text"]
        logger.info(f"Sample text (first 200 chars): {sample[:200]}...\n")
        return result_dataset

    def _preprocess_whisper_dataset(
        self, dataset, eval_split = None, custom_format_mapping = None
    ):
        """Preprocess dataset for Whisper speech-to-text training.

        Mirrors Whisper.ipynb: extract audio features with Whisper's feature
        extractor, tokenize text labels.

        Args:
            dataset: Dataset containing an audio column and a text column; the
                column names are resolved through ``_resolve_audio_columns``.
            eval_split: Only its truthiness is used. When truthy, 6% of the rows
                are held out with ``train_test_split(test_size=0.06, seed=42)``.
            custom_format_mapping: Optional mapping forwarded unchanged to
                ``_resolve_audio_columns``.

        Returns:
            Tuple ``(train_data, eval_data)``. ``train_data`` is a list of dicts
            with 'input_features' and 'labels'. ``eval_data`` has the same shape,
            or is None when no eval rows were held out.

        Raises:
            ValueError: If the audio/text columns cannot be resolved, or if no
                training example survives preprocessing.
        """
        from datasets import Audio

        WHISPER_SAMPLE_RATE = 16000

        resolved = self._resolve_audio_columns(dataset, custom_format_mapping)
        audio_col = resolved["audio_col"]
        text_col = resolved["text_col"]
        if not audio_col or not text_col:
            raise ValueError(
                f"Whisper dataset needs 'audio' and 'text' columns, got: {dataset.column_names}"
            )

        # Cast audio to 16kHz (Whisper's expected sample rate)
        dataset = dataset.cast_column(
            audio_col, Audio(sampling_rate = WHISPER_SAMPLE_RATE)
        )

        # Train/eval split (notebook does dataset.train_test_split)
        eval_dataset_raw = None
        if eval_split:
            splits = dataset.train_test_split(test_size = 0.06, seed = 42)
            dataset = splits["train"]
            eval_dataset_raw = splits["test"]

        self._update_progress(status_message = "Processing audio for Whisper...")
        logger.info(
            f"Whisper preprocessing: audio_col='{audio_col}', text_col='{text_col}', "
            f"samples={len(dataset)}\n"
        )

        def process_split(ds, split_name = "train"):
            """Convert every usable row of ``ds`` into an input_features/labels dict."""
            processed = []
            skipped = 0
            total = len(ds)
            for idx in range(total):
                if self.should_stop:
                    logger.info(f"Stopped during Whisper {split_name} preprocessing\n")
                    break

                try:
                    # The row fetch is inside the try so that a row that fails
                    # to load/decode is skipped instead of aborting the split.
                    example = ds[idx]
                    audio_data = example.get(audio_col)
                    text = example.get(text_col)
                    if (
                        audio_data is None
                        or audio_data.get("array") is None
                        or not text
                    ):
                        skipped += 1
                    else:
                        # Extract audio features (notebook line 112-115)
                        features = self.tokenizer.feature_extractor(
                            audio_data["array"],
                            sampling_rate = audio_data["sampling_rate"],
                        )
                        # Tokenize text (notebook line 116)
                        tokenized_text = self.tokenizer.tokenizer(text)

                        processed.append(
                            {
                                "input_features": features.input_features[0],
                                "labels": tokenized_text.input_ids,
                            }
                        )
                except Exception as e:
                    logger.warning(
                        f"Error processing Whisper {split_name} example {idx}: {e}"
                    )
                    skipped += 1

                # Reported for every 100th row, whether or not it was skipped.
                if (idx + 1) % 100 == 0:
                    self._update_progress(
                        status_message = f"Processing {split_name} audio... {idx + 1}/{total}"
                    )

            logger.info(
                f"Whisper {split_name} preprocessing: {len(processed)} examples ({skipped} skipped)\n"
            )
            return processed

        train_data = process_split(dataset, "train")
        eval_data = (
            process_split(eval_dataset_raw, "eval") if eval_dataset_raw else None
        )

        if not train_data:
            raise ValueError("No valid examples after Whisper preprocessing")

        return (train_data, eval_data)

    @staticmethod
    def _resolve_local_files(file_paths: list) -> list[str]:
        """Expand local dataset paths into a flat list of data file paths.

        Relative entries are resolved with ``resolve_dataset_path``; absolute
        entries are used as given. A directory contributes its ``*.parquet``
        files (searched in a ``parquet-files`` subdirectory when one exists),
        or, when there are none, its .json/.jsonl/.csv/.parquet files.
        Anything that is not a directory is passed through as-is.

        Raises:
            ValueError: If a directory holds no supported data files.
        """
        fallback_exts = (".json", ".jsonl", ".csv", ".parquet")
        resolved: list[str] = []

        for entry in file_paths:
            raw = entry if os.path.isabs(entry) else str(resolve_dataset_path(entry))
            target = Path(raw)

            # Plain files (or paths that do not exist yet) go through untouched.
            if not target.is_dir():
                resolved.append(str(target))
                continue

            nested = target / "parquet-files"
            search_root = nested if nested.exists() else target
            found = sorted(search_root.glob("*.parquet"))
            if not found:
                # The fallback always scans the directory itself, grouped by
                # extension in the order listed above.
                found = [
                    match
                    for ext in fallback_exts
                    for match in sorted(target.glob(f"*{ext}"))
                ]
            if not found:
                raise ValueError(f"No supported data files in directory: {target}")

            resolved.extend(str(match) for match in found)

        return resolved

    @staticmethod
    def _loader_for_files(files: list[str]) -> str:
        """Determine the HF datasets loader type from file extensions.

        Only the first file's extension is inspected, case-insensitively.

        Args:
            files: Non-empty list of data file paths.

        Returns:
            ``"json"`` for .json/.jsonl, ``"csv"`` for .csv, ``"parquet"`` for
            .parquet.

        Raises:
            ValueError: If ``files`` is empty or the first file's extension is
                not one of the supported formats.
        """
        # Fail with a descriptive error instead of an IndexError on files[0].
        if not files:
            raise ValueError("No dataset files provided")

        loader_by_ext = {
            ".json": "json",
            ".jsonl": "json",
            ".csv": "csv",
            ".parquet": "parquet",
        }
        loader = loader_by_ext.get(Path(files[0]).suffix.lower())
        if loader is None:
            raise ValueError(f"Unsupported dataset format: {files[0]}")
        return loader

    def load_and_format_dataset(
        self,
        dataset_source: str,
        format_type: str = "auto",
        local_datasets: list = None,
        local_eval_datasets: list = None,
        custom_format_mapping: dict = None,
        subset: str = None,
        train_split: str = "train",
        eval_split: str = None,
        eval_steps: float = 0.00,
        dataset_slice_start: int = None,
        dataset_slice_end: int = None,
    ) -> Optional[tuple]:
        """
        Load and prepare dataset for training.

        Strategy: format first, then split — ensures both train and eval
        portions are properly formatted and templated.

        Args:
            dataset_source: Hugging Face dataset path; used only when
                ``local_datasets`` is empty.
            format_type: Forwarded to ``format_and_template_dataset``.
            local_datasets: Local file/directory paths; takes precedence over
                ``dataset_source`` when non-empty.
            local_eval_datasets: Local eval paths; only read together with
                ``local_datasets`` and when eval is enabled.
            custom_format_mapping: Forwarded to the formatter / audio
                preprocessors.
            subset: Hugging Face config name, if any.
            train_split: Hugging Face split to train on (defaults to "train").
            eval_split: Hugging Face split to evaluate on. When equal to the
                train split, eval rows are carved out of the formatted data.
            eval_steps: Eval is enabled only when this is greater than 0.
            dataset_slice_start: First row to keep (inclusive), or None.
            dataset_slice_end: Last row to keep (inclusive), or None.

        Returns:
            Tuple of (dataset_info, eval_dataset) or None on error.
            eval_dataset may be None if no eval split is available.
            For audio models the first element is whatever the matching
            preprocessor returned (wrapped in a dict with "dataset" and
            "final_format" for bicodec and dac). Only the whisper branch
            returns eval data.
            None is also returned when a stop was requested.
        """
        try:
            dataset = None
            eval_dataset = None
            has_separate_eval_source = (
                False  # True if eval comes from a separate HF split
            )
            eval_enabled = eval_steps is not None and eval_steps > 0

            if local_datasets:
                # Load local datasets using load_dataset() so the result is
                # Arrow-backed (has cache files).  Dataset.from_list() creates
                # an in-memory dataset with no cache, which forces num_proc=1
                # during tokenization/map because sharding requires Arrow files.
                all_files = self._resolve_local_files(local_datasets)

                if all_files:
                    loader = self._loader_for_files(all_files)
                    dataset = load_dataset(loader, data_files = all_files, split = "train")

                    # Check if stopped during dataset loading
                    if self.should_stop:
                        logger.info("Stopped during dataset loading\n")
                        return None

                    self._update_progress(
                        status_message = f"Loaded {len(dataset)} samples from local files"
                    )
                    logger.info(f"Loaded {len(dataset)} samples from local files\n")
                    logger.info(f"[DEBUG] Dataset cache_files: {dataset.cache_files}\n")

                # Load local eval datasets if provided
                if local_eval_datasets and eval_enabled:
                    eval_all_files = self._resolve_local_files(local_eval_datasets)
                    if eval_all_files:
                        eval_loader = self._loader_for_files(eval_all_files)
                        eval_dataset = load_dataset(
                            eval_loader, data_files = eval_all_files, split = "train"
                        )
                        has_separate_eval_source = True
                        logger.info(
                            f"Loaded {len(eval_dataset)} eval samples from local eval files\n"
                        )

            elif dataset_source:
                # Load from Hugging Face
                split_name = train_split or "train"
                load_kwargs = {"path": dataset_source, "split": split_name}
                if subset:
                    load_kwargs["name"] = subset

                _slice_start = dataset_slice_start or 0
                if (
                    dataset_slice_end is not None
                    and dataset_slice_end >= 0
                    and dataset_slice_end >= _slice_start
                ):
                    # Manual slice — stream only the rows we need instead of
                    # downloading the entire dataset.
                    rows_to_stream = dataset_slice_end + 1
                    logger.info(
                        f"[dataset-slice] Manual slice specified "
                        f"(start={dataset_slice_start}, end={dataset_slice_end}), "
                        f"streaming {rows_to_stream} rows\n"
                    )
                    stream = load_dataset(**load_kwargs, streaming = True)
                    dataset = Dataset.from_list(list(stream.take(rows_to_stream)))
                    logger.info(
                        f"[dataset-slice] Downloaded {len(dataset)} rows "
                        f"(requested {rows_to_stream})\n"
                    )
                    self._update_progress(
                        status_message = f"Streamed {len(dataset)} rows from HuggingFace"
                    )
                else:
                    self._update_progress(
                        status_message = f"Downloading dataset: {dataset_source}..."
                    )
                    dataset = load_dataset(**load_kwargs)

                # Check if stopped during dataset loading
                if self.should_stop:
                    logger.info("Stopped during dataset loading\n")
                    return None

                n_rows = len(dataset) if hasattr(dataset, "__len__") else 0
                self._update_progress(
                    status_message = f"Downloaded {dataset_source} ({n_rows:,} rows)"
                )
                logger.info(
                    f"Loaded dataset from Hugging Face: {dataset_source} ({n_rows:,} rows)\n"
                )

                # Resolve eval split from a separate HF split (explicit or auto-detected)
                if eval_enabled:
                    effective_train = train_split or "train"
                    if eval_split and eval_split != effective_train:
                        # Explicit eval split provided - load it directly
                        logger.info(f"Loading explicit eval split: '{eval_split}'\n")
                        eval_load_kwargs = {"path": dataset_source, "split": eval_split}
                        if subset:
                            eval_load_kwargs["name"] = subset
                        eval_dataset = load_dataset(**eval_load_kwargs)
                        has_separate_eval_source = True
                        logger.info(
                            f"Loaded eval split '{eval_split}' with {len(eval_dataset)} rows\n"
                        )
                    elif eval_split and eval_split == effective_train:
                        # Same split as training — will do 80/20 split after formatting
                        logger.info(
                            f"Eval split '{eval_split}' is the same as train split — will split 80/20\n"
                        )
                    else:
                        # Auto-detect eval split from HF (returns a separate dataset, or None)
                        eval_dataset = self._auto_detect_eval_split_from_hf(
                            dataset_source = dataset_source,
                            subset = subset,
                        )
                        if eval_dataset is not None:
                            has_separate_eval_source = True
                else:
                    logger.info(
                        "Eval disabled (eval_steps <= 0), skipping eval split detection\n"
                    )

            if dataset is None:
                raise ValueError("No dataset provided")

            # Apply index range slicing if requested (inclusive on both ends)
            if dataset_slice_start is not None or dataset_slice_end is not None:
                total_rows = len(dataset)
                # With zero rows the clamping below would still select index 0
                # and fail with an opaque IndexError; report it clearly instead.
                if total_rows == 0:
                    raise ValueError("Dataset is empty; cannot apply slice")
                start = dataset_slice_start if dataset_slice_start is not None else 0
                end = (
                    dataset_slice_end
                    if dataset_slice_end is not None
                    else total_rows - 1
                )
                # Clamp to valid range
                start = max(0, min(start, total_rows - 1))
                end = max(start, min(end, total_rows - 1))
                dataset = dataset.select(range(start, end + 1))
                logger.info(
                    f"Sliced dataset to rows [{start}, {end}]: {len(dataset)} of {total_rows} rows\n"
                )
                self._update_progress(
                    status_message = f"Sliced dataset to {len(dataset)} rows (indices {start}-{end})"
                )

            # Check if stopped before applying template
            if self.should_stop:
                logger.info("Stopped before applying chat template\n")
                return None

            # ========== AUDIO MODELS: custom preprocessing ==========
            if self._audio_type == "csm":
                processed = self._preprocess_csm_dataset(dataset, custom_format_mapping)
                return (processed, None)

            elif self._audio_type == "whisper":
                train_data, eval_data = self._preprocess_whisper_dataset(
                    dataset,
                    eval_split = eval_split,
                    custom_format_mapping = custom_format_mapping,
                )
                return (train_data, eval_data)

            elif self._audio_type == "snac":
                processed = self._preprocess_snac_dataset(
                    dataset, custom_format_mapping
                )
                return (processed, None)

            elif self._audio_type == "bicodec":
                processed = self._preprocess_bicodec_dataset(
                    dataset, custom_format_mapping
                )
                return ({"dataset": processed, "final_format": "audio_bicodec"}, None)

            elif self._audio_type == "dac":
                processed = self._preprocess_dac_dataset(dataset, custom_format_mapping)
                return ({"dataset": processed, "final_format": "audio_dac"}, None)

            elif self.is_audio_vlm:
                formatted = self._format_audio_vlm_dataset(
                    dataset, custom_format_mapping
                )
                return (formatted, None)

            # ========== FORMAT FIRST ==========
            logger.info(f"Formatting dataset with format_type='{format_type}'...\n")

            dataset_info = format_and_template_dataset(
                dataset,
                model_name = self.model_name,
                tokenizer = self.tokenizer,
                is_vlm = self.is_vlm,
                format_type = format_type,
                dataset_name = dataset_source,
                custom_format_mapping = custom_format_mapping,
                progress_callback = self._update_progress,
            )

            # Check if stopped during formatting
            if self.should_stop:
                logger.info("Stopped during dataset formatting\n")
                return None

            # Abort if dataset formatting/conversion failed
            if not dataset_info.get("success", True):
                errors = dataset_info.get("errors", [])
                error_msg = "; ".join(errors) if errors else "Dataset formatting failed"
                logger.error(f"Dataset conversion failed: {error_msg}")
                self._update_progress(error = error_msg)
                return None

            detected = dataset_info.get("detected_format", "unknown")
            final_ds = dataset_info.get("dataset")
            has_len = hasattr(final_ds, "__len__")
            final_n = len(final_ds) if has_len else "?"
            # The "," grouping option is only valid for numbers, so apply it
            # only to a real count; the "?" placeholder would raise ValueError.
            final_n_display = f"{final_n:,}" if has_len else final_n
            self._update_progress(
                status_message = f"Dataset ready ({final_n_display} samples, {detected} format)"
            )
            logger.info(
                f"Dataset formatted successfully ({final_n} samples, {detected})\n"
            )

            # ========== THEN SPLIT ==========
            if has_separate_eval_source and eval_dataset is not None:
                # Eval came from a separate source (HF split or local eval files) — format it too
                logger.info(f"Formatting eval dataset ({len(eval_dataset)} rows)...\n")
                eval_info = format_and_template_dataset(
                    eval_dataset,
                    model_name = self.model_name,
                    tokenizer = self.tokenizer,
                    is_vlm = self.is_vlm,
                    format_type = format_type,
                    dataset_name = dataset_source,
                    custom_format_mapping = custom_format_mapping,
                )
                # NOTE(review): unlike the train path above, eval_info's
                # "success" flag is not checked here — confirm that is intended.
                eval_dataset = eval_info["dataset"]
                logger.info("Eval dataset formatted successfully\n")
            elif eval_enabled and not has_separate_eval_source:
                # No separate eval source — split the already-formatted dataset
                formatted_dataset = dataset_info["dataset"]
                split_result = self._resolve_eval_split_from_dataset(formatted_dataset)
                if split_result is not None:
                    train_portion, eval_dataset = split_result
                    dataset_info["dataset"] = train_portion

            return (dataset_info, eval_dataset)

        except Exception as e:
            # Any failure is reported through the progress channel and
            # surfaces to the caller as a None return.
            logger.error(f"Error loading dataset: {e}")
            self._update_progress(error = str(e))
            return None

    def _auto_detect_eval_split_from_hf(
        self, dataset_source: str, subset: str
    ) -> Optional[Dataset]:
        """Look for a usable, separately named eval split on a HF dataset.

        The split names "eval", "validation", "valid", "val" and "test" are
        tried in that order; the first one present with at least 16 rows is
        loaded and returned.

        Returns:
            The loaded eval split, or None when no candidate qualifies or the
            lookup fails (failures are logged as warnings, never raised).
        """
        try:
            from datasets import get_dataset_split_names

            split_query = {"path": dataset_source}
            if subset:
                split_query["config_name"] = subset
            available_splits = get_dataset_split_names(**split_query)
            logger.info(f"Available splits: {available_splits}\n")

            # Walk the well-known eval split names in priority order.
            for candidate in ("eval", "validation", "valid", "val", "test"):
                if candidate not in available_splits:
                    continue

                fetch_args = {"path": dataset_source, "split": candidate}
                if subset:
                    fetch_args["name"] = subset
                candidate_ds = load_dataset(**fetch_args)
                row_count = len(candidate_ds)

                if row_count < 16:
                    logger.info(
                        f"Found eval split '{candidate}' but only {row_count} rows (< 16), skipping\n"
                    )
                    continue

                logger.info(
                    f"Auto-detected eval split '{candidate}' with {row_count} rows\n"
                )
                return candidate_ds

        except Exception as e:
            logger.warning(f"Could not check dataset splits: {e}")

        # Nothing suitable on the hub; the caller splits programmatically.
        return None

    def _resolve_eval_split_from_dataset(self, dataset) -> Optional[tuple]:
        """Carve an eval portion out of ``dataset``.

        The eval size is 5% of the rows, kept between 16 and 128, and never
        more than half of the dataset. The split uses a fixed seed (3407).

        Returns:
            ``(train_dataset, eval_dataset)``, or None when the dataset has
            fewer than 32 rows.
        """
        min_eval_rows = 16
        min_total_rows = 32  # 16 rows for train plus 16 for eval

        total = len(dataset)
        if total < min_total_rows:
            logger.info(
                f"Dataset too small ({total} rows) for eval split, skipping eval\n"
            )
            return None

        # 5% of the data, bounded above by 128 and below by the minimum.
        five_percent = int(0.05 * total)
        bounded = min(128, five_percent)
        wanted = max(min_eval_rows, bounded)
        # Never hand more than half of the rows to eval.
        eval_size = min(wanted, total // 2)

        logger.info(f"Auto-splitting: {eval_size} rows for eval from {total} total\n")
        parts = dataset.train_test_split(test_size = eval_size, seed = 3407)
        train_part = parts["train"]
        eval_part = parts["test"]
        logger.info(
            f"Split complete: {len(train_part)} train, {len(eval_part)} eval\n"
        )
        return (train_part, eval_part)

    def start_training(
        self,
        dataset: Dataset,
        eval_dataset: Dataset = None,
        eval_steps: float = 0.00,
        output_dir: str | None = None,
        num_epochs: int = 3,
        learning_rate: float = 5e-5,
        batch_size: int = 2,
        gradient_accumulation_steps: int = 4,
        warmup_steps: int = None,
        warmup_ratio: float = None,
        max_steps: int = 0,
        save_steps: int = 0,
        weight_decay: float = 0.01,
        random_seed: int = 3407,
        packing: bool = False,
        train_on_completions: bool = False,
        enable_wandb: bool = False,
        wandb_project: str = "unsloth-training",
        wandb_token: str = None,
        enable_tensorboard: bool = False,
        tensorboard_dir: str | None = None,
        **kwargs,
    ) -> bool:
        """Start training in a separate thread"""

        if self.is_training:
            logger.warning("Training already in progress")
            return False

        if self.model is None or self.tokenizer is None:
            self._update_progress(error = "Model not loaded")
            return False

        # Pre-import heavy transformers modules on the main thread.
        # Unsloth's patched_import hook (deepseek_v3_moe.py) is not thread-safe
        # with Python's importlib cache, causing KeyError: 'size' if these are
        # first imported inside the worker thread.
        import transformers  # noqa: F401 – ensures submodules are cached
        from transformers import (  # noqa: F401
            Trainer as _HFTrainer,
            TrainingArguments as _TrainingArguments,
            TrainerCallback as _TrainerCallback,
        )

        if self._audio_type == "whisper":
            from transformers import (  # noqa: F401
                Seq2SeqTrainer as _Seq2SeqTrainer,
                Seq2SeqTrainingArguments as _Seq2SeqTrainingArguments,
            )

        # Start training in separate thread
        self.training_thread = threading.Thread(
            target = self._train_worker,
            args = (dataset,),
            kwargs = {
                "output_dir": output_dir,
                "num_epochs": num_epochs,
                "learning_rate": learning_rate,
                "batch_size": batch_size,
                "gradient_accumulation_steps": gradient_accumulation_steps,
                "warmup_steps": warmup_steps,
                "warmup_ratio": warmup_ratio,
                "max_steps": max_steps,
                "save_steps": save_steps,
                "weight_decay": weight_decay,
                "random_seed": random_seed,
                "packing": packing,
                "train_on_completions": train_on_completions,
                "enable_wandb": enable_wandb,
                "wandb_project": wandb_project,
                "wandb_token": wandb_token,
                "enable_tensorboard": enable_tensorboard,
                "tensorboard_dir": tensorboard_dir,
                "eval_dataset": eval_dataset,
                "eval_steps": eval_steps,
                **kwargs,
            },
        )

        self.should_stop = False
        self.is_training = True
        try:
            self.training_thread.start()
            return True
        except Exception as e:
            self.is_training = False
            logger.error(f"Failed to start training thread: {e}")
            return False

    def _train_worker(self, dataset: Dataset, **training_args):
        """Run one complete training job; intended to execute on the training thread.

        The method picks a trainer according to ``self._audio_type`` /
        ``self.is_audio_vlm`` / ``self.is_vlm``:

        * ``"csm"``      -> plain ``transformers.Trainer``
        * ``"snac"``     -> plain ``transformers.Trainer`` + ``DataCollatorForSeq2Seq``
        * ``"whisper"``  -> ``Seq2SeqTrainer`` + speech collator
        * ``"bicodec"`` / ``"dac"`` / ``None`` -> ``SFTTrainer`` (text, VLM or audio-VLM)
        * any other non-``None`` audio type -> ``NotImplementedError``

        Args:
            dataset: Training data. The text path indexes it as a dict wrapper
                (``dataset["dataset"]``); the VLM / audio-VLM paths also accept
                a raw ``Dataset``.
            **training_args: Hyper-parameters read with ``.get``. Keys consulted
                here: ``output_dir``, ``num_epochs``, ``learning_rate``,
                ``batch_size``, ``gradient_accumulation_steps``,
                ``warmup_steps``, ``warmup_ratio``, ``max_steps``,
                ``save_steps``, ``weight_decay``, ``random_seed``, ``packing``,
                ``train_on_completions``, ``enable_wandb``, ``wandb_project``,
                ``wandb_token``, ``enable_tensorboard``, ``tensorboard_dir``,
                ``eval_dataset``, ``eval_steps``, ``max_seq_length``, ``optim``
                and ``lr_scheduler_type``.

        Returns:
            None. Outcome is reported through ``self._update_progress``.

        Raises:
            Nothing: every exception is caught, logged with its traceback and
            stored as the progress ``error``. ``self.is_training`` is reset to
            ``False`` in all cases (including the early ``return`` paths).
        """
        try:
            # Store training parameters for metrics calculation
            self.batch_size = training_args.get("batch_size", 2)
            self.max_seq_length = training_args.get("max_seq_length", 2048)
            self.gradient_accumulation_steps = training_args.get(
                "gradient_accumulation_steps", 4
            )

            # Set training start time
            self.training_start_time = time.time()

            self._update_progress(is_training = True, error = None)

            # Setup logging: W&B is only initialised when it is both enabled
            # and a token was supplied.
            if training_args.get("enable_wandb", False) and training_args.get(
                "wandb_token"
            ):
                os.environ["WANDB_API_KEY"] = training_args["wandb_token"]
                import wandb

                wandb.init(
                    project = training_args.get("wandb_project", "unsloth-training")
                )

            # Create output directory
            output_dir = str(resolve_output_dir(training_args.get("output_dir")))
            ensure_dir(Path(output_dir))

            # ========== AUDIO TRAINER BRANCH ==========
            # Each of the three branches below builds its own trainer, trains,
            # finalizes and returns; they never reach the SFTTrainer path.
            if self._audio_type == "csm":
                # CSM uses plain HF Trainer (NOT SFTTrainer)
                # Needs remove_unused_columns=False for depth decoder (input_values + cutoffs)
                from transformers import Trainer as HFTrainer, TrainingArguments

                self._apply_csm_forward_fix()

                config = self._build_audio_training_args(
                    training_args,
                    output_dir,
                    extra_args = {
                        "remove_unused_columns": False,
                    },
                )
                self.trainer = HFTrainer(
                    model = self.model,
                    train_dataset = dataset,
                    args = TrainingArguments(**config),
                )
                self.trainer.add_callback(self._create_progress_callback())

                batch_size = training_args.get("batch_size", 2)
                total = self._calculate_total_steps(
                    len(dataset),
                    batch_size,
                    training_args.get("gradient_accumulation_steps", 4),
                    training_args.get("num_epochs", 3),
                    training_args.get("max_steps", 0),
                )
                self._update_progress(
                    total_steps = total, status_message = "Starting CSM training..."
                )
                logger.info(f"CSM training config: {config}\n")
                self.trainer.train()
                self._finalize_training(output_dir, "CSM")
                return

            elif self._audio_type == "snac":
                # Orpheus: language model with SNAC codec tokens — plain HF Trainer
                # DataCollatorForSeq2Seq dynamically pads variable-length sequences per batch
                # (text + audio codes vary in length) and pads labels with -100.
                from transformers import (
                    Trainer as HFTrainer,
                    TrainingArguments,
                    DataCollatorForSeq2Seq,
                )

                config = self._build_audio_training_args(training_args, output_dir)
                self.trainer = HFTrainer(
                    model = self.model,
                    train_dataset = dataset,
                    args = TrainingArguments(**config),
                    data_collator = DataCollatorForSeq2Seq(
                        tokenizer = self.tokenizer,
                        padding = True,
                        pad_to_multiple_of = 8,
                    ),
                )
                self.trainer.add_callback(self._create_progress_callback())

                batch_size = training_args.get("batch_size", 2)
                total = self._calculate_total_steps(
                    len(dataset),
                    batch_size,
                    training_args.get("gradient_accumulation_steps", 4),
                    training_args.get("num_epochs", 3),
                    training_args.get("max_steps", 0),
                )
                self._update_progress(
                    total_steps = total, status_message = "Starting SNAC training..."
                )
                logger.info(f"SNAC training config: {config}\n")
                self.trainer.train()
                self._finalize_training(output_dir, "SNAC")
                return

            elif self._audio_type == "whisper":
                # Whisper: Seq2SeqTrainer with custom speech collator
                from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments
                from utils.datasets import DataCollatorSpeechSeq2SeqWithPadding

                eval_dataset = training_args.get("eval_dataset", None)
                extra = {"remove_unused_columns": False, "label_names": ["labels"]}
                if eval_dataset:
                    extra["eval_strategy"] = "steps"
                    extra["eval_steps"] = training_args.get("eval_steps", 5)

                config = self._build_audio_training_args(
                    training_args, output_dir, extra_args = extra
                )

                trainer_kwargs = {
                    "model": self.model,
                    "train_dataset": dataset,
                    "data_collator": DataCollatorSpeechSeq2SeqWithPadding(
                        processor = self.tokenizer
                    ),
                    "processing_class": self.tokenizer.feature_extractor,
                    "args": Seq2SeqTrainingArguments(**config),
                }
                if eval_dataset:
                    trainer_kwargs["eval_dataset"] = eval_dataset

                self.trainer = Seq2SeqTrainer(**trainer_kwargs)
                self.trainer.add_callback(self._create_progress_callback())

                batch_size = training_args.get("batch_size", 2)
                total = self._calculate_total_steps(
                    len(dataset),
                    batch_size,
                    training_args.get("gradient_accumulation_steps", 4),
                    training_args.get("num_epochs", 3),
                    training_args.get("max_steps", 0),
                )
                self._update_progress(
                    total_steps = total, status_message = "Starting Whisper training..."
                )
                logger.info(f"Whisper training config: {config}\n")
                self.trainer.train()
                self._finalize_training(output_dir, "Whisper")
                return

            elif self._audio_type is not None and self._audio_type not in (
                "bicodec",
                "dac",
            ):
                # bicodec/dac use the standard SFTTrainer text path below
                raise NotImplementedError(
                    f"Audio training for '{self._audio_type}' not yet implemented"
                )

            # ========== DATA COLLATOR SELECTION ==========
            # Detect special model types
            model_name_lower = self.model_name.lower()
            is_deepseek_ocr = (
                "deepseek" in model_name_lower and "ocr" in model_name_lower
            )

            logger.info("Configuring data collator...\n")

            data_collator = None  # Default to built-in data collator
            if is_deepseek_ocr:
                # Special DeepSeek OCR collator - auto-install if needed
                logger.info("Detected DeepSeek OCR model\n")
                # Ensure DeepSeek OCR module is installed
                if not _ensure_deepseek_ocr_installed():
                    error_msg = (
                        "Failed to install DeepSeek OCR module. "
                        "Please install manually: "
                        "from huggingface_hub import snapshot_download; "
                        "snapshot_download('unsloth/DeepSeek-OCR', local_dir='deepseek_ocr')"
                    )
                    logger.error(error_msg)
                    self._update_progress(error = error_msg, is_training = False)
                    return

                try:
                    from backend.data_utils import DeepSeekOCRDataCollator

                    logger.info("Configuring DeepSeek OCR data collator...\n")
                    FastVisionModel.for_training(self.model)
                    data_collator = DeepSeekOCRDataCollator(
                        tokenizer = self.tokenizer,
                        model = self.model,
                        image_size = 640,
                        base_size = 1024,
                        crop_mode = True,
                        train_on_responses_only = training_args.get(
                            "train_on_completions", False
                        ),
                    )
                    logger.info("DeepSeek OCR data collator configured successfully\n")

                except Exception as e:
                    logger.error(f"Failed to configure DeepSeek OCR collator: {e}")
                    error_msg = f"Error configuring DeepSeek OCR: {str(e)}"
                    self._update_progress(error = error_msg, is_training = False)
                    return

            elif self.is_audio_vlm:
                # Audio VLM collator (e.g. Gemma 3N with audio data)
                # Mirrors the collate_fn from Gemma3N_(4B)-Audio notebook
                logger.info("Configuring audio VLM data collator...\n")
                processor = self.tokenizer  # FastModel returns processor as tokenizer

                audio_col_name = getattr(self, "_audio_vlm_audio_col", "audio")

                def audio_vlm_collate_fn(examples):
                    """Render chat text + raw audio into a padded batch with masked labels."""
                    texts = []
                    audios = []
                    for example in examples:
                        text = processor.apply_chat_template(
                            example["messages"],
                            tokenize = False,
                            add_generation_prompt = False,
                        ).strip()
                        texts.append(text)
                        audios.append(example[audio_col_name]["array"])

                    batch = processor(
                        text = texts, audio = audios, return_tensors = "pt", padding = True
                    )

                    # Labels = input_ids with special tokens masked
                    labels = batch["input_ids"].clone()
                    labels[labels == processor.tokenizer.pad_token_id] = -100
                    for attr in (
                        "audio_token_id",
                        "image_token_id",
                        "boi_token_id",
                        "eoi_token_id",
                    ):
                        # Not every tokenizer defines all of these ids.
                        token_id = getattr(processor.tokenizer, attr, None)
                        if token_id is not None:
                            labels[labels == token_id] = -100
                    batch["labels"] = labels
                    return batch

                data_collator = audio_vlm_collate_fn
                logger.info("Audio VLM data collator configured\n")

            elif self.is_vlm:
                # Standard VLM collator (images)
                logger.info("Using UnslothVisionDataCollator for vision model\n")
                from unsloth.trainer import UnslothVisionDataCollator

                FastVisionModel.for_training(self.model)
                data_collator = UnslothVisionDataCollator(self.model, self.tokenizer)
                logger.info("Vision data collator configured\n")

            # ========== TRAINING CONFIGURATION ==========
            # Handle warmup_steps vs warmup_ratio
            warmup_steps_val = training_args.get("warmup_steps", None)
            warmup_ratio_val = training_args.get("warmup_ratio", None)

            lr_value = training_args.get("learning_rate", 2e-4)
            logger.info(
                f"[DEBUG] learning_rate from training_args: {lr_value} (type: {type(lr_value).__name__})\n"
            )

            config_args = {
                "per_device_train_batch_size": training_args.get("batch_size", 2),
                "gradient_accumulation_steps": training_args.get(
                    "gradient_accumulation_steps", 4
                ),
                "num_train_epochs": training_args.get(
                    "num_epochs", 3
                ),  # Default to epochs
                "learning_rate": lr_value,
                "fp16": not is_bfloat16_supported(),
                "bf16": is_bfloat16_supported(),
                "logging_steps": 1,
                "weight_decay": training_args.get("weight_decay", 0.01),
                "seed": training_args.get("random_seed", 3407),
                "output_dir": output_dir,
                "report_to": _build_report_targets(training_args),
                "include_num_input_tokens_seen": True,  # Enable token counting
                # os.cpu_count() may return None when the count cannot be
                # determined; fall back to 1 instead of raising TypeError.
                "dataset_num_proc": 1
                if (self.is_audio or self.is_audio_vlm or self._cuda_audio_used)
                else safe_num_proc(max(1, (os.cpu_count() or 1) // 4)),
                "max_seq_length": training_args.get("max_seq_length", 2048),
            }
            if training_args.get("enable_tensorboard", False):
                config_args["logging_dir"] = str(
                    resolve_tensorboard_dir(training_args.get("tensorboard_dir"))
                )
            logger.info(
                f"[DEBUG] dataset_num_proc={config_args['dataset_num_proc']} (is_audio={self.is_audio}, is_audio_vlm={self.is_audio_vlm}, _cuda_audio_used={self._cuda_audio_used})"
            )

            # On Windows with transformers 5.x, disable DataLoader multiprocessing
            # to avoid issues with modified sys.path (.venv_t5) in spawned workers.
            if sys.platform == "win32":
                import transformers as _tf

                if _tf.__version__.startswith("5."):
                    config_args["dataloader_num_workers"] = 0

            # Add warmup parameter - use warmup_ratio if provided, otherwise warmup_steps
            if warmup_ratio_val is not None:
                config_args["warmup_ratio"] = warmup_ratio_val
                logger.info(f"Using warmup_ratio: {warmup_ratio_val}\n")
            elif warmup_steps_val is not None:
                config_args["warmup_steps"] = warmup_steps_val
                logger.info(f"Using warmup_steps: {warmup_steps_val}\n")
            else:
                # Default to warmup_steps if neither provided
                config_args["warmup_steps"] = 5
                logger.info("Using default warmup_steps: 5\n")

            # Add save_steps if specified
            save_steps_val = training_args.get("save_steps", 0)
            if save_steps_val and save_steps_val > 0:
                config_args["save_steps"] = save_steps_val
                config_args["save_strategy"] = "steps"

            #  If max_steps is specified, use it instead of epochs
            max_steps_val = training_args.get("max_steps", 0)
            if max_steps_val and max_steps_val > 0:
                del config_args["num_train_epochs"]  # Remove epochs
                config_args["max_steps"] = max_steps_val  # Use steps instead
                logger.info(f"Training for {max_steps_val} steps\n")
            else:
                logger.info(f"Training for {config_args['num_train_epochs']} epochs\n")

            # ========== EVAL CONFIGURATION ==========
            eval_dataset = training_args.get("eval_dataset", None)
            eval_steps_val = training_args.get("eval_steps", 0.00)
            if eval_dataset is not None:
                # Same None-safe guard as save_steps / max_steps above: an
                # explicit eval_steps=None bypasses the .get default and would
                # otherwise make the comparison raise TypeError.
                if eval_steps_val and eval_steps_val > 0:
                    config_args["eval_strategy"] = "steps"
                    config_args["eval_steps"] = eval_steps_val
                    logger.info(
                        f"✅ Evaluation enabled: eval_steps={eval_steps_val} (fraction of total steps)\n"
                    )
                    logger.info(f"Eval dataset: {len(eval_dataset)} rows\n")
                else:
                    logger.info(
                        f"⚠️  Eval dataset provided but eval_steps={eval_steps_val} (disabled)\n"
                    )
                    logger.info("To enable evaluation, set eval_steps > 0.0\n")
            else:
                logger.info("No eval dataset — evaluation disabled\n")

            # Add model-specific parameters
            # Use optim and lr_scheduler_type from training_args if provided, otherwise use defaults
            optim_value = training_args.get("optim", "adamw_8bit")
            lr_scheduler_type_value = training_args.get("lr_scheduler_type", "linear")

            if self.is_vlm or self.is_audio_vlm:
                # Vision / audio VLM config (both need skip_prepare_dataset + remove_unused_columns)
                label = "audio VLM" if self.is_audio_vlm else "vision"
                logger.info(f"Configuring {label} model training parameters\n")
                # Use provided values or defaults for vision models
                optim_value = training_args.get("optim", "adamw_torch_fused")
                lr_scheduler_type_value = training_args.get(
                    "lr_scheduler_type", "cosine"
                )
                config_args.update(
                    {
                        "optim": optim_value,
                        "lr_scheduler_type": lr_scheduler_type_value,
                        "gradient_checkpointing": True,
                        "gradient_checkpointing_kwargs": {"use_reentrant": False},
                        "max_grad_norm": 0.3,
                        "remove_unused_columns": False,
                        "dataset_text_field": "",
                        "dataset_kwargs": {"skip_prepare_dataset": True},
                        "max_length": training_args.get("max_seq_length", 2048),
                    }
                )
            else:
                logger.info("Configuring text model training parameters\n")
                config_args.update(
                    {
                        "optim": optim_value,
                        "lr_scheduler_type": lr_scheduler_type_value,
                        "dataset_text_field": "text",
                    }
                )

                # Only add packing for text models (not DeepSeek OCR which is VLM)
                if not is_deepseek_ocr:
                    packing_enabled = training_args.get("packing", False)
                    config_args["packing"] = packing_enabled
                    logger.info(
                        f"Sequence packing: {'enabled' if packing_enabled else 'disabled'}\n"
                    )

            # Audio codec overrides — BiCodec/DAC use the text SFTTrainer path
            if self._audio_type == "bicodec":
                config_args["packing"] = False
                logger.info("Applied BiCodec overrides: packing=False\n")
            elif self._audio_type == "dac":
                config_args["packing"] = False
                logger.info("Applied DAC overrides: packing=False\n")

            logger.info(f"The configuration is: {config_args}")

            logger.info("Training configuration prepared\n")
            # ========== TRAINER INITIALIZATION ==========
            if self.is_audio_vlm:
                # Audio VLM (e.g. Gemma 3N + audio): raw Dataset from _format_audio_vlm_dataset
                # Notebook uses processing_class=processor.tokenizer (text tokenizer only)
                train_dataset = (
                    dataset if isinstance(dataset, Dataset) else dataset["dataset"]
                )
                processing_class = (
                    self.tokenizer.tokenizer
                    if hasattr(self.tokenizer, "tokenizer")
                    else self.tokenizer
                )
                trainer_kwargs = {
                    "model": self.model,
                    "train_dataset": train_dataset,
                    "processing_class": processing_class,
                    "data_collator": data_collator,
                    "args": SFTConfig(**config_args),
                }
                if eval_dataset is not None:
                    trainer_kwargs["eval_dataset"] = eval_dataset
                self.trainer = SFTTrainer(**trainer_kwargs)
            elif self.is_vlm:
                # Image VLM: dataset is dict wrapper from format_and_template_dataset
                train_dataset = (
                    dataset["dataset"] if isinstance(dataset, dict) else dataset
                )
                trainer_kwargs = {
                    "model": self.model,
                    "train_dataset": train_dataset,
                    "processing_class": self.tokenizer,
                    "data_collator": data_collator,
                    "args": SFTConfig(**config_args),
                }
                if eval_dataset is not None:
                    trainer_kwargs["eval_dataset"] = eval_dataset
                self.trainer = SFTTrainer(**trainer_kwargs)
            else:
                # For text-only training, if the tokenizer is actually a Processor
                # (e.g., Gemma-3 returns a ProcessorMixin even for text), we must
                # unwrap to the raw tokenizer. Otherwise Unsloth's SFTTrainer detects
                # ProcessorMixin → sets _is_vlm=True → skips _prepare_dataset entirely,
                # and the 'text' column never gets tokenized to 'input_ids'.
                from transformers import ProcessorMixin

                sft_tokenizer = self.tokenizer
                if isinstance(self.tokenizer, ProcessorMixin) and hasattr(
                    self.tokenizer, "tokenizer"
                ):
                    logger.info(
                        "  ⚠️ Unwrapping Processor → raw tokenizer for text-only SFTTrainer"
                    )
                    sft_tokenizer = self.tokenizer.tokenizer

                trainer_kwargs = {
                    "model": self.model,
                    "tokenizer": sft_tokenizer,
                    "train_dataset": dataset["dataset"],
                    "data_collator": data_collator,
                    "args": SFTConfig(**config_args),
                }
                if eval_dataset is not None:
                    trainer_kwargs["eval_dataset"] = eval_dataset
                self.trainer = SFTTrainer(**trainer_kwargs)
                # Restore the full processor as processing_class so checkpoint
                # saves include preprocessor_config.json (needed for GGUF export).
                if sft_tokenizer is not self.tokenizer:
                    self.trainer.processing_class = self.tokenizer
            logger.info("Trainer initialized\n")

            # ========== TRAIN ON RESPONSES ONLY ==========
            # The VLM path accepts a raw Dataset as well as the dict wrapper, so
            # resolve the training rows and the dataset format defensively here
            # instead of indexing `dataset[...]` unconditionally below.
            train_rows = dataset["dataset"] if isinstance(dataset, dict) else dataset
            final_format = (
                dataset.get("final_format") if isinstance(dataset, dict) else None
            )
            is_alpaca = (
                isinstance(final_format, str) and final_format.lower() == "alpaca"
            )

            # Determine if we should train on responses only
            instruction_part = None
            response_part = None
            train_on_responses_enabled = training_args.get(
                "train_on_completions", False
            )

            # DeepSeek OCR handles this internally in its collator, so skip
            # Audio VLM handles label masking in its collator, so skip
            if (
                train_on_responses_enabled
                and not self.is_audio_vlm
                and not self.is_audio
                and not (is_deepseek_ocr or is_alpaca)
            ):
                try:
                    logger.info("Configuring train on responses only...\n")

                    # Get the template mapping for this model
                    model_name_lower = self.model_name.lower()

                    if model_name_lower in MODEL_TO_TEMPLATE_MAPPER:
                        template_name = MODEL_TO_TEMPLATE_MAPPER[model_name_lower]
                        logger.info(f"Detected template: {template_name}\n")

                        if template_name in TEMPLATE_TO_RESPONSES_MAPPER:
                            instruction_part = TEMPLATE_TO_RESPONSES_MAPPER[
                                template_name
                            ]["instruction"]
                            response_part = TEMPLATE_TO_RESPONSES_MAPPER[template_name][
                                "response"
                            ]

                            logger.info(
                                f"Instruction marker: {instruction_part[:50]}...\n"
                            )
                            logger.info(f"Response marker: {response_part[:50]}...\n")
                        else:
                            logger.info(
                                f"No response mapping found for template: {template_name}\n"
                            )
                            train_on_responses_enabled = False
                    else:
                        logger.info(
                            f"No template mapping found for model: {self.model_name}\n"
                        )
                        train_on_responses_enabled = False

                except Exception as e:
                    logger.warning(f"Could not configure train on responses: {e}")
                    train_on_responses_enabled = False

            # Apply train on responses only if we have valid parts
            if (
                train_on_responses_enabled
                and instruction_part
                and response_part
                and not self.is_audio_vlm
                and not self.is_audio
                and not (is_deepseek_ocr or is_alpaca)
            ):
                try:
                    from unsloth.chat_templates import train_on_responses_only

                    self.trainer = train_on_responses_only(
                        self.trainer,
                        instruction_part = instruction_part,
                        response_part = response_part,
                        num_proc = config_args["dataset_num_proc"],
                    )
                    logger.info("Train on responses only configured successfully\n")

                    # ── Safety net: check if all samples were filtered out ──
                    # Unsloth's train_on_responses_only masks non-response
                    # tokens with -100. If max_seq_length is too short and the
                    # response portion gets truncated away, EVERY sample ends
                    # up with all labels == -100 and Unsloth removes them,
                    # leaving 0 usable training samples.
                    filtered_len = len(self.trainer.train_dataset)
                    original_len = len(train_rows)
                    dropped = original_len - filtered_len
                    drop_pct = (
                        round(100 * dropped / original_len, 1)
                        if original_len > 0
                        else 0
                    )

                    if filtered_len == 0 or drop_pct > 30:
                        max_seq = training_args.get("max_seq_length", 2048)
                        error_msg = (
                            f"{dropped}/{original_len} samples ({drop_pct}%) "
                            f"were dropped after applying 'train on responses "
                            f"only' — only {filtered_len} remain. This usually "
                            f"means max_seq_length ({max_seq}) is too short "
                            f"and the response portion is being truncated "
                            f"away. Try increasing max_seq_length (e.g. 8192) "
                            f"or disabling 'Train on completions'."
                        )
                        logger.error(error_msg)
                        self._update_progress(error = error_msg, is_training = False)
                        return

                    if dropped > 0:
                        logger.info(
                            f"⚠️ {dropped}/{original_len} samples "
                            f"({drop_pct}%) were dropped (all labels "
                            f"masked). {filtered_len} samples remain.\n"
                        )
                    logger.info(f"Post-filter dataset size: {filtered_len} samples\n")

                    # [DEBUG] Decode first sample AFTER train_on_completions applied
                    # try:
                    #     _row = self.trainer.train_dataset[0]
                    #     _space = self.tokenizer(
                    #         " ", add_special_tokens = False
                    #     ).input_ids[0]
                    #     print("[DEBUG] === After train_on_completions ===", flush = True)
                    #     print(
                    #         f"[DEBUG] input_ids decoded:\n{self.tokenizer.decode(_row['input_ids'])}\n",
                    #         flush = True,
                    #     )
                    #     print(
                    #         f"[DEBUG] labels decoded (-100 → space):\n{self.tokenizer.decode([_space if x == -100 else x for x in _row['labels']])}\n",
                    #         flush = True,
                    #     )
                    # except Exception as _dbg_e:
                    #     print(
                    #         f"[DEBUG] Could not decode post-completions sample: {_dbg_e}",
                    #         flush = True,
                    #     )

                except Exception as e:
                    # Best effort: fall back to training on full sequences.
                    logger.warning(f"Failed to apply train on responses only: {e}")
                    train_on_responses_enabled = False
            else:
                if train_on_responses_enabled and is_deepseek_ocr:
                    logger.info("Train on responses handled by DeepSeek OCR collator\n")
                else:
                    logger.info("Training on full sequences (including prompts)\n")

            # ========== PROGRESS TRACKING ==========
            self.trainer.add_callback(self._create_progress_callback())

            # Step count is derived from the unfiltered training rows.
            num_samples = len(train_rows)
            batch_size = training_args.get("batch_size", 2)
            total_steps = self._calculate_total_steps(
                num_samples,
                batch_size,
                training_args.get("gradient_accumulation_steps", 4),
                training_args.get("num_epochs", 3),
                training_args.get("max_steps", 0),
            )
            self._update_progress(total_steps = total_steps)

            # ========== START TRAINING ==========
            self._update_progress(status_message = "Starting training...")
            logger.info("Starting training...\n")
            self.trainer.train()

            # ========== SAVE MODEL ==========
            self._finalize_training(output_dir)

        except Exception as e:
            import traceback

            logger.error(f"Training error: {e}")
            logger.error(f"Full traceback:\n{traceback.format_exc()}")
            self._update_progress(is_training = False, error = str(e))

        finally:
            # Runs for normal completion, early returns and failures alike.
            self.is_training = False

    def _patch_adapter_config(self, output_dir: str) -> None:
        """Stamp ``unsloth_training_method`` into a saved ``adapter_config.json``.

        The file is looked up directly inside *output_dir*. If it is absent the
        call logs that fact and does nothing. Otherwise the JSON is loaded, the
        key ``unsloth_training_method`` is set and the file is rewritten with a
        2-space indent.

        This method only ever writes ``'qlora'`` (when ``self.load_in_4bit`` is
        truthy) or ``'lora'``; other values of the field ('FT', 'CPT', 'DPO',
        'GRPO', etc.) are not produced here.

        Any exception raised while reading or writing is logged as a warning
        and not propagated.
        """
        adapter_cfg_path = os.path.join(output_dir, "adapter_config.json")
        if not os.path.exists(adapter_cfg_path):
            logger.info("No adapter_config.json found — skipping training method patch")
            return

        try:
            with open(adapter_cfg_path, "r") as reader:
                adapter_cfg = json.load(reader)

            # LoRA vs QLoRA is decided purely by whether the model was loaded in 4-bit.
            training_method = "qlora" if self.load_in_4bit else "lora"
            adapter_cfg["unsloth_training_method"] = training_method
            logger.info(
                f"Patching adapter_config.json with unsloth_training_method='{training_method}'"
            )

            with open(adapter_cfg_path, "w") as writer:
                json.dump(adapter_cfg, writer, indent = 2)

        except Exception as exc:
            logger.warning(f"Failed to patch adapter_config.json: {exc}")

    def stop_training(self, save: bool = True):
        """Request that the running training job stop.

        Sets ``self.should_stop`` and records *save* in ``self.save_on_stop``,
        then publishes a matching status message through
        ``self._update_progress``. Nothing is interrupted here directly; per
        the note in the code, the progress callback is expected to notice
        ``should_stop`` and end the training loop (that callback is not part
        of this method).

        Args:
            save: ``True`` to stop and keep a checkpoint, ``False`` to cancel.
        """
        logger.info(f"\nStopping training (save={save})...")
        self.should_stop = True
        self.save_on_stop = save

        if save:
            status_text = "Stopping training and saving checkpoint..."
        else:
            status_text = "Cancelling training..."
        self._update_progress(status_message = status_text)

        # Without a trainer there is nothing further to report.
        if not self.trainer:
            return

        try:
            # The callback will catch should_stop flag and stop the training loop
            logger.info("Training will stop at next step...\n")
        except Exception as exc:
            logger.error(f"Error stopping trainer: {exc}")

    def get_training_progress(self) -> TrainingProgress:
        """Return ``self.training_progress``, read while holding ``self._lock``.

        The stored object itself is returned (no copy is made here).
        """
        with self._lock:
            snapshot = self.training_progress
        return snapshot

    def cleanup(self):
        """Drop references to the trainer, model and tokenizer, then clear the GPU cache.

        Each attribute is reset to ``None`` only when its current value is
        truthy; afterwards ``clear_gpu_cache()`` is always called.
        """
        # Same order as the references were originally released in.
        for attr_name in ("trainer", "model", "tokenizer"):
            if getattr(self, attr_name):
                setattr(self, attr_name, None)

        # Clear GPU memory
        clear_gpu_cache()


def _ensure_deepseek_ocr_installed():
    """
    Make the ``deepseek_ocr`` module importable, downloading it if needed.

    The function first tries to import ``deepseek_ocr.modeling_deepseekocr``.
    If that fails, the ``unsloth/DeepSeek-OCR`` snapshot is downloaded from
    the HuggingFace hub into a ``deepseek_ocr`` folder located one directory
    above this file's directory, that parent directory is added to
    ``sys.path`` and the import is retried.

    Returns:
        bool: True if the module is importable (already present or just
        downloaded); False if the download or the retried import failed.
    """
    try:
        # Probe import: the name itself is unused, we only test availability.
        from deepseek_ocr.modeling_deepseekocr import format_messages  # noqa: F401

        logger.info("DeepSeek OCR module already available")
        return True
    except ImportError:
        pass

    try:
        logger.info(
            "DeepSeek OCR module not found. Auto-installing from HuggingFace..."
        )
        logger.info("\n Downloading DeepSeek OCR module from HuggingFace...\n")

        from huggingface_hub import snapshot_download
        import importlib
        import sys
        import os

        # Get the script directory to install locally
        script_dir = os.path.dirname(os.path.abspath(__file__))
        parent_dir = os.path.dirname(script_dir)  # Go up to project root

        # Download to project root as 'deepseek_ocr' folder
        local_dir = os.path.join(parent_dir, "deepseek_ocr")

        snapshot_download(
            "unsloth/DeepSeek-OCR", local_dir = local_dir, local_dir_use_symlinks = False
        )

        # Add to sys.path if not already there
        if parent_dir not in sys.path:
            sys.path.insert(0, parent_dir)

        # The package directory was created after the failed import above, so
        # the import system's path finders may still hold a stale directory
        # listing. Invalidate them before retrying.
        importlib.invalidate_caches()

        # Try importing again
        from deepseek_ocr.modeling_deepseekocr import format_messages  # noqa: F401

        # Logged once (the message used to be emitted twice in a row).
        logger.info("DeepSeek OCR module installed successfully")
        return True

    except Exception as e:
        # Best-effort install: report the failure once at error level and let
        # the caller decide what to do with the False result.
        logger.error(f"Failed to install DeepSeek OCR module: {e}")
        return False


# Lazily created, module-wide trainer (see get_trainer)
_trainer_instance = None


def get_trainer() -> UnslothTrainer:
    """Return the shared UnslothTrainer, creating it on first use."""
    global _trainer_instance
    if _trainer_instance is not None:
        return _trainer_instance
    _trainer_instance = UnslothTrainer()
    return _trainer_instance


================================================
FILE: studio/backend/core/training/training.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Training backend — subprocess orchestrator.

Each training job runs in a fresh subprocess (mp.get_context("spawn")),
solving the transformers version-switching problem. The old in-process
UnslothTrainer singleton is only used inside the subprocess (worker.py).

This file orchestrates the subprocess lifecycle, pumps events from the
worker's mp.Queue, and exposes the same API surface to routes/training.py.

Pattern follows core/data_recipe/jobs/manager.py.
"""

import math
import multiprocessing as mp
import queue
import threading
import time
import structlog
from loggers import get_logger
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Tuple, Any

import matplotlib.pyplot as plt

# Module-level logger obtained from the project's `loggers` package.
logger = get_logger(__name__)

# Multiprocessing context using the "spawn" start method. TrainingBackend
# creates its worker Process and both Queues from this context.
_CTX = mp.get_context("spawn")

# Plot styling constants: passed to plt.subplots(figsize = ...) as
# (PLOT_WIDTH, PLOT_HEIGHT) when the loss plot is drawn.
PLOT_WIDTH = 8
PLOT_HEIGHT = 3.5


@dataclass
class TrainingProgress:
    """Parent-process view of a training job's state.

    Mirror of trainer.TrainingProgress — kept here so the parent process
    never needs to import the heavy ML modules. TrainingBackend updates an
    instance of this class from the events it receives from the worker
    subprocess.
    """

    # Latest values taken from "progress" events.
    epoch: float = 0
    step: int = 0
    total_steps: int = 0
    loss: float = 0.0
    learning_rate: float = 0.0
    # Lifecycle flags: is_training is raised by progress/status events and
    # cleared on completion, error or process exit; is_completed is set by the
    # "complete" event.
    is_training: bool = False
    is_completed: bool = False
    # Error text from an "error" event, or a generic message when the worker
    # exits without reporting completion.
    error: Optional[str] = None
    # Human-readable status; also used as the plot title / placeholder text.
    status_message: str = "Ready to train"
    # Optional metrics copied from each "progress" event; they are None when
    # the most recent event did not carry them.
    elapsed_seconds: Optional[float] = None
    eta_seconds: Optional[float] = None
    grad_norm: Optional[float] = None
    num_tokens: Optional[int] = None
    eval_loss: Optional[float] = None


class TrainingBackend:
    """
    Training orchestration backend — subprocess-based.
    Launches a fresh subprocess per training job, communicates via mp.Queue.
    """

    def __init__(self):
        """Set up an idle backend: no worker process, empty metrics, default progress."""
        # Guards progress/process state shared with the pump thread.
        self._lock = threading.Lock()

        # Handles for the current worker process and its plumbing.
        self._proc: Optional[mp.Process] = None
        self._pump_thread: Optional[threading.Thread] = None
        self._event_queue: Any = None
        self._stop_queue: Any = None

        # Stop bookkeeping; _cancel_requested is True only for stop(save=False).
        self._should_stop = False
        self._cancel_requested = False

        # Progress snapshot, refreshed by the pump thread from worker events.
        self._progress = TrainingProgress()

        # Metric series (consumed by routes for SSE and /metrics).
        self.step_history: list = []
        self.loss_history: list = []
        self.lr_history: list = []
        self.grad_norm_step_history: list = []
        self.grad_norm_history: list = []
        self.eval_step_history: list = []
        self.eval_loss_history: list = []
        self.eval_enabled: bool = False
        self.current_theme: str = "light"

        # Metadata about the current / most recent job.
        self._output_dir: Optional[str] = None
        self.current_job_id: Optional[str] = None

        logger.info("TrainingBackend initialized (subprocess mode)")

    # ------------------------------------------------------------------
    # Public API (called by routes/training.py)
    # ------------------------------------------------------------------

    def start_training(self, **kwargs) -> bool:
        """Spawn a subprocess to run the full training pipeline.

        Only the options listed in ``option_defaults`` below (plus the
        required ``model_name``) are copied from ``kwargs`` into the config
        dict sent to the worker; unknown keyword arguments are ignored.

        Returns:
            True once the subprocess and the event pump thread have been
            started; False if a training subprocess is already alive.

        Raises:
            KeyError: if ``model_name`` is not supplied. Backend state is left
                untouched in that case.
            Exception: anything raised while importing the worker or starting
                the process is re-raised after the progress state has been
                marked as failed.
        """
        with self._lock:
            if self._proc is not None and self._proc.is_alive():
                logger.warning("Training subprocess already running")
                return False

        # Build the config BEFORE touching any state, so a missing
        # ``model_name`` cannot leave the backend flagged as "training".
        option_defaults = {
            "training_type": "LoRA/QLoRA",
            "hf_token": "",
            "load_in_4bit": True,
            "max_seq_length": 2048,
            "hf_dataset": "",
            "local_datasets": None,
            "local_eval_datasets": None,
            "format_type": "",
            "subset": None,
            "train_split": "train",
            "eval_split": None,
            "eval_steps": 0.00,
            "dataset_slice_start": None,
            "dataset_slice_end": None,
            "custom_format_mapping": None,
            "is_dataset_image": False,
            "is_dataset_audio": False,
            "is_embedding": False,
            "num_epochs": 3,
            "learning_rate": "2e-4",
            "batch_size": 2,
            "gradient_accumulation_steps": 4,
            "warmup_steps": None,
            "warmup_ratio": None,
            "max_steps": 0,
            "save_steps": 0,
            "weight_decay": 0.01,
            "random_seed": 3407,
            "packing": False,
            "optim": "adamw_8bit",
            "lr_scheduler_type": "linear",
            "use_lora": True,
            "lora_r": 16,
            "lora_alpha": 16,
            "lora_dropout": 0.0,
            "target_modules": None,
            "gradient_checkpointing": "unsloth",
            "use_rslora": False,
            "use_loftq": False,
            "train_on_completions": False,
            "finetune_vision_layers": True,
            "finetune_language_layers": True,
            "finetune_attention_modules": True,
            "finetune_mlp_modules": True,
            "enable_wandb": False,
            "wandb_token": None,
            "wandb_project": "unsloth-training",
            "enable_tensorboard": False,
            "tensorboard_dir": "runs",
            "trust_remote_code": False,
        }
        config = {"model_name": kwargs["model_name"]}
        for option, default in option_defaults.items():
            config[option] = kwargs.get(option, default)

        # Derive load_in_4bit from training_type
        if config["training_type"] != "LoRA/QLoRA":
            config["load_in_4bit"] = False

        # Join prior pump thread to prevent it from consuming events
        # from the new job's queue.
        if self._pump_thread is not None and self._pump_thread.is_alive():
            self._pump_thread.join(timeout = 5.0)
            if self._pump_thread.is_alive():
                logger.warning("Previous pump thread did not exit within 5s")
        self._pump_thread = None

        # Reset state
        self._should_stop = False
        self._cancel_requested = False
        self._progress = TrainingProgress(
            is_training = True, status_message = "Initializing training..."
        )
        self.loss_history.clear()
        self.lr_history.clear()
        self.step_history.clear()
        self.grad_norm_history.clear()
        self.grad_norm_step_history.clear()
        self.eval_loss_history.clear()
        self.eval_step_history.clear()
        self.eval_enabled = False
        self._output_dir = None

        # Spawn subprocess. If anything here fails, undo the "training"
        # state set above; otherwise is_training_active() would keep
        # reporting an active job that never started.
        try:
            from .worker import run_training_process

            self._event_queue = _CTX.Queue()
            self._stop_queue = _CTX.Queue()

            self._proc = _CTX.Process(
                target = run_training_process,
                kwargs = {
                    "event_queue": self._event_queue,
                    "stop_queue": self._stop_queue,
                    "config": config,
                },
                daemon = True,
            )
            self._proc.start()
        except Exception as exc:
            with self._lock:
                self._proc = None
                self._event_queue = None
                self._stop_queue = None
                self._progress = TrainingProgress(
                    is_training = False,
                    error = f"Failed to start training subprocess: {exc}",
                    status_message = "Failed to start training",
                )
            logger.error("Failed to start training subprocess: %s", exc)
            raise

        logger.info("Training subprocess started (pid=%s)", self._proc.pid)

        # Start event pump thread
        self._pump_thread = threading.Thread(target = self._pump_loop, daemon = True)
        self._pump_thread.start()

        return True

    def stop_training(self, save: bool = True) -> bool:
        """Send stop signal to the training subprocess."""
        self._should_stop = True
        if not save:
            self._cancel_requested = True
        with self._lock:
            if self._stop_queue is not None:
                try:
                    self._stop_queue.put({"type": "stop", "save": save})
                except (OSError, ValueError):
                    pass
            # Update progress immediately for responsive UI
            self._progress.status_message = (
                "Stopping training and saving checkpoint..."
                if save
                else "Cancelling training..."
            )
        return True

    def force_terminate(self) -> None:
        """Force-kill the training subprocess so state can be reset immediately."""
        with self._lock:
            if self._proc is not None and self._proc.is_alive():
                logger.info(
                    "Force-terminating training subprocess (pid=%s)", self._proc.pid
                )
                self._proc.terminate()
            proc = self._proc

        if proc is not None:
            proc.join(timeout = 5.0)
            if proc.is_alive():
                proc.kill()
                proc.join(timeout = 2.0)

    def is_training_active(self) -> bool:
        """Check if training is currently active."""
        with self._lock:
            # Subprocess alive = active
            if self._proc is not None and self._proc.is_alive():
                return True

            # Stop was requested and process exited → inactive
            if self._should_stop:
                return False

            # Check progress state
            p = self._progress
            if p.is_training:
                return True
            if p.is_completed or p.error:
                return False

            # Check status message for activity indicators
            status_lower = (p.status_message or "").lower()
            if any(
                k in status_lower
                for k in [
                    "cancelled",
                    "canceled",
                    "stopped",
                    "completed",
                    "ready to train",
                ]
            ):
                return False
            if any(
                k in status_lower
                for k in [
                    "loading",
                    "preparing",
                    "training",
                    "configuring",
                    "tokenizing",
                    "starting",
                    "importing",
                ]
            ):
                return True

            return False

    def get_training_status(self, theme: str = "light") -> Tuple:
        """Get current training status and loss plot."""
        with self._lock:
            progress = self._progress

        if not (progress.is_training or progress.is_completed or progress.error):
            return (None, progress)

        plot = self._create_loss_plot(progress, theme)
        return (plot, progress)

    def refresh_plot_for_theme(self, theme: str) -> Optional[plt.Figure]:
        """Refresh plot with new theme."""
        if theme and isinstance(theme, str) and theme in ["light", "dark"]:
            self.current_theme = theme
        if self.loss_history:
            with self._lock:
                progress = self._progress
            return self._create_loss_plot(progress, self.current_theme)
        return None

    # ------------------------------------------------------------------
    # Compatibility shims — routes/training.py accesses these
    # ------------------------------------------------------------------

    class _TrainerShim:
        """Thin adapter exposing ``backend.trainer.*`` on top of a TrainingBackend.

        Progress reads and writes are forwarded to the backend's ``_progress``
        attribute; ``should_stop`` is a plain attribute initialised to False.
        """

        def __init__(self, backend: "TrainingBackend"):
            self.should_stop = False
            self._backend = backend

        @property
        def training_progress(self):
            # Read straight from the backend (no lock is taken here).
            owner = self._backend
            return owner._progress

        @training_progress.setter
        def training_progress(self, value):
            owner = self._backend
            owner._progress = value

        def get_training_progress(self):
            """Return the backend's current progress object."""
            return self.training_progress

        def _update_progress(self, **kwargs):
            """Set the given fields on the backend's progress object.

            Keys that are not existing attributes of the progress object are
            skipped. The backend lock is held for the whole update.
            """
            owner = self._backend
            with owner._lock:
                for field_name, new_value in kwargs.items():
                    if not hasattr(owner._progress, field_name):
                        continue
                    setattr(owner._progress, field_name, new_value)

    @property
    def trainer(self):
        """Compatibility shim for routes that access backend.trainer.*"""
        return self._TrainerShim(self)

    # ------------------------------------------------------------------
    # Event pump (background thread)
    # ------------------------------------------------------------------

    def _pump_loop(self) -> None:
        """Background thread: consume events from subprocess → update state."""
        while True:
            if self._proc is None or self._event_queue is None:
                return

            # Try to read an event
            event = self._read_queue(self._event_queue, timeout_sec = 0.25)
            if event is not None:
                self._handle_event(event)
                continue

            # No event — check if process is still alive
            if self._proc.is_alive():
                continue

            # Process exited — drain remaining events
            for e in self._drain_queue(self._event_queue):
                self._handle_event(e)

            # Mark as done if no explicit complete/error was received
            with self._lock:
                if self._progress.is_training:
                    if self._should_stop:
                        self._progress.is_training = False
                        self._progress.status_message = "Training stopped."
                    else:
                        self._progress.is_training = False
                        self._progress.error = (
                            self._progress.error
                            or "Training process exited unexpectedly"
                        )
            return

    def _handle_event(self, event: dict) -> None:
        """Apply a subprocess event to local state."""
        etype = event.get("type")

        with self._lock:
            if etype == "progress":
                self._progress.step = event.get("step", self._progress.step)
                self._progress.epoch = event.get("epoch", self._progress.epoch)
                self._progress.loss = event.get("loss", self._progress.loss)
                self._progress.learning_rate = event.get(
                    "learning_rate", self._progress.learning_rate
                )
                self._progress.total_steps = event.get(
                    "total_steps", self._progress.total_steps
                )
                self._progress.elapsed_seconds = event.get("elapsed_seconds")
                self._progress.eta_seconds = event.get("eta_seconds")
                self._progress.grad_norm = event.get("grad_norm")
                self._progress.num_tokens = event.get("num_tokens")
                self._progress.eval_loss = event.get("eval_loss")
                self._progress.is_training = True
                status = event.get("status_message", "")
                if status:
                    self._progress.status_message = status

                # Update metric histories
                step = event.get("step", 0)
                loss = event.get("loss", 0.0)
                lr = event.get("learning_rate", 0.0)
                if step >= 0 and loss > 0:
                    self.loss_history.append(loss)
                    self.lr_history.append(lr)
                    self.step_history.append(step)

                grad_norm = event.get("grad_norm")
                if grad_norm is not None:
                    try:
                        gn = float(grad_norm)
                    except (TypeError, ValueError):
                        gn = None
                    if gn is not None and math.isfinite(gn):
                        self.grad_norm_history.append(gn)
                        self.grad_norm_step_history.append(step)

                eval_loss = event.get("eval_loss")
                if eval_loss is not None:
                    self.eval_loss_history.append(eval_loss)
                    self.eval_step_history.append(step)
                    self.eval_enabled = True

            elif etype == "eval_configured":
                self.eval_enabled = True

            elif etype == "status":
                self._progress.status_message = event.get("message", "")
                self._progress.is_training = True

            elif etype == "complete":
                self._progress.is_training = False
                self._progress.is_completed = True
                self._output_dir = event.get("output_dir")
                msg = event.get("status_message", "Training completed")
                self._progress.status_message = msg

            elif etype == "error":
                self._progress.is_training = False
                self._progress.error = event.get("error", "Unknown error")
                logger.error("Training error: %s", event.get("error"))
                stack = event.get("stack", "")
                if stack:
                    logger.error("Stack trace:\n%s", stack)

    @staticmethod
    def _read_queue(q: Any, timeout_sec: float) -> Optional[dict]:
        try:
            return q.get(timeout = timeout_sec)
        except queue.Empty:
            return None
        except (EOFError, OSError, ValueError):
            return None

    @staticmethod
    def _drain_queue(q: Any) -> list:
        events = []
        while True:
            try:
                events.append(q.get_nowait())
            except queue.Empty:
                return events
            except (EOFError, OSError, ValueError):
                return events

    # ------------------------------------------------------------------
    # Plot generation (unchanged from original)
    # ------------------------------------------------------------------

    def _create_loss_plot(
        self, progress: TrainingProgress, theme: str = "light"
    ) -> plt.Figure:
        """Create training loss plot with theme-aware styling.

        Draws the recorded losses as a scatter plot plus a trailing moving
        average (window of up to 20 points). When no loss has been recorded
        yet, the figure shows the status message (or a waiting message)
        instead.

        Args:
            progress: Progress snapshot used for the title / placeholder text.
            theme: "light" selects the light palette; any other value selects
                the dark palette.

        Returns:
            The newly created matplotlib figure.
        """
        # Close every figure pyplot still tracks so repeated calls do not
        # accumulate open figures.
        plt.close("all")

        LIGHT_STYLE = {
            "facecolor": "#ffffff",
            "grid_color": "#d1d5db",
            "line": "#16b88a",
            "text": "#1f2937",
            "empty_text": "#6b7280",
        }
        DARK_STYLE = {
            "facecolor": "#292929",
            "grid_color": "#404040",
            "line": "#4ade80",
            "text": "#e5e7eb",
            "empty_text": "#9ca3af",
        }

        style = LIGHT_STYLE if theme == "light" else DARK_STYLE

        # Work on equal-length snapshots: the pump thread appends to the two
        # histories one after the other, so the live lists can momentarily
        # differ in length, which would make the plotting calls fail.
        losses = list(self.loss_history)
        steps = list(self.step_history)
        point_count = min(len(losses), len(steps))
        del losses[point_count:]
        del steps[point_count:]

        fig, ax = plt.subplots(figsize = (PLOT_WIDTH, PLOT_HEIGHT))
        fig.patch.set_facecolor(style["facecolor"])
        ax.set_facecolor(style["facecolor"])

        if losses:
            scatter_color = "#60a5fa"
            ax.scatter(
                steps,
                losses,
                s = 16,
                alpha = 0.6,
                color = scatter_color,
                linewidths = 0,
                label = "Training Loss (raw)",
            )

            MA_WINDOW = 20
            window = min(MA_WINDOW, len(losses))

            if window >= 2:
                # Prefix sums give each trailing-window mean in O(1).
                cumsum = [0.0]
                for v in losses:
                    cumsum.append(cumsum[-1] + float(v))

                ma = []
                for i in range(len(losses)):
                    start = max(0, i - window + 1)
                    denom = i - start + 1
                    ma.append((cumsum[i + 1] - cumsum[start]) / denom)

                ax.plot(
                    steps,
                    ma,
                    color = style["line"],
                    linewidth = 2.5,
                    alpha = 0.95,
                    label = f"Moving Avg ({ma[-1]:.4f})",
                )

                leg = ax.legend(frameon = False, fontsize = 9)
                for t in leg.get_texts():
                    t.set_color(style["text"])

            ax.set_xlabel("Steps", fontsize = 10, color = style["text"])
            ax.set_ylabel("Loss", fontsize = 10, color = style["text"])

            # Title priority: error, completion, status message, step summary.
            if progress.error:
                title = f"Error: {progress.error}"
            elif progress.is_completed:
                title = f"Training completed! Final loss: {progress.loss:.4f}"
            elif progress.status_message:
                title = progress.status_message
            elif progress.step > 0:
                title = f"Epoch: {progress.epoch} | Step: {progress.step}/{progress.total_steps} | Loss: {progress.loss:.4f}"
            else:
                title = "Training Loss"

            ax.set_title(
                title, fontsize = 11, fontweight = "bold", pad = 10, color = style["text"]
            )
            ax.grid(True, alpha = 0.4, linestyle = "--", color = style["grid_color"])
            ax.tick_params(colors = style["text"], which = "both")
            ax.spines["top"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.spines["bottom"].set_color(style["text"])
            ax.spines["left"].set_color(style["text"])
        else:
            # Nothing to plot yet: show a centred message on a blank canvas.
            display_msg = (
                progress.status_message
                if progress.status_message
                else "Waiting for training data..."
            )
            ax.text(
                0.5,
                0.5,
                display_msg,
                ha = "center",
                va = "center",
                fontsize = 16,
                color = style["empty_text"],
                transform = ax.transAxes,
            )
            ax.set_xticks([])
            ax.set_yticks([])
            for spine in ax.spines.values():
                spine.set_visible(False)

        fig.tight_layout()
        return fig

    def _transfer_to_inference_backend(self) -> bool:
        """Placeholder kept for API compatibility; performs no transfer.

        With subprocess-based training the model lives in the worker process
        and is gone once that process exits, so inference has to load the
        saved checkpoint from disk. This method only logs the recorded output
        directory.

        Returns:
            Always False.
        """
        saved_to = self._output_dir
        logger.info(
            "_transfer_to_inference_backend: subprocess training — model must be loaded from disk (output_dir=%s)",
            saved_to,
        )
        return False


# ========== GLOBAL INSTANCE ==========
# Created lazily by get_training_backend() on first use.
_training_backend = None


def get_training_backend() -> TrainingBackend:
    """Return the shared TrainingBackend, constructing it on the first call."""
    global _training_backend
    if _training_backend is not None:
        return _training_backend
    _training_backend = TrainingBackend()
    return _training_backend


================================================
FILE: studio/backend/core/training/worker.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Training subprocess entry point.

Each training job runs in a fresh subprocess (mp.get_context("spawn")).
This gives us a clean Python interpreter with no stale module state —
solving the transformers version-switching problem completely.

Pattern follows core/data_recipe/jobs/worker.py.
"""

from __future__ import annotations

import structlog
from loggers import get_logger
import os
import sys
import time
import traceback
from pathlib import Path
from typing import Any

logger = get_logger(__name__)


def _activate_transformers_version(model_name: str) -> None:
    """Select the transformers install to use; call BEFORE any ML imports.

    When the resolved base model needs transformers 5.x, the pre-installed
    .venv_t5 directory is placed at the front of ``sys.path`` and of the
    ``PYTHONPATH`` environment variable. Otherwise nothing is changed and the
    default install (4.57.x in .venv/) stays in effect.

    Raises:
        RuntimeError: if transformers 5.x is required but the .venv_t5
            directory cannot be made available.
    """
    # The ``utils`` package is imported relative to the backend directory,
    # three levels above this file, so make sure that directory is importable.
    backend_root = str(Path(__file__).resolve().parents[2])
    if backend_root not in sys.path:
        sys.path.insert(0, backend_root)

    from utils.transformers_version import (
        needs_transformers_5,
        _resolve_base_model,
        _ensure_venv_t5_exists,
        _VENV_T5_DIR,
    )

    base_model = _resolve_base_model(model_name)
    if not needs_transformers_5(base_model):
        logger.info("Using default transformers (4.57.x) for %s", model_name)
        return

    if not _ensure_venv_t5_exists():
        raise RuntimeError(
            f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
        )

    if _VENV_T5_DIR not in sys.path:
        sys.path.insert(0, _VENV_T5_DIR)
    logger.info("Activated transformers 5.x from %s", _VENV_T5_DIR)

    # Propagate to child subprocesses (e.g. GGUF converter) by prepending to
    # whatever PYTHONPATH was inherited.
    inherited = os.environ.get("PYTHONPATH", "")
    if inherited:
        os.environ["PYTHONPATH"] = _VENV_T5_DIR + os.pathsep + inherited
    else:
        os.environ["PYTHONPATH"] = _VENV_T5_DIR


def run_training_process(
    *,
    event_queue: Any,
    stop_queue: Any,
    config: dict,
) -> None:
    """Subprocess entrypoint. Fresh Python — no stale module state.

    Runs one complete training job inside this process and reports back to
    the parent only through ``event_queue``. Every event is a dict carrying a
    ``"type"`` and a ``"ts"`` (``time.time()``) key; the types emitted from
    this function are ``"status"``, ``"progress"``, ``"eval_configured"``,
    ``"error"`` and ``"complete"``.

    Stages, in order:
      1.  Activate the right transformers version (before any ML import),
          then auto-enable ``trust_remote_code`` for unsloth/* 5.x models,
          install mamba-ssm for SSM/hybrid models, force the ``fork`` start
          method on Linux, and probe for Triton on Windows.
      2.  Import the ML stack; embedding jobs branch off to
          ``_run_embedding_training`` and return.
      3.  Build an ``UnslothTrainer`` and wire progress + stop handling.
      4.  Detect model type -> load dataset -> load model -> prepare model
          -> train, then emit the final ``"complete"`` or ``"error"`` event.

    Most failures are caught and turned into ``"error"`` events. A few steps
    run outside any ``try`` block (e.g. reading ``config["model_name"]``,
    logging setup, constructing the trainer) and would propagate instead.

    When a stop request is observed between stages, a ``"complete"`` event
    with ``output_dir`` set to ``None`` is sent and the function returns.

    Args:
        event_queue: mp.Queue for sending progress/status/error events to parent.
        stop_queue: mp.Queue for receiving stop commands from parent. Messages
            are dicts; ``{"type": "stop", "save": <bool>}`` is the one handled.
        config: Training configuration dict with all parameters. The keys
            ``model_name``, ``max_seq_length`` and ``load_in_4bit`` are read
            with ``[]`` and must be present on the non-embedding path; all
            other keys are read with ``.get`` and have defaults. The dict may
            be mutated (``trust_remote_code`` can be switched on).
    """
    # Environment must be prepared before any library that reads it is imported.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["PYTHONWARNINGS"] = (
        "ignore"  # Suppress warnings at C-level before imports
    )

    import warnings
    from loggers.config import LogConfig

    # ENVIRONMENT_TYPE defaults to "production"; only then are Python-level
    # warnings silenced as well.
    if os.getenv("ENVIRONMENT_TYPE", "production") == "production":
        warnings.filterwarnings("ignore")

    LogConfig.setup_logging(
        service_name = "unsloth-studio-training-worker",
        env = os.getenv("ENVIRONMENT_TYPE", "production"),
    )

    model_name = config["model_name"]

    # ── 1. Activate correct transformers version BEFORE any ML imports ──
    try:
        _activate_transformers_version(model_name)
    except Exception as exc:
        event_queue.put(
            {
                "type": "error",
                "error": f"Failed to activate transformers version: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    # ── 1a. Auto-enable trust_remote_code for unsloth/* transformers 5.x models ──
    # Some newer architectures (e.g. NemotronH) have config parsing bugs in
    # transformers that require trust_remote_code=True as a workaround.
    # Only auto-enable for unsloth/* prefixed models (trusted source).
    # The import is safe here: _activate_transformers_version() has already put
    # the backend directory on sys.path.
    from utils.transformers_version import needs_transformers_5

    # NOTE(review): this check uses the raw model_name, whereas
    # _activate_transformers_version() checks the resolved base model.
    # Presumably equivalent for unsloth/* hub ids — confirm.
    if (
        needs_transformers_5(model_name)
        and model_name.lower().startswith("unsloth/")
        and not config.get("trust_remote_code", False)
    ):
        config["trust_remote_code"] = True
        logger.info(
            "Auto-enabled trust_remote_code for unsloth/* transformers 5.x model: %s",
            model_name,
        )

    # ── 1b. Auto-install mamba-ssm for SSM/hybrid models (NemotronH, Falcon-H1) ──
    # Detection is a case-insensitive substring match on the model name.
    _SSM_MODEL_SUBSTRINGS = ("nemotron_h", "nemotron-3-nano", "falcon_h1", "falcon-h1")
    if any(sub in model_name.lower() for sub in _SSM_MODEL_SUBSTRINGS):
        try:
            import mamba_ssm  # noqa: F401

            logger.info("mamba-ssm already installed")
        except ImportError:
            logger.info(
                "SSM model detected — installing mamba-ssm and causal-conv1d (this may take several minutes)..."
            )
            _send_status(
                event_queue, "Installing mamba-ssm (first time only, ~7 min)..."
            )
            import subprocess as _sp

            # --no-build-isolation: compile against current torch (no version conflicts)
            # --no-deps: don't pull in torch/transformers/triton (already installed)
            # causal_conv1d is installed first, then mamba_ssm.
            for _pkg in ["causal_conv1d", "mamba_ssm"]:
                _r = _sp.run(
                    [
                        sys.executable,
                        "-m",
                        "pip",
                        "install",
                        "--no-build-isolation",
                        "--no-deps",
                        "--no-cache-dir",
                        _pkg,
                    ],
                    stdout = _sp.PIPE,
                    stderr = _sp.STDOUT,  # merged, so _r.stdout holds the full pip log
                    text = True,
                )
                # NOTE(review): a failed install is only logged; execution
                # continues, and the "installation complete" message below is
                # emitted regardless of the outcome.
                if _r.returncode != 0:
                    logger.error("Failed to install %s:\n%s", _pkg, _r.stdout)
                else:
                    logger.info("Installed %s successfully", _pkg)
            logger.info("mamba-ssm installation complete")

    # ── 1c. Set fork start method so dataset.map() can multiprocess ──
    # The parent launched us via spawn (clean process), but the compiled
    # SFTTrainer checks get_start_method() and disables num_proc if not "fork".
    # Linux only: fork is the default start method and is safe here (no CUDA
    # context exists yet). macOS defaults to spawn since Python 3.8 because
    # fork is unsafe with macOS frameworks (Metal/MPS, CoreFoundation) --
    # do NOT override on macOS. Windows has no fork at all.
    if sys.platform == "linux":
        import multiprocessing as _mp

        try:
            _mp.set_start_method("fork", force = True)
        except RuntimeError:
            pass  # Already set

    # ── 1d. On Windows, check Triton availability (must be before import torch) ──
    # Without Triton, TORCHDYNAMO_DISABLE=1 is exported so torch.compile is off.
    if sys.platform == "win32":
        try:
            import triton  # noqa: F401

            logger.info("Triton available — torch.compile enabled")
        except ImportError:
            os.environ["TORCHDYNAMO_DISABLE"] = "1"
            logger.warning(
                "Triton not found on Windows — torch.compile disabled. "
                'Install for better performance: pip install "triton-windows<3.7"'
            )

    # ── 2. Now import ML libraries (fresh in this clean process) ──
    try:
        _send_status(event_queue, "Importing Unsloth...")

        # Same backend root that _activate_transformers_version() adds; the
        # membership check makes this a no-op when it is already present.
        backend_path = str(Path(__file__).resolve().parent.parent.parent)
        if backend_path not in sys.path:
            sys.path.insert(0, backend_path)

        from core.training.trainer import UnslothTrainer, TrainingProgress
        from utils.paths import (
            ensure_dir,
            resolve_output_dir,
            resolve_tensorboard_dir,
            datasets_root,
        )

        import transformers

        logger.info("Subprocess loaded transformers %s", transformers.__version__)
    except Exception as exc:
        event_queue.put(
            {
                "type": "error",
                "error": f"Failed to import ML libraries: {exc}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    # ── 2b. EMBEDDING MODEL FAST-PATH ──
    # Embedding models use a completely different pipeline (FastSentenceTransformer
    # + SentenceTransformerTrainer + MultipleNegativesRankingLoss) so we branch
    # early and handle the entire flow in a self-contained function.
    if config.get("is_embedding", False):
        try:
            _run_embedding_training(event_queue, stop_queue, config)
        except Exception as exc:
            event_queue.put(
                {
                    "type": "error",
                    "error": str(exc),
                    "stack": traceback.format_exc(limit = 20),
                    "ts": time.time(),
                }
            )
        # Always return: nothing below applies to embedding jobs.
        return

    # ── 3. Create a fresh trainer instance ──
    trainer = UnslothTrainer()

    # Wire up progress callback → event_queue
    # A "progress" event is sent only when there is a positive training loss
    # (with step >= 0) or an eval loss; any status_message is additionally
    # forwarded as its own "status" event.
    def _on_progress(progress: TrainingProgress):
        has_train_loss = progress.step >= 0 and progress.loss > 0
        has_eval_loss = progress.eval_loss is not None
        if has_train_loss or has_eval_loss:
            event_queue.put(
                {
                    "type": "progress",
                    "step": progress.step,
                    "epoch": progress.epoch,
                    "loss": progress.loss,
                    "learning_rate": progress.learning_rate,
                    "total_steps": progress.total_steps,
                    "elapsed_seconds": progress.elapsed_seconds,
                    "eta_seconds": progress.eta_seconds,
                    "grad_norm": progress.grad_norm,
                    "num_tokens": progress.num_tokens,
                    "eval_loss": progress.eval_loss,
                    "status_message": progress.status_message,
                    "ts": time.time(),
                }
            )
        if progress.status_message:
            _send_status(event_queue, progress.status_message)

    trainer.add_progress_callback(_on_progress)

    # Wire up stop_queue polling to trainer.should_stop
    import threading
    import queue as _queue

    # Background poller: blocks on stop_queue in 1-second slices. The first
    # {"type": "stop"} message sets trainer.should_stop / trainer.save_on_stop
    # ("save" defaults to True) and ends the thread. Other messages are
    # ignored; a closed/broken queue (EOFError/OSError) also ends the thread.
    def _poll_stop():
        while True:
            try:
                msg = stop_queue.get(timeout = 1.0)
                if msg and msg.get("type") == "stop":
                    save = msg.get("save", True)
                    trainer.should_stop = True
                    trainer.save_on_stop = save
                    logger.info("Stop signal received (save=%s)", save)
                    return
            except _queue.Empty:
                continue
            except (EOFError, OSError):
                return

    # Daemon thread, so it does not keep the subprocess alive after we return.
    stop_thread = threading.Thread(target = _poll_stop, daemon = True)
    stop_thread.start()

    # ── 4. Execute the training pipeline ──
    # Order: detect → dataset → model → prepare → train
    # Dataset processing (including LLM-assisted detection) runs BEFORE model
    # loading so both never occupy VRAM at the same time.
    try:
        # Normalise an empty / whitespace-only token to None.
        hf_token = config.get("hf_token", "")
        hf_token = hf_token if hf_token and hf_token.strip() else None

        # ── 4a. Lightweight detection + tokenizer (no VRAM) ──
        _send_status(event_queue, "Detecting model type...")
        trainer.pre_detect_and_load_tokenizer(
            model_name = model_name,
            max_seq_length = config["max_seq_length"],
            hf_token = hf_token,
            is_dataset_image = config.get("is_dataset_image", False),
            is_dataset_audio = config.get("is_dataset_audio", False),
            trust_remote_code = config.get("trust_remote_code", False),
        )
        # Stop requested during detection: report a clean, output-less finish.
        if trainer.should_stop:
            event_queue.put({"type": "complete", "output_dir": None, "ts": time.time()})
            return

        # ── 4b. Load and format dataset (LLM helper may use VRAM briefly) ──
        _send_status(event_queue, "Loading and formatting dataset...")
        hf_dataset = config.get("hf_dataset", "")
        dataset_result = trainer.load_and_format_dataset(
            dataset_source = hf_dataset if hf_dataset and hf_dataset.strip() else None,
            format_type = config.get("format_type", ""),
            local_datasets = config.get("local_datasets") or None,
            local_eval_datasets = config.get("local_eval_datasets") or None,
            custom_format_mapping = config.get("custom_format_mapping"),
            subset = config.get("subset"),
            train_split = config.get("train_split", "train"),
            eval_split = config.get("eval_split"),
            eval_steps = config.get("eval_steps", 0.00),
            dataset_slice_start = config.get("dataset_slice_start"),
            dataset_slice_end = config.get("dataset_slice_end"),
        )

        # The loader returns either (train, eval) or just the training dataset.
        if isinstance(dataset_result, tuple):
            dataset, eval_dataset = dataset_result
        else:
            dataset = dataset_result
            eval_dataset = None

        # [DEBUG] Print first sample before model is loaded
        # dataset is a dict {"dataset": <Dataset>, "detected_format": ..., ...}
        # or a raw Dataset for audio paths
        # try:
        #     ds = dataset["dataset"] if isinstance(dataset, dict) else dataset
        #     print(
        #         f"\n[DEBUG] Dataset loaded BEFORE model. type={type(ds).__name__}, len={len(ds)}",
        #         flush = True,
        #     )
        #     print(f"[DEBUG] Columns: {ds.column_names}", flush = True)
        #     sample = ds[0]
        #     preview = {k: str(v)[:300] for k, v in sample.items()}
        #     print(f"[DEBUG] First sample: {preview}\n", flush = True)
        # except Exception as e:
        #     print(
        #         f"[DEBUG] Could not preview first sample: {type(e).__name__}: {e}",
        #         flush = True,
        #     )

        # Disable eval if eval_steps <= 0
        # (an explicit None leaves whatever eval dataset the loader returned)
        eval_steps = config.get("eval_steps", 0.00)
        if eval_steps is not None and float(eval_steps) <= 0:
            eval_dataset = None

        # Tell the parent process that eval is configured so the frontend
        # shows "Waiting for first evaluation step..." instead of "not configured"
        if eval_dataset is not None:
            event_queue.put(
                {
                    "type": "eval_configured",
                    "ts": time.time(),
                }
            )

        # A stop request takes precedence over a missing dataset: it is
        # reported as "complete", anything else as an "error".
        if dataset is None or trainer.should_stop:
            if trainer.should_stop:
                event_queue.put(
                    {"type": "complete", "output_dir": None, "ts": time.time()}
                )
            else:
                event_queue.put(
                    {
                        "type": "error",
                        "error": trainer.training_progress.error
                        or "Failed to load dataset",
                        "stack": "",
                        "ts": time.time(),
                    }
                )
            return

        # ── Start tqdm monitor early so it captures download + tokenization bars ──
        import threading as _th

        _tqdm_stop = _th.Event()

        # Every 3 seconds, walk the live tqdm bars (tqdm's class-level
        # `_instances` collection) and relay each bar that has a description
        # and non-zero progress as a "<desc> <pct>% (n/total)" status message.
        def _monitor_tqdm():
            from tqdm.auto import tqdm as _tqdm_cls

            while not _tqdm_stop.is_set():
                # Snapshot into a list so iteration is not disturbed if the
                # underlying collection changes while we read it.
                for bar in list(getattr(_tqdm_cls, "_instances", set())):
                    try:
                        n, total = bar.n or 0, bar.total or 0
                        desc = getattr(bar, "desc", "") or ""
                        if total > 0 and n > 0 and desc:
                            pct = min(int(n * 100 / total), 100)
                            _send_status(
                                event_queue, f"{desc.strip()} {pct}% ({n:,}/{total:,})"
                            )
                    except (AttributeError, ReferenceError):
                        pass
                # Doubles as the sleep and as the prompt wake-up on stop.
                _tqdm_stop.wait(3)

        _tqdm_thread = _th.Thread(target = _monitor_tqdm, daemon = True)
        _tqdm_thread.start()

        # Any training_type other than "LoRA/QLoRA" means full finetuning.
        training_type = config.get("training_type", "LoRA/QLoRA")
        use_lora = training_type == "LoRA/QLoRA"

        # ── 4c. Load training model (uses VRAM — dataset already formatted) ──
        _send_status(event_queue, "Loading model...")
        success = trainer.load_model(
            model_name = model_name,
            max_seq_length = config["max_seq_length"],
            load_in_4bit = config["load_in_4bit"],
            full_finetuning = not use_lora,
            hf_token = hf_token,
            is_dataset_image = config.get("is_dataset_image", False),
            is_dataset_audio = config.get("is_dataset_audio", False),
            trust_remote_code = config.get("trust_remote_code", False),
        )
        if not success or trainer.should_stop:
            if trainer.should_stop:
                event_queue.put(
                    {"type": "complete", "output_dir": None, "ts": time.time()}
                )
            else:
                error_msg = trainer.training_progress.error or "Failed to load model"
                event_queue.put(
                    {
                        "type": "error",
                        "error": error_msg,
                        "stack": "",
                        "ts": time.time(),
                    }
                )
            return

        # ── 4d. Prepare model (LoRA or full finetuning) ──
        if use_lora:
            _send_status(event_queue, "Configuring LoRA adapters...")
            success = trainer.prepare_model_for_training(
                use_lora = True,
                finetune_vision_layers = config.get("finetune_vision_layers", True),
                finetune_language_layers = config.get("finetune_language_layers", True),
                finetune_attention_modules = config.get(
                    "finetune_attention_modules", True
                ),
                finetune_mlp_modules = config.get("finetune_mlp_modules", True),
                target_modules = config.get("target_modules"),
                lora_r = config.get("lora_r", 16),
                lora_alpha = config.get("lora_alpha", 16),
                lora_dropout = config.get("lora_dropout", 0.0),
                use_gradient_checkpointing = config.get(
                    "gradient_checkpointing", "unsloth"
                ),
                use_rslora = config.get("use_rslora", False),
                use_loftq = config.get("use_loftq", False),
            )
        else:
            _send_status(event_queue, "Preparing model for full finetuning...")
            success = trainer.prepare_model_for_training(use_lora = False)

        if not success or trainer.should_stop:
            if trainer.should_stop:
                event_queue.put(
                    {"type": "complete", "output_dir": None, "ts": time.time()}
                )
            else:
                event_queue.put(
                    {
                        "type": "error",
                        "error": trainer.training_progress.error
                        or "Failed to prepare model",
                        "stack": "",
                        "ts": time.time(),
                    }
                )
            return

        # Convert learning rate
        # The config value may be a string (default "2e-4"); an unparsable
        # string gets a dedicated error message instead of a generic traceback.
        try:
            lr_value = float(config.get("learning_rate", "2e-4"))
        except ValueError:
            event_queue.put(
                {
                    "type": "error",
                    "error": f"Invalid learning rate: {config.get('learning_rate')}",
                    "stack": "",
                    "ts": time.time(),
                }
            )
            return

        # Generate output dir
        # Fallback name: "<model name with '/' replaced by '_'>_<unix timestamp>".
        output_dir = config.get("output_dir")
        if not output_dir:
            output_dir = f"{model_name.replace('/', '_')}_{int(time.time())}"
        output_dir = str(resolve_output_dir(output_dir))
        ensure_dir(Path(output_dir))

        # The tensorboard dir is only resolved/created when tensorboard is
        # enabled; otherwise the raw config value is passed through unchanged.
        tensorboard_dir = config.get("tensorboard_dir")
        if config.get("enable_tensorboard", False):
            tensorboard_dir = str(resolve_tensorboard_dir(tensorboard_dir))
            ensure_dir(Path(tensorboard_dir))

        # Start training (directly — no inner thread, we ARE the subprocess)
        dataset_display = (
            config.get("hf_dataset", "") or config.get("uploaded_file", "") or ""
        )
        _send_status(
            event_queue,
            f'Training "{model_name}"'
            + (f"\nDataset = {dataset_display}" if dataset_display else ""),
        )
        # Missing, None or non-positive max_steps / save_steps are passed as 0.
        max_steps = config.get("max_steps", 0)
        save_steps = config.get("save_steps", 0)

        trainer._train_worker(
            dataset,
            output_dir = output_dir,
            num_epochs = config.get("num_epochs", 3),
            learning_rate = lr_value,
            batch_size = config.get("batch_size", 2),
            gradient_accumulation_steps = config.get("gradient_accumulation_steps", 4),
            warmup_steps = config.get("warmup_steps"),
            warmup_ratio = config.get("warmup_ratio"),
            max_steps = max_steps if max_steps and max_steps > 0 else 0,
            save_steps = save_steps if save_steps and save_steps > 0 else 0,
            weight_decay = config.get("weight_decay", 0.01),
            random_seed = config.get("random_seed", 3407),
            packing = config.get("packing", False),
            train_on_completions = config.get("train_on_completions", False),
            enable_wandb = config.get("enable_wandb", False),
            wandb_project = config.get("wandb_project", "unsloth-training"),
            wandb_token = config.get("wandb_token"),
            enable_tensorboard = config.get("enable_tensorboard", False),
            tensorboard_dir = tensorboard_dir,
            eval_dataset = eval_dataset,
            eval_steps = eval_steps,
            max_seq_length = config.get("max_seq_length", 2048),
            optim = config.get("optim", "adamw_8bit"),
            lr_scheduler_type = config.get("lr_scheduler_type", "linear"),
        )

        # NOTE(review): only reached when _train_worker returns normally. The
        # early returns above and the except path below leave the monitor
        # running; it is a daemon thread, so it does not block process exit.
        _tqdm_stop.set()

        # Check final state
        # The trainer records failures on its progress object; that decides
        # between the final "error" and "complete" event.
        progress = trainer.get_training_progress()
        if progress.error:
            event_queue.put(
                {
                    "type": "error",
                    "error": progress.error,
                    "stack": "",
                    "ts": time.time(),
                }
            )
        else:
            event_queue.put(
                {
                    "type": "complete",
                    "output_dir": output_dir,
                    "status_message": progress.status_message or "Training completed",
                    "ts": time.time(),
                }
            )

    except Exception as exc:
        # Catch-all for stage 4: anything unexpected becomes an "error" event
        # carrying a traceback limited to 20 frames.
        event_queue.put(
            {
                "type": "error",
                "error": str(exc),
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )


def _send_status(event_queue: Any, message: str) -> None:
    """Send a status update to the parent process."""
    event_queue.put(
        {
            "type": "status",
            "message": message,
            "ts": time.time(),
        }
    )


def _run_embedding_training(event_queue: Any, stop_queue: Any, config: dict) -> None:
    """Self-contained embedding model training pipeline.

    Uses FastSentenceTransformer + SentenceTransformerTrainer +
    MultipleNegativesRankingLoss — completely separate from the
    LLM/VLM/audio paths in UnslothTrainer.

    Mirrors the pattern from the reference embedding notebooks:
      All_MiniLM_L6_v2.py, BGE_M3.py, EmbeddingGemma_300M.py,
      ModernBert.py, Qwen3_Embedding_0_6B.py
    """
    import math
    import queue as _queue
    import threading

    model_name = config["model_name"]
    training_start_time = time.time()

    # ── 1. Import embedding-specific libraries ──
    _send_status(event_queue, "Importing embedding libraries...")
    try:
        from unsloth import FastSentenceTransformer, is_bfloat16_supported
        from sentence_transformers import (
            SentenceTransformerTrainer,
            SentenceTransformerTrainingArguments,
        )
        from sentence_transformers.losses import MultipleNegativesRankingLoss
        from sentence_transformers.training_args import BatchSamplers
        from datasets import load_dataset, Dataset
        from transformers import TrainerCallback
        from utils.paths import datasets_root, resolve_output_dir
    except ImportError as e:
        event_queue.put(
            {
                "type": "error",
                "error": f"Failed to import embedding libraries: {e}. "
                "Ensure 'sentence_transformers' and 'unsloth' are installed.",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    # ── Stop signal handling ──
    _should_stop = False
    _save_on_stop = True

    def _poll_stop():
        nonlocal _should_stop, _save_on_stop
        while True:
            try:
                msg = stop_queue.get(timeout = 1.0)
                if msg and msg.get("type") == "stop":
                    _save_on_stop = msg.get("save", True)
                    _should_stop = True
                    logger.info(
                        "Embedding training: stop signal received (save=%s)",
                        _save_on_stop,
                    )
                    return
            except _queue.Empty:
                continue
            except (EOFError, OSError):
                return

    stop_thread = threading.Thread(target = _poll_stop, daemon = True)
    stop_thread.start()

    # ── 2. Load model ──
    _send_status(event_queue, "Loading embedding model...")
    try:
        hf_token = config.get("hf_token", "")
        hf_token = hf_token if hf_token and hf_token.strip() else None
        max_seq_length = config.get("max_seq_length", 512)
        training_type = config.get("training_type", "LoRA/QLoRA")
        use_lora = training_type == "LoRA/QLoRA"

        model = FastSentenceTransformer.from_pretrained(
            model_name = model_name,
            max_seq_length = max_seq_length,
            full_finetuning = not use_lora,
            token = hf_token,
        )
    except Exception as e:
        event_queue.put(
            {
                "type": "error",
                "error": f"Failed to load embedding model '{model_name}': {e}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    if _should_stop:
        event_queue.put({"type": "complete", "output_dir": None, "ts": time.time()})
        return

    # ── 3. Apply LoRA ──
    if use_lora:
        _send_status(event_queue, "Configuring LoRA adapters (FEATURE_EXTRACTION)...")
        try:
            gradient_checkpointing = config.get("gradient_checkpointing", False)
            # Normalize: "none" or empty → False
            if gradient_checkpointing in ("none", "", None):
                gradient_checkpointing = False

            model = FastSentenceTransformer.get_peft_model(
                model,
                r = config.get("lora_r", 32),
                target_modules = config.get("target_modules")
                or ["q_proj", "k_proj", "v_proj", "o_proj"],
                lora_alpha = config.get("lora_alpha", 64),
                lora_dropout = config.get("lora_dropout", 0.0),
                bias = "none",
                use_gradient_checkpointing = gradient_checkpointing,
                random_state = config.get("random_seed", 3407),
                use_rslora = config.get("use_rslora", False),
                loftq_config = {"loftq_bits": 4, "loftq_iter": 1}
                if config.get("use_loftq")
                else None,
                task_type = "FEATURE_EXTRACTION",
            )
        except Exception as e:
            event_queue.put(
                {
                    "type": "error",
                    "error": f"Failed to configure LoRA for embedding model: {e}",
                    "stack": traceback.format_exc(limit = 20),
                    "ts": time.time(),
                }
            )
            return

    if _should_stop:
        event_queue.put({"type": "complete", "output_dir": None, "ts": time.time()})
        return

    # ── 4. Load dataset ──
    _send_status(event_queue, "Loading dataset...")
    try:
        hf_dataset = config.get("hf_dataset", "")
        local_datasets = config.get("local_datasets") or []
        subset = config.get("subset") or None
        train_split = config.get("train_split", "train") or "train"

        if hf_dataset and hf_dataset.strip():
            hf_token = config.get("hf_token", "")
            hf_token = hf_token if hf_token and hf_token.strip() else None
            dataset = load_dataset(
                hf_dataset.strip(),
                subset,
                split = train_split,
                token = hf_token,
            )
        elif local_datasets:
            # Load from local file(s) — mirrors the non-embedding pipeline's
            # directory handling so recipe outputs (parquet-files/) work.
            all_files: list[str] = []
            for dataset_file in local_datasets:
                file_path = (
                    dataset_file
                    if os.path.isabs(dataset_file)
                    else os.path.join(
                        str(datasets_root()),
                        dataset_file,
                    )
                )
                if os.path.isdir(file_path):
                    file_path_obj = Path(file_path)
                    parquet_dir = (
                        file_path_obj / "parquet-files"
                        if (file_path_obj / "parquet-files").exists()
                        else file_path_obj
                    )
                    parquet_files = sorted(parquet_dir.glob("*.parquet"))
                    if parquet_files:
                        all_files.extend(str(p) for p in parquet_files)
                        continue
                    candidates: list[Path] = []
                    for ext in (".json", ".jsonl", ".csv", ".parquet"):
                        candidates.extend(sorted(file_path_obj.glob(f"*{ext}")))
                    if candidates:
                        all_files.extend(str(c) for c in candidates)
                        continue
                    raise ValueError(
                        f"No supported data files in directory: {file_path_obj}"
                    )
                else:
                    all_files.append(file_path)

            if all_files:
                first_ext = Path(all_files[0]).suffix.lower()
                if first_ext in (".json", ".jsonl"):
                    loader = "json"
                elif first_ext == ".csv":
                    loader = "csv"
                elif first_ext == ".parquet":
                    loader = "parquet"
                else:
                    raise ValueError(
                        f"Unsupported local dataset format: {all_files[0]}"
                    )
                dataset = load_dataset(loader, data_files = all_files, split = "train")
        else:
            event_queue.put(
                {
                    "type": "error",
                    "error": "No dataset specified for embedding training.",
                    "stack": "",
                    "ts": time.time(),
                }
            )
            return

        # Apply dataset slicing if specified
        slice_start = config.get("dataset_slice_start")
        slice_end = config.get("dataset_slice_end")
        if slice_start is not None or slice_end is not None:
            start = slice_start if slice_start is not None else 0
            end = slice_end if slice_end is not None else len(dataset)
            dataset = dataset.select(range(start, min(end + 1, len(dataset))))

        logger.info(f"Embedding dataset loaded: {len(dataset)} samples")
    except Exception as e:
        event_queue.put(
            {
                "type": "error",
                "error": f"Failed to load dataset: {e}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    if _should_stop:
        event_queue.put({"type": "complete", "output_dir": None, "ts": time.time()})
        return

    # ── 5. Create loss function ──
    loss = MultipleNegativesRankingLoss(model)

    # ── 6. Build training arguments ──
    _send_status(event_queue, "Configuring training...")
    try:
        lr_value = float(config.get("learning_rate", "2e-4"))
    except ValueError:
        event_queue.put(
            {
                "type": "error",
                "error": f"Invalid learning rate: {config.get('learning_rate')}",
                "stack": "",
                "ts": time.time(),
            }
        )
        return

    output_dir = config.get("output_dir")
    if not output_dir:
        output_dir = str(
            resolve_output_dir(f"{model_name.replace('/', '_')}_{int(time.time())}")
        )

    num_epochs = config.get("num_epochs", 2)
    batch_size = config.get("batch_size", 256)
    gradient_accumulation_steps = config.get("gradient_accumulation_steps", 1)
    max_steps_val = config.get("max_steps", 0)
    save_steps_val = config.get("save_steps", 0)
    warmup_ratio = config.get("warmup_ratio", 0.03)
    warmup_steps_val = config.get("warmup_steps")
    log_frequency = config.get("log_frequency", 50)

    # Build args dict
    training_args_kwargs = {
        "output_dir": output_dir,
        "per_device_train_batch_size": batch_size,
        "gradient_accumulation_steps": gradient_accumulation_steps,
        "learning_rate": lr_value,
        "fp16": not is_bfloat16_supported(),
        "bf16": is_bfloat16_supported(),
        "logging_steps": 1,
        "report_to": ["wandb"] if config.get("enable_wandb") else "none",
        "lr_scheduler_type": config.get("lr_scheduler_type", "linear"),
        "batch_sampler": BatchSamplers.NO_DUPLICATES,
        "optim": config.get("optim", "adamw_8bit"),
        "weight_decay": config.get("weight_decay", 0.01),
        "seed": config.get("random_seed", 3407),
    }

    # max_steps vs epochs
    if max_steps_val and max_steps_val > 0:
        training_args_kwargs["max_steps"] = max_steps_val
    else:
        training_args_kwargs["num_train_epochs"] = num_epochs if num_epochs > 0 else 2

    # warmup: prefer warmup_ratio (standard for embedding scripts), fallback to steps
    if warmup_ratio is not None and warmup_ratio > 0:
        training_args_kwargs["warmup_ratio"] = warmup_ratio
    elif warmup_steps_val is not None and warmup_steps_val > 0:
        training_args_kwargs["warmup_steps"] = warmup_steps_val

    # save_steps
    if save_steps_val and save_steps_val > 0:
        training_args_kwargs["save_steps"] = save_steps_val
        training_args_kwargs["save_strategy"] = "steps"

    args = SentenceTransformerTrainingArguments(**training_args_kwargs)

    # ── 7. Calculate total steps for progress tracking ──
    if max_steps_val and max_steps_val > 0:
        total_steps = max_steps_val
    else:
        effective_epochs = num_epochs if num_epochs > 0 else 2
        len_dataloader = math.ceil(len(dataset) / batch_size)
        steps_per_epoch = max(len_dataloader // gradient_accumulation_steps, 1)
        total_steps = steps_per_epoch * effective_epochs

    # ── 8. Create progress callback ──
    class _EmbeddingProgressCallback(TrainerCallback):
        """Sends training progress events to the parent process via event_queue."""

        def on_log(self, args, state, control, logs = None, **kwargs):
            if not logs:
                return
            loss_value = logs.get("loss", logs.get("train_loss", 0.0))
            current_step = state.global_step

            elapsed = time.time() - training_start_time
            eta = None
            if current_step > 0 and total_steps > 0:
                remaining = total_steps - current_step
                if remaining > 0:
                    eta = (elapsed / current_step) * remaining

            event_queue.put(
                {
                    "type": "progress",
                    "step": current_step,
                    "epoch": round(state.epoch, 2) if state.epoch else 0,
                    "loss": loss_value,
                    "learning_rate": logs.get("learning_rate", 0.0),
                    "total_steps": total_steps,
                    "elapsed_seconds": elapsed,
                    "eta_seconds": eta,
                    "grad_norm": logs.get("grad_norm"),
                    "num_tokens": getattr(state, "num_input_tokens_seen", None),
                    "eval_loss": logs.get("eval_loss"),
                    "status_message": "",
                    "ts": time.time(),
                }
            )

        def on_step_end(self, args, state, control, **kwargs):
            if _should_stop:
                logger.info("Embedding training: stop at step %d", state.global_step)
                control.should_training_stop = True
                return control

    # ── 9. Create trainer and train ──
    _send_status(event_queue, "Starting embedding training...")
    try:
        trainer = SentenceTransformerTrainer(
            model = model,
            train_dataset = dataset,
            loss = loss,
            args = args,
            callbacks = [_EmbeddingProgressCallback()],
        )

        trainer.train()
    except Exception as e:
        event_queue.put(
            {
                "type": "error",
                "error": f"Embedding training failed: {e}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    # ── 10. Save model ──
    if _should_stop and not _save_on_stop:
        event_queue.put(
            {
                "type": "complete",
                "output_dir": None,
                "status_message": "Training cancelled",
                "ts": time.time(),
            }
        )
        return

    _send_status(event_queue, "Saving model...")
    try:
        model.save_pretrained(output_dir)
        model.tokenizer.save_pretrained(output_dir)
        logger.info("Embedding model saved to %s", output_dir)
    except Exception as e:
        logger.error("Failed to save embedding model: %s", e)
        event_queue.put(
            {
                "type": "error",
                "error": f"Training completed but failed to save: {e}",
                "stack": traceback.format_exc(limit = 20),
                "ts": time.time(),
            }
        )
        return

    # ── 11. Done ──
    event_queue.put(
        {
            "type": "complete",
            "output_dir": output_dir,
            "status_message": "Embedding training completed",
            "ts": time.time(),
        }
    )


================================================
FILE: studio/backend/loggers/.gitkeep
================================================


================================================
FILE: studio/backend/loggers/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from .handlers import get_logger

__all__ = ["get_logger"]


================================================
FILE: studio/backend/loggers/config.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Logging configuration for structured logging with structlog.

This module provides centralized logging configuration with environment-specific
formats and processors. Supports both development and production environments
with consistent structured logging.

Key Features:
- Environment-specific formatting (JSON for production, console for development)
- Timestamp standardization (ISO format)
- Context variable integration
- Log level filtering
- Logger caching for performance
"""

import logging
import os
import sys
from typing import Optional

import structlog


class LogConfig:
    """Central place for wiring up structlog for the backend.

    Exposes a single static entry point that installs the processor chain,
    the level filter and the renderer used by structured loggers.
    """

    @staticmethod
    def setup_logging(
        service_name: str = "unsloth-studio-backend", env: Optional[str] = None
    ) -> structlog.BoundLogger:
        """Install the global structlog configuration and return a logger.

        Args:
            service_name: Name handed to ``structlog.get_logger``.
            env: Environment name. ``"production"`` selects JSON output;
                any other value selects the console renderer.

        Returns:
            The logger obtained from ``structlog.get_logger(service_name)``.
        """
        # LOG_LEVEL picks the threshold; names that are not attributes of the
        # ``logging`` module fall back to INFO.
        requested_level = os.getenv("LOG_LEVEL", "INFO").upper()
        threshold = getattr(logging, requested_level, logging.INFO)

        def _flatten_extra(logger, method_name, event_dict):
            # Emit timestamp/level/event first, then the contents of "extra"
            # hoisted to the top level, then every remaining key.
            return {
                "timestamp": event_dict.get("timestamp"),
                "level": event_dict.get("level"),
                "event": event_dict.get("event"),
                **(event_dict.get("extra", {})),
                **{
                    key: val
                    for key, val in event_dict.items()
                    if key not in ["timestamp", "level", "event", "extra"]
                },
            }

        # JSON (insertion order preserved) in production, console otherwise.
        if env == "production":
            renderer = structlog.processors.JSONRenderer(sort_keys = False)
        else:
            renderer = structlog.dev.ConsoleRenderer()

        processor_chain = [
            structlog.processors.TimeStamper(fmt = "iso"),
            structlog.processors.add_log_level,
            structlog.contextvars.merge_contextvars,
            _flatten_extra,
            renderer,
        ]

        structlog.configure(
            processors = processor_chain,
            wrapper_class = structlog.make_filtering_bound_logger(threshold),
            logger_factory = structlog.PrintLoggerFactory(file = sys.stdout),
            cache_logger_on_first_use = True,
        )

        return structlog.get_logger(service_name)


================================================
FILE: studio/backend/loggers/handlers.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Logging handlers and middleware for structured logging.

This module provides FastAPI middleware and structlog processors for:
- Request/response logging with timing
- Sensitive data filtering in logs
- Structured logging configuration
- Error handling with detailed context

Key Components:
- LoggingMiddleware: FastAPI middleware for request/response logging
- filter_sensitive_data: Structlog processor for data sanitization
- get_logger: Factory function for structured loggers
"""

import time
from typing import Callable

import structlog
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware

logger = structlog.get_logger(__name__)


class LoggingMiddleware(BaseHTTPMiddleware):
    """Emit one structured log event per HTTP request.

    Successful requests are logged as ``request_completed`` with method,
    path, status code and elapsed milliseconds, unless the path matches one
    of the exclusion rules below. Requests whose handler raises are logged
    as ``request_failed`` and the exception is re-raised unchanged.
    """

    # Exact request paths whose successful completion is not logged.
    _EXCLUDED_PATHS = frozenset(
        {
            "/api/train/status",
            "/api/train/metrics",
            "/api/train/hardware",
            "/api/system",
        }
    )
    # Path prefix and file suffixes that are likewise not logged.
    _EXCLUDED_PREFIX = "/assets/"
    _EXCLUDED_SUFFIXES = (".png", ".jpg", ".jpeg", ".ico", ".woff", ".woff2", ".ttf")

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Run the downstream handler and log its outcome.

        Args:
            request: Incoming request.
            call_next: Callable that invokes the rest of the stack.

        Returns:
            The response produced by ``call_next``.

        Raises:
            Exception: Whatever ``call_next`` raised, after logging it.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system clock is adjusted.
        started = time.perf_counter()

        try:
            response = await call_next(request)
        except Exception as e:
            logger.error(
                "request_failed",
                path = request.url.path,
                method = request.method,
                error = str(e),
                process_time_ms = round((time.perf_counter() - started) * 1000, 2),
                exc_info = True,
            )
            raise

        process_time = (time.perf_counter() - started) * 1000
        path = request.url.path

        is_excluded = (
            path in self._EXCLUDED_PATHS
            or path.startswith(self._EXCLUDED_PREFIX)
            or path.endswith(self._EXCLUDED_SUFFIXES)
        )

        if not is_excluded:
            logger.info(
                "request_completed",
                method = request.method,
                path = path,
                status_code = response.status_code,
                process_time_ms = round(process_time, 2),
            )

        return response


def filter_sensitive_data(logger, method_name, event_dict):
    """Structlog processor that shortens long, base64-looking strings.

    A string is shortened to its first 20 characters plus ``"..."`` when it
    is longer than 100 characters and contains a comma or a slash. Dicts and
    lists are walked recursively; every other value is passed through.

    Args:
        logger: Unused; part of the structlog processor signature.
        method_name: Unused; part of the structlog processor signature.
        event_dict: Event dictionary to sanitize.

    Returns:
        A new dict with the same keys and sanitized values.
    """

    def _scrub(item):
        if isinstance(item, str):
            looks_encoded = len(item) > 100 and ("," in item or "/" in item)
            # Likely base64 data: keep only a short prefix.
            return item[:20] + "..." if looks_encoded else item
        if isinstance(item, dict):
            return {key: _scrub(val) for key, val in item.items()}
        if isinstance(item, list):
            return [_scrub(entry) for entry in item]
        return item

    return {key: _scrub(val) for key, val in event_dict.items()}


def get_logger(name: str) -> structlog.BoundLogger:
    """Return the structlog logger for ``name``.

    Args:
        name: Logger name, usually the calling module's ``__name__``.

    Returns:
        The object returned by ``structlog.get_logger(name)``.
    """
    module_logger = structlog.get_logger(name)
    return module_logger


================================================
FILE: studio/backend/main.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Main FastAPI application for Unsloth UI Backend
"""

import os

# Suppress annoying C-level dependency warnings globally
os.environ["PYTHONWARNINGS"] = "ignore"

import shutil
import sys
import warnings
from contextlib import asynccontextmanager

# Suppress annoying dependency warnings in production
# ENVIRONMENT_TYPE defaults to "production" when unset, so warnings are
# silenced unless the variable is explicitly set to another value.
if os.getenv("ENVIRONMENT_TYPE", "production") == "production":
    warnings.filterwarnings("ignore")
    # Alternatively, you can be more specific:
    # warnings.filterwarnings("ignore", category=DeprecationWarning)
    # warnings.filterwarnings("ignore", module="triton.*")

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse, HTMLResponse, Response
from pathlib import Path
from datetime import datetime

# Import routers
from routes import (
    auth_router,
    data_recipe_router,
    datasets_router,
    export_router,
    inference_router,
    models_router,
    training_router,
)
from auth import storage
from utils.hardware import detect_hardware, get_device, DeviceType
import utils.hardware.hardware as _hw_module

from utils.cache_cleanup import clear_unsloth_compiled_cache


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: startup work before ``yield``, cleanup after.

    Startup clears the compiled cache, removes a stale ``.venv_overlay``
    directory, detects hardware, starts a background pre-cache thread and
    seeds the default admin account if needed. The bootstrap password is
    stored on ``app.state.bootstrap_password`` in every case and printed
    only when the account was just created.

    Shutdown resets the detected device and clears the compiled cache. It
    runs in a ``finally`` block so it still executes when an exception or
    cancellation is thrown into the generator at the ``yield``.

    Args:
        app: The FastAPI application whose ``state`` receives the password.
    """
    # Clean up any stale compiled cache from previous runs
    clear_unsloth_compiled_cache()

    # Remove stale .venv_overlay from previous versions — no longer used.
    # Version switching now uses .venv_t5/ (pre-installed by setup.sh).
    overlay_dir = Path(__file__).resolve().parent.parent.parent / ".venv_overlay"
    if overlay_dir.is_dir():
        shutil.rmtree(overlay_dir, ignore_errors = True)

    # Detect hardware first — sets DEVICE global used everywhere
    detect_hardware()

    # Pre-cache the helper GGUF model for LLM-assisted dataset detection.
    # Runs in a background thread so it doesn't block server startup.
    import threading

    def _precache():
        try:
            from utils.datasets.llm_assist import precache_helper_gguf

            precache_helper_gguf()
        except Exception:
            pass  # non-critical: deliberate best-effort, never fail startup

    threading.Thread(target = _precache, daemon = True).start()

    # Same call order as before: seed first, then read the password once.
    admin_created = storage.ensure_default_admin()
    bootstrap_pw = storage.get_bootstrap_password()
    app.state.bootstrap_password = bootstrap_pw
    if admin_created:
        print("\n" + "=" * 60)
        print("DEFAULT ADMIN ACCOUNT CREATED")
        print(
            "Sign in with the seeded credentials and change the password immediately:\n"
        )
        print(f"    username: {storage.DEFAULT_ADMIN_USERNAME}")
        print(f"    password: {bootstrap_pw}\n")
        print("=" * 60 + "\n")

    try:
        yield
    finally:
        # Cleanup
        _hw_module.DEVICE = None
        clear_unsloth_compiled_cache()


# Create FastAPI app
# The lifespan context manager defined in this module handles startup and
# shutdown work.
app = FastAPI(
    title = "Unsloth UI Backend",
    version = "1.0.0",
    description = "Backend API for Unsloth UI - Training and Model Management",
    lifespan = lifespan,
)

# Initialize structured logging
# Imported here rather than at the top of the file.
# NOTE(review): presumably deliberate so logging is configured only after the
# app object exists — confirm before moving these imports.
from loggers.config import LogConfig
from loggers.handlers import LoggingMiddleware

# ENVIRONMENT_TYPE defaults to "production", which selects JSON log output.
logger = LogConfig.setup_logging(
    service_name = "unsloth-studio-backend",
    env = os.getenv("ENVIRONMENT_TYPE", "production"),
)

# Per-request structured logging.
app.add_middleware(LoggingMiddleware)

# CORS middleware
# NOTE(review): every origin, method and header is allowed and credentials
# are enabled at the same time — confirm this is acceptable for deployments
# reachable from untrusted networks.
app.add_middleware(
    CORSMiddleware,
    allow_origins = ["*"],  # In production, specify allowed origins
    allow_credentials = True,
    allow_methods = ["*"],
    allow_headers = ["*"],
)

# ============ Register API Routes ============

# Register routers
app.include_router(auth_router, prefix = "/api/auth", tags = ["auth"])
app.include_router(training_router, prefix = "/api/train", tags = ["training"])
app.include_router(models_router, prefix = "/api/models", tags = ["models"])
app.include_router(inference_router, prefix = "/api/inference", tags = ["inference"])

# OpenAI-compatible endpoints: mount the same inference router at /v1
# so external tools (Open WebUI, SillyTavern, etc.) can use the
# standard /v1/chat/completions path.
# The inference router is therefore registered twice, under two prefixes.
app.include_router(inference_router, prefix = "/v1", tags = ["openai-compat"])
app.include_router(datasets_router, prefix = "/api/datasets", tags = ["datasets"])
app.include_router(data_recipe_router, prefix = "/api/data-recipe", tags = ["data-recipe"])
app.include_router(export_router, prefix = "/api/export", tags = ["export"])


# ============ Health and System Endpoints ============


@app.get("/api/health")
async def health_check():
    """Report liveness plus the host OS label and chat-only flag.

    Returns:
        A dict with ``status``, ``timestamp`` (local ISO time), ``service``,
        ``device_type`` and ``chat_only``.
    """
    # Known sys.platform values get a friendly label; unknown ones pass through.
    os_labels = {"darwin": "mac", "win32": "windows", "linux": "linux"}
    host_label = os_labels.get(sys.platform, sys.platform)

    payload = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "service": "Unsloth UI Backend",
        "device_type": host_label,
        "chat_only": _hw_module.CHAT_ONLY,
    }
    return payload


def _parse_cuda_visible_devices(cvd):
    """Turn a CUDA_VISIBLE_DEVICES value into a set of allowed GPU indices.

    Returns ``None`` (meaning "no filtering") when the value is unset, blank,
    or contains any non-integer entry such as a GPU UUID.
    """
    if cvd is None or not cvd.strip():
        return None
    try:
        return {int(entry.strip()) for entry in cvd.split(",")}
    except ValueError:
        return None  # Non-numeric (e.g. GPU-uuid), show all


def _parse_nvidia_smi_devices(stdout, allowed_indices):
    """Parse ``index,name,memory.total`` CSV rows into device dicts.

    Rows that do not have exactly three fields, or whose index or memory
    field is not an integer, are skipped individually so one bad row cannot
    discard the rest. Rows whose index is not in ``allowed_indices`` (when it
    is not ``None``) are filtered out.
    """
    devices = []
    for line in stdout.strip().splitlines():
        parts = [p.strip() for p in line.split(",")]
        if len(parts) != 3:
            continue
        try:
            idx = int(parts[0])
            memory_total = int(parts[2])
        except ValueError:
            continue  # non-numeric placeholder in this row; skip only this row
        if allowed_indices is not None and idx not in allowed_indices:
            continue
        devices.append(
            {
                "index": idx,
                "name": parts[1],
                "memory_total_gb": round(memory_total / 1024, 2),
            }
        )
    return devices


@app.get("/api/system")
async def get_system_info():
    """Get system information.

    Returns:
        A dict with ``platform``, ``python_version``, ``device_backend``,
        ``cpu_count``, a ``memory`` sub-dict (``total_gb``, ``available_gb``,
        ``percent_used``) and a ``gpu`` sub-dict (``available``, ``devices``).
    """
    import platform
    import subprocess
    import psutil
    from utils.hardware import get_device, get_gpu_memory_info, DeviceType

    # GPU Info — query nvidia-smi for physical GPUs, filtered by
    # CUDA_VISIBLE_DEVICES when set (the frontend uses this for GGUF
    # fit estimation and llama-server respects CVD too).
    gpu_info: dict = {"available": False, "devices": []}

    device = get_device()
    if device == DeviceType.CUDA:
        allowed_indices = _parse_cuda_visible_devices(
            os.environ.get("CUDA_VISIBLE_DEVICES")
        )

        try:
            result = subprocess.run(
                [
                    "nvidia-smi",
                    "--query-gpu=index,name,memory.total",
                    "--format=csv,noheader,nounits",
                ],
                capture_output = True,
                text = True,
                timeout = 10,
            )
        except Exception:
            # Best effort: nvidia-smi missing or timed out; use the fallback.
            result = None

        if result is not None and result.returncode == 0:
            # Assign the list in one step so a partial parse can never be
            # left behind for the fallback below to append to.
            gpu_info["devices"] = _parse_nvidia_smi_devices(
                result.stdout, allowed_indices
            )
            gpu_info["available"] = len(gpu_info["devices"]) > 0

    # Fallback to torch-based single-GPU detection
    if not gpu_info["available"]:
        mem_info = get_gpu_memory_info()
        if mem_info.get("available"):
            gpu_info["available"] = True
            gpu_info["devices"].append(
                {
                    "index": mem_info.get("device", 0),
                    "name": mem_info.get("device_name", "Unknown"),
                    "memory_total_gb": round(mem_info.get("total_gb", 0), 2),
                }
            )

    # CPU & Memory
    memory = psutil.virtual_memory()

    return {
        "platform": platform.platform(),
        "python_version": platform.python_version(),
        "device_backend": device.value,
        "cpu_count": psutil.cpu_count(),
        "memory": {
            "total_gb": round(memory.total / 1e9, 2),
            "available_gb": round(memory.available / 1e9, 2),
            "percent_used": memory.percent,
        },
        "gpu": gpu_info,
    }


@app.get("/api/system/hardware")
async def get_hardware_info():
    """Return the GPU summary and package versions reported by ``utils.hardware``.

    Returns:
        A dict with ``gpu`` (from ``get_gpu_summary``) and ``versions``
        (from ``get_package_versions``).
    """
    from utils.hardware import get_gpu_summary, get_package_versions

    gpu_summary = get_gpu_summary()
    package_versions = get_package_versions()
    return {"gpu": gpu_summary, "versions": package_versions}


# ============ Serve Frontend (Optional) ============


def _strip_crossorigin(html_bytes: bytes) -> bytes:
    """Remove ``crossorigin`` attributes from script/link tags.

    Vite adds ``crossorigin`` by default which forces CORS mode on font
    subresource loads.  When Studio is served over plain HTTP, Firefox
    HTTPS-Only Mode does not exempt CORS font requests -- causing all
    @font-face downloads to fail silently.  Stripping the attribute
    makes them regular same-origin fetches that work on any protocol.
    """
    import re as _re

    html = html_bytes.decode("utf-8")
    html = _re.sub(r'\s+crossorigin(?:="[^"]*")?', "", html)
    return html.encode("utf-8")


def _inject_bootstrap(html_bytes: bytes, app: FastAPI) -> bytes:
    """Inject bootstrap credentials into HTML when password change is required.

    The script tag is only injected while the default admin account still
    has ``must_change_password=True``.  Once the user changes the password
    the HTML is served clean — no credentials leak.

    Args:
        html_bytes: UTF-8 encoded HTML document.
        app: Application whose ``state.bootstrap_password`` holds the password.

    Returns:
        ``html_bytes`` unchanged when no injection is needed or no password
        is stored; otherwise the document with a ``<script>`` tag inserted
        before the first ``</head>``.
    """
    import json as _json

    if not storage.requires_password_change(storage.DEFAULT_ADMIN_USERNAME):
        return html_bytes

    bootstrap_pw = getattr(app.state, "bootstrap_password", None)
    if not bootstrap_pw:
        return html_bytes

    payload = _json.dumps(
        {
            "username": storage.DEFAULT_ADMIN_USERNAME,
            "password": bootstrap_pw,
        }
    )
    # json.dumps leaves "<" as-is, so a value containing "</script>" would
    # end the inline script early. "\u003c" is the same character to the
    # JavaScript parser but is inert to the HTML parser.
    payload = payload.replace("<", "\\u003c")

    tag = f"<script>window.__UNSLOTH_BOOTSTRAP__={payload}</script>"
    html = html_bytes.decode("utf-8")
    html = html.replace("</head>", f"{tag}</head>", 1)
    return html.encode("utf-8")


def setup_frontend(app: FastAPI, build_path: Path):
    """Register routes that serve the built frontend, if it exists.

    Args:
        app: Application to register the routes and static mount on.
        build_path: Directory containing ``index.html`` and, optionally,
            an ``assets`` sub-directory.

    Returns:
        ``False`` when ``build_path`` does not exist (nothing registered),
        ``True`` otherwise.
    """
    if not build_path.exists():
        return False

    # Mount assets
    assets_dir = build_path / "assets"
    if assets_dir.exists():
        app.mount("/assets", StaticFiles(directory = assets_dir), name = "assets")

    def _index_response() -> Response:
        # Serve index.html as bytes — avoids Content-Length mismatch
        page = (build_path / "index.html").read_bytes()
        page = _strip_crossorigin(page)
        page = _inject_bootstrap(page, app)
        return Response(
            content = page,
            media_type = "text/html",
            headers = {"Cache-Control": "no-cache, no-store, must-revalidate"},
        )

    @app.get("/")
    async def serve_root():
        return _index_response()

    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        if full_path.startswith("api"):
            return {"error": "API endpoint not found"}

        candidate = (build_path / full_path).resolve()

        # Block path traversal — ensure resolved path stays inside build_path
        stays_inside = candidate.is_relative_to(build_path.resolve())
        if not stays_inside:
            return Response(status_code = 403)

        if candidate.is_file():
            return FileResponse(candidate)

        # Any other path gets the index page.
        return _index_response()

    return True


================================================
FILE: studio/backend/models/.gitkeep
================================================


================================================
FILE: studio/backend/models/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic models for API request/response schemas
"""

from .training import (
    TrainingStartRequest,
    TrainingJobResponse,
    TrainingStatus,
    TrainingProgress,
)
from .models import (
    CheckpointInfo,
    ModelCheckpoints,
    CheckpointListResponse,
    ModelDetails,
    LocalModelInfo,
    LocalModelListResponse,
    LoRAInfo,
    LoRAScanResponse,
    ModelListResponse,
)
from .auth import (
    AuthLoginRequest,
    RefreshTokenRequest,
    AuthStatusResponse,
    ChangePasswordRequest,
)
from .export import (
    LoadCheckpointRequest,
    ExportStatusResponse,
    ExportOperationResponse,
    ExportMergedModelRequest,
    ExportBaseModelRequest,
    ExportGGUFRequest,
    ExportLoRAAdapterRequest,
)
from .users import Token
from .datasets import (
    CheckFormatRequest,
    CheckFormatResponse,
)
from .inference import (
    LoadRequest,
    UnloadRequest,
    GenerateRequest,
    LoadResponse,
    UnloadResponse,
    InferenceStatusResponse,
)
from .responses import (
    TrainingStopResponse,
    TrainingMetricsResponse,
    LoRABaseModelResponse,
    VisionCheckResponse,
    EmbeddingCheckResponse,
)
from .data_recipe import (
    RecipePayload,
    PreviewResponse,
    ValidateError,
    ValidateResponse,
    JobCreateResponse,
)

__all__ = [
    # Training schemas
    "TrainingStartRequest",
    "TrainingJobResponse",
    "TrainingStatus",
    "TrainingProgress",
    # Model management schemas
    "ModelDetails",
    "LocalModelInfo",
    "LocalModelListResponse",
    "LoRAInfo",
    "LoRAScanResponse",
    "ModelListResponse",
    # Auth schemas
    "AuthLoginRequest",
    "RefreshTokenRequest",
    "AuthStatusResponse",
    "ChangePasswordRequest",
    # Export schemas
    "CheckpointInfo",
    "ModelCheckpoints",
    "CheckpointListResponse",
    "LoadCheckpointRequest",
    "ExportStatusResponse",
    "ExportOperationResponse",
    "ExportMergedModelRequest",
    "ExportBaseModelRequest",
    "ExportGGUFRequest",
    "ExportLoRAAdapterRequest",
    "Token",
    # Dataset schemas
    "CheckFormatRequest",
    "CheckFormatResponse",
    # Inference schemas
    "LoadRequest",
    "UnloadRequest",
    "GenerateRequest",
    "LoadResponse",
    "UnloadResponse",
    "InferenceStatusResponse",
    # Response schemas
    "TrainingStopResponse",
    "TrainingMetricsResponse",
    "LoRABaseModelResponse",
    "VisionCheckResponse",
    "EmbeddingCheckResponse",
    # Data recipe
    "RecipePayload",
    "PreviewResponse",
    "ValidateError",
    "ValidateResponse",
    "JobCreateResponse",
]


================================================
FILE: studio/backend/models/auth.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic schemas for Authentication API
"""

from pydantic import BaseModel, Field


class AuthLoginRequest(BaseModel):
    """Login payload: username/password to obtain a JWT."""

    # Both fields are required (``...`` means no default).
    username: str = Field(..., description = "Username")
    password: str = Field(..., description = "Password")


class RefreshTokenRequest(BaseModel):
    """Refresh token payload to obtain new access + refresh tokens."""

    # Required; no format or length constraint is applied at schema level.
    refresh_token: str = Field(
        ..., description = "Refresh token from a previous login or refresh"
    )


class AuthStatusResponse(BaseModel):
    """Indicate whether the seeded admin auth flow is ready."""

    # All three fields are required.
    initialized: bool = Field(
        ..., description = "True if the auth database contains a login user"
    )
    default_username: str = Field(..., description = "Default seeded admin username")
    requires_password_change: bool = Field(
        ...,
        description = "True if the seeded admin must still change the default password",
    )


class ChangePasswordRequest(BaseModel):
    """Change the current user's password, typically on first login."""

    # NOTE(review): the 8-character minimum also applies to the *current*
    # password, so an existing password shorter than 8 characters is rejected
    # by schema validation — confirm that is intended.
    current_password: str = Field(
        ..., min_length = 8, description = "Existing password for the authenticated user"
    )
    new_password: str = Field(
        ..., min_length = 8, description = "Replacement password (minimum 8 characters)"
    )


================================================
FILE: studio/backend/models/data_recipe.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic schemas for Data Recipe (DataDesigner) API.
"""

from __future__ import annotations

from typing import Any

from pydantic import BaseModel, Field


class RecipePayload(BaseModel):
    # Free-form recipe definition; defaults to an empty dict when omitted.
    recipe: dict[str, Any] = Field(default_factory = dict)
    # Optional free-form sections; their schema is not enforced here.
    run: dict[str, Any] | None = None
    ui: dict[str, Any] | None = None


class PreviewResponse(BaseModel):
    # List of row dicts; defaults to an empty list.
    dataset: list[dict[str, Any]] = Field(default_factory = list)
    # Optional free-form payloads; None when absent.
    processor_artifacts: dict[str, Any] | None = None
    analysis: dict[str, Any] | None = None


class ValidateError(BaseModel):
    # Only the message is required; path and code are optional.
    message: str
    path: str | None = None
    code: str | None = None


class ValidateResponse(BaseModel):
    # Required validation verdict.
    valid: bool
    # Structured errors; defaults to an empty list.
    errors: list[ValidateError] = Field(default_factory = list)
    # Optional unstructured detail text.
    raw_detail: str | None = None


class JobCreateResponse(BaseModel):
    # Required job identifier.
    job_id: str


class PublishDatasetRequest(BaseModel):
    # Required; at least 3 characters.
    repo_id: str = Field(min_length = 3, description = "Hugging Face dataset repo ID")
    # Required; between 1 and 4000 characters.
    description: str = Field(
        min_length = 1,
        max_length = 4000,
        description = "Short dataset description for the dataset card",
    )
    hf_token: str | None = Field(
        default = None,
        description = "Optional Hugging Face token for private or write-protected repos",
    )
    # Defaults to a public repo.
    private: bool = Field(
        default = False,
        description = "Create or update the dataset repo as private",
    )
    artifact_path: str | None = Field(
        default = None,
        description = "Execution artifact path captured by the UI for completed runs",
    )


class PublishDatasetResponse(BaseModel):
    # Defaults to True; url and message are required.
    success: bool = True
    url: str
    message: str


class SeedInspectRequest(BaseModel):
    # Required, non-empty.
    dataset_name: str = Field(min_length = 1)
    hf_token: str | None = None
    subset: str | None = None
    # Defaults to "train"; may be set to None explicitly.
    split: str | None = "train"
    # Between 1 and 50 inclusive; defaults to 10.
    preview_size: int = Field(default = 10, ge = 1, le = 50)


class SeedInspectUploadRequest(BaseModel):
    # Both required and non-empty. Only the length of content_base64 is
    # checked here; whether it is valid base64 is not.
    filename: str = Field(min_length = 1)
    content_base64: str = Field(min_length = 1)
    # Between 1 and 50 inclusive; defaults to 10.
    preview_size: int = Field(default = 10, ge = 1, le = 50)
    seed_source_type: str | None = None
    # Optional chunking parameters: size in 1..20000, overlap in 0..20000.
    # NOTE(review): nothing here requires overlap to be smaller than size —
    # confirm that is checked where these values are consumed.
    unstructured_chunk_size: int | None = Field(default = None, ge = 1, le = 20000)
    unstructured_chunk_overlap: int | None = Field(default = None, ge = 0, le = 20000)


class SeedInspectResponse(BaseModel):
    # Required identification fields.
    dataset_name: str
    resolved_path: str
    # Column names and preview rows; both default to empty lists.
    columns: list[str] = Field(default_factory = list)
    preview_rows: list[dict[str, Any]] = Field(default_factory = list)
    # Optional; None when not provided.
    split: str | None = None
    subset: str | None = None


class McpToolsListRequest(BaseModel):
    # Free-form provider dicts; defaults to an empty list.
    mcp_providers: list[dict[str, Any]] = Field(default_factory = list)
    # Optional; must be strictly positive when given.
    timeout_sec: float | None = Field(default = None, gt = 0)


class McpToolsProviderResult(BaseModel):
    # Required provider name.
    name: str
    # Tool names; defaults to an empty list.
    tools: list[str] = Field(default_factory = list)
    # Optional error text; None when absent.
    error: str | None = None


class McpToolsListResponse(BaseModel):
    # One result per provider; defaults to an empty list.
    providers: list[McpToolsProviderResult] = Field(default_factory = list)
    # Maps a string key to a list of strings; defaults to an empty dict.
    # NOTE(review): presumably tool name -> providers exposing it — confirm.
    duplicate_tools: dict[str, list[str]] = Field(default_factory = dict)


================================================
FILE: studio/backend/models/datasets.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Dataset-related Pydantic models for API requests and responses.
"""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, model_validator


class CheckFormatRequest(BaseModel):
    """Request for dataset format check"""

    dataset_name: str  # HuggingFace dataset name or local path
    is_vlm: bool = False
    hf_token: Optional[str] = None
    subset: Optional[str] = None
    train_split: Optional[str] = "train"

    @model_validator(mode = "before")
    @classmethod
    def _compat_split(cls, values: Any) -> Any:
        """Accept legacy 'split' field as alias for 'train_split'.

        Runs before field validation. When the raw input is a dict that
        contains ``split``, that key is removed and its value is used for
        ``train_split`` unless ``train_split`` was supplied explicitly.
        Non-dict inputs are returned untouched.
        """
        if isinstance(values, dict) and "split" in values:
            # Work on a shallow copy so the caller's payload is not mutated
            # (a "before" validator receives the very object the caller passed in).
            values = dict(values)
            legacy_split = values.pop("split")
            values.setdefault("train_split", legacy_split)
        return values


# Only the first three fields are required; every other field has a default.
class CheckFormatResponse(BaseModel):
    """Response for dataset format check"""

    requires_manual_mapping: bool  # required
    detected_format: str  # required
    columns: List[str]  # required
    is_image: bool = False
    is_audio: bool = False
    multimodal_columns: Optional[List[str]] = None
    suggested_mapping: Optional[Dict[str, str]] = None
    # Per-role column detections; each stays None when not reported.
    detected_image_column: Optional[str] = None
    detected_audio_column: Optional[str] = None
    detected_text_column: Optional[str] = None
    detected_speaker_column: Optional[str] = None
    preview_samples: Optional[List[Dict]] = None  # untyped dict rows
    total_rows: Optional[int] = None
    warning: Optional[str] = None


# `columns` and `samples` are required; the remaining fields are optional context.
class AiAssistMappingRequest(BaseModel):
    """Request for LLM-assisted column classification (user-triggered)."""

    columns: List[str]  # required
    samples: List[Dict[str, Any]]  # Preview rows already loaded in the dialog
    dataset_name: Optional[str] = None  # For LLM context
    hf_token: Optional[str] = None  # For fetching dataset card
    model_name: Optional[str] = None
    model_type: Optional[str] = None


# Only `success` is required; every other field defaults to None.
class AiAssistMappingResponse(BaseModel):
    """Response from LLM-assisted column classification and conversion advice."""

    success: bool  # required
    suggested_mapping: Optional[Dict[str, str]] = None
    warning: Optional[str] = None
    # Conversion advisor fields
    system_prompt: Optional[str] = None
    label_mapping: Optional[Dict[str, Dict[str, str]]] = None  # two-level str -> str -> str mapping
    dataset_type: Optional[str] = None
    is_conversational: Optional[bool] = None
    user_notification: Optional[str] = None


# Both fields are required; neither has a default.
class UploadDatasetResponse(BaseModel):
    """Response with stored dataset path for training."""

    filename: str = Field(
        ...,
        description = "Original filename",
    )
    stored_path: str = Field(
        ...,
        description = "Absolute path stored on backend",
    )


# One locally available dataset entry; `id`, `label` and `path` are required.
class LocalDatasetItem(BaseModel):
    # Optional nested block of record/batch counters and column names; every field defaults to None.
    class Metadata(BaseModel):
        actual_num_records: Optional[int] = None
        target_num_records: Optional[int] = None
        total_num_batches: Optional[int] = None
        num_completed_batches: Optional[int] = None
        columns: Optional[List[str]] = None

    id: str
    label: str
    path: str
    rows: Optional[int] = None
    updated_at: Optional[float] = None  # NOTE(review): presumably a Unix timestamp — confirm with the producer
    metadata: Optional[Metadata] = None


# Response wrapper around a list of local dataset entries.
class LocalDatasetsResponse(BaseModel):
    datasets: List[LocalDatasetItem] = Field(default_factory = list)  # empty list when omitted


================================================
FILE: studio/backend/models/export.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic schemas for Export API.
"""

from pydantic import BaseModel, Field
from typing import List, Optional, Literal, Dict, Any


class LoadCheckpointRequest(BaseModel):
    """Request for loading a checkpoint into the export backend."""

    # Required: there is no default checkpoint.
    checkpoint_path: str = Field(
        ...,
        description = "Path to the checkpoint directory",
    )
    # Validated to lie within [128, 32768].
    max_seq_length: int = Field(
        default = 2048,
        ge = 128,
        le = 32768,
        description = "Maximum sequence length for loading the model",
    )
    load_in_4bit: bool = Field(
        default = True,
        description = "Whether to load the model in 4-bit quantization",
    )
    # Opt-in only: custom code loading stays off unless explicitly requested.
    trust_remote_code: bool = Field(
        default = False,
        description = "Allow loading models with custom code. Only enable for checkpoints/base models you trust.",
    )


# Every field has a default, so an empty payload validates.
class ExportStatusResponse(BaseModel):
    """Current export backend status."""

    current_checkpoint: Optional[str] = Field(
        default = None,
        description = "Path to the currently loaded checkpoint, if any",
    )
    is_vision: bool = Field(
        default = False,
        description = "True if the loaded checkpoint is a vision model",
    )
    is_peft: bool = Field(
        default = False,
        description = "True if the loaded checkpoint is a PEFT (LoRA) model",
    )


# `success` and `message` are required; `details` is optional.
class ExportOperationResponse(BaseModel):
    """Generic response for export operations."""

    success: bool = Field(
        ...,
        description = "True if the operation succeeded",
    )
    message: str = Field(
        ...,
        description = "Human-readable status or error message",
    )
    details: Optional[Dict[str, Any]] = Field(
        default = None,
        description = "Optional extra details about the operation",
    )


# Shared base for the export request schemas that subclass it.
class ExportCommonOptions(BaseModel):
    """Common options for export operations that save locally and/or push to Hub."""

    # The only required option.
    save_directory: str = Field(
        ...,
        description = "Local directory where the exported artifacts will be written",
    )
    push_to_hub: bool = Field(
        default = False,
        description = "If True, also push the exported model to the Hugging Face Hub",
    )
    repo_id: Optional[str] = Field(
        default = None,
        description = "Hugging Face Hub repository ID (username/model-name)",
    )
    hf_token: Optional[str] = Field(
        default = None,
        description = "Hugging Face access token used for Hub operations",
    )
    private: bool = Field(
        default = False,
        description = "If True, create a private repository on the Hub (where applicable)",
    )
    base_model_id: Optional[str] = Field(
        default = None,
        description = "HuggingFace model ID of the base model (for model card metadata)",
    )


# Extends the common export options with a single format selector.
class ExportMergedModelRequest(ExportCommonOptions):
    """Request for exporting a merged PEFT model."""

    # Only the two literal labels below validate; the 16-bit one is the default.
    format_type: Literal["16-bit (FP16)", "4-bit (FP4)"] = Field(
        default = "16-bit (FP16)",
        description = "Export precision / format for the merged model",
    )


# Distinct schema name with no extra fields; everything is inherited from ExportCommonOptions.
class ExportBaseModelRequest(ExportCommonOptions):
    """Request for exporting a non-PEFT (base) model."""

    # Uses fields from ExportCommonOptions only


# Derives from BaseModel directly rather than ExportCommonOptions, so it has
# no `private` or `base_model_id` fields.
class ExportGGUFRequest(BaseModel):
    """Request for exporting the current model to GGUF format."""

    save_directory: str = Field(
        ...,
        description = "Directory where GGUF files will be saved",
    )
    # Free-form string; the value is not restricted to a fixed set here.
    quantization_method: str = Field(
        default = "Q4_K_M",
        description = 'GGUF quantization method (e.g. "Q4_K_M")',
    )
    push_to_hub: bool = Field(
        default = False,
        description = "If True, also push GGUF artifacts to the Hugging Face Hub",
    )
    repo_id: Optional[str] = Field(
        default = None,
        description = "Hugging Face Hub repository ID for GGUF upload",
    )
    hf_token: Optional[str] = Field(
        default = None,
        description = "Hugging Face token for GGUF upload",
    )


# Distinct schema name with no extra fields; everything is inherited from ExportCommonOptions.
class ExportLoRAAdapterRequest(ExportCommonOptions):
    """Request for exporting only the LoRA adapter (not merged)."""

    # Uses fields from ExportCommonOptions only


================================================
FILE: studio/backend/models/inference.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic schemas for Inference API
"""

from __future__ import annotations

import time
import uuid
from typing import Annotated, Any, Dict, Literal, Optional, List, Union

from pydantic import BaseModel, Discriminator, Field, Tag


# Only `model_path` is required; every other option has a default.
class LoadRequest(BaseModel):
    """Request to load a model for inference"""

    model_path: str = Field(
        ...,
        description = "Model identifier or local path",
    )
    hf_token: Optional[str] = Field(
        default = None,
        description = "HuggingFace token for gated models",
    )
    # Validated to lie within [128, 32768].
    max_seq_length: int = Field(
        default = 4096,
        ge = 128,
        le = 32768,
        description = "Maximum sequence length",
    )
    load_in_4bit: bool = Field(
        default = True,
        description = "Load model in 4-bit quantization",
    )
    is_lora: bool = Field(
        default = False,
        description = "Whether this is a LoRA adapter",
    )
    gguf_variant: Optional[str] = Field(
        default = None,
        description = "GGUF quantization variant (e.g. 'Q4_K_M')",
    )
    # Opt-in only: custom code loading stays off unless explicitly requested.
    trust_remote_code: bool = Field(
        default = False,
        description = "Allow loading models with custom code (e.g. NVIDIA Nemotron). Only enable for repos you trust.",
    )
    chat_template_override: Optional[str] = Field(
        default = None,
        description = "Custom Jinja2 chat template to use instead of the model's default",
    )
    cache_type_kv: Optional[str] = Field(
        default = None,
        description = "KV cache data type for both K and V (e.g. 'f16', 'bf16', 'q8_0', 'q4_1', 'q5_1')",
    )


# Single required field naming the model to unload.
class UnloadRequest(BaseModel):
    """Request to unload a model"""

    model_path: str = Field(
        ...,
        description = "Model identifier to unload",
    )


# Only `model_path` is required.
class ValidateModelRequest(BaseModel):
    """
    Lightweight validation request to check whether a model identifier
    *can be resolved* into a ModelConfig.

    This does NOT actually load weights into GPU memory.
    """

    model_path: str = Field(
        ...,
        description = "Model identifier or local path",
    )
    hf_token: Optional[str] = Field(
        default = None,
        description = "HuggingFace token for gated models",
    )
    gguf_variant: Optional[str] = Field(
        default = None,
        description = "GGUF quantization variant (e.g. 'Q4_K_M')",
    )


# `valid` and `message` are required; the remaining fields have defaults.
class ValidateModelResponse(BaseModel):
    """
    Result of model validation.

    valid == True means ModelConfig.from_identifier() succeeded and basic
    introspection (GGUF / LoRA / vision flags) is available.
    """

    valid: bool = Field(
        ...,
        description = "Whether the model identifier looks valid",
    )
    message: str = Field(
        ...,
        description = "Human-readable validation message",
    )
    identifier: Optional[str] = Field(
        default = None,
        description = "Resolved model identifier",
    )
    display_name: Optional[str] = Field(
        default = None,
        description = "Display name derived from identifier",
    )
    # Introspection flags; all default to False.
    is_gguf: bool = Field(
        default = False,
        description = "Whether this is a GGUF model (llama.cpp)",
    )
    is_lora: bool = Field(
        default = False,
        description = "Whether this is a LoRA adapter",
    )
    is_vision: bool = Field(
        default = False,
        description = "Whether this is a vision-capable model",
    )


# Only `messages` is required; every sampling parameter is range-validated and has a default.
class GenerateRequest(BaseModel):
    """Request for text generation (legacy /generate/stream endpoint)"""

    # Messages are accepted as plain dicts; their keys are not validated here.
    messages: List[dict] = Field(
        ...,
        description = "Chat messages in OpenAI format",
    )
    system_prompt: str = Field(
        default = "",
        description = "System prompt",
    )
    temperature: float = Field(
        default = 0.6,
        ge = 0.0,
        le = 2.0,
        description = "Sampling temperature",
    )
    top_p: float = Field(
        default = 0.95,
        ge = 0.0,
        le = 1.0,
        description = "Top-p sampling",
    )
    # The lower bound of -1 is allowed by validation.
    top_k: int = Field(
        default = 20,
        ge = -1,
        le = 100,
        description = "Top-k sampling",
    )
    max_new_tokens: int = Field(
        default = 2048,
        ge = 1,
        le = 4096,
        description = "Maximum tokens to generate",
    )
    repetition_penalty: float = Field(
        default = 1.0,
        ge = 1.0,
        le = 2.0,
        description = "Repetition penalty",
    )
    presence_penalty: float = Field(
        default = 0.0,
        ge = 0.0,
        le = 2.0,
        description = "Presence penalty",
    )
    image_base64: Optional[str] = Field(
        default = None,
        description = "Base64 encoded image for vision models",
    )


# Required fields: `status`, `model`, `display_name`, `inference`. All others have defaults.
class LoadResponse(BaseModel):
    """Response after loading a model"""

    status: str = Field(
        ...,
        description = "Load status",
    )
    model: str = Field(
        ...,
        description = "Model identifier",
    )
    display_name: str = Field(
        ...,
        description = "Display name of the model",
    )
    # Capability flags; all default to False.
    is_vision: bool = Field(
        default = False,
        description = "Whether model is a vision model",
    )
    is_lora: bool = Field(
        default = False,
        description = "Whether model is a LoRA adapter",
    )
    is_gguf: bool = Field(
        default = False,
        description = "Whether model is a GGUF model (llama.cpp)",
    )
    is_audio: bool = Field(
        default = False,
        description = "Whether model is a TTS audio model",
    )
    audio_type: Optional[str] = Field(
        default = None,
        description = "Audio codec type: snac, csm, bicodec, dac",
    )
    has_audio_input: bool = Field(
        default = False,
        description = "Whether model accepts audio input (ASR)",
    )
    # Required, untyped dict.
    inference: dict = Field(
        ...,
        description = "Inference parameters (temperature, top_p, top_k, min_p)",
    )
    context_length: Optional[int] = Field(
        default = None,
        description = "Model's native context length (from GGUF metadata)",
    )
    supports_reasoning: bool = Field(
        default = False,
        description = "Whether model supports thinking/reasoning mode (enable_thinking)",
    )
    supports_tools: bool = Field(
        default = False,
        description = "Whether model supports tool calling (web search, etc.)",
    )
    cache_type_kv: Optional[str] = Field(
        default = None,
        description = "KV cache data type for K and V (e.g. 'f16', 'bf16', 'q8_0')",
    )
    chat_template: Optional[str] = Field(
        default = None,
        description = "Jinja2 chat template string (from GGUF metadata or tokenizer)",
    )


# Both fields are required.
class UnloadResponse(BaseModel):
    """Response after unloading a model"""

    status: str = Field(
        ...,
        description = "Unload status",
    )
    model: str = Field(
        ...,
        description = "Model identifier that was unloaded",
    )


# Every field has a default, so an empty payload validates.
class InferenceStatusResponse(BaseModel):
    """Current inference backend status"""

    active_model: Optional[str] = Field(
        default = None,
        description = "Currently active model identifier",
    )
    is_vision: bool = Field(
        default = False,
        description = "Whether the active model is a vision model",
    )
    is_gguf: bool = Field(
        default = False,
        description = "Whether the active model is a GGUF model (llama.cpp)",
    )
    gguf_variant: Optional[str] = Field(
        default = None,
        description = "GGUF quantization variant (e.g. Q4_K_M)",
    )
    is_audio: bool = Field(
        default = False,
        description = "Whether the active model is a TTS audio model",
    )
    audio_type: Optional[str] = Field(
        default = None,
        description = "Audio codec type: snac, csm, bicodec, dac",
    )
    has_audio_input: bool = Field(
        default = False,
        description = "Whether model accepts audio input (ASR)",
    )
    # List fields use a factory so each instance gets its own empty list.
    loading: List[str] = Field(
        default_factory = list,
        description = "Models currently being loaded",
    )
    loaded: List[str] = Field(
        default_factory = list,
        description = "Models currently loaded",
    )
    inference: Optional[Dict[str, Any]] = Field(
        default = None,
        description = "Recommended inference parameters for the active model",
    )
    supports_reasoning: bool = Field(
        default = False,
        description = "Whether the active model supports reasoning/thinking mode",
    )
    supports_tools: bool = Field(
        default = False,
        description = "Whether the active model supports tool calling",
    )
    context_length: Optional[int] = Field(
        default = None,
        description = "Context length of the active model",
    )


# =====================================================================
# OpenAI-Compatible Chat Completions Models
# =====================================================================


# ── Multimodal content parts (OpenAI vision format) ──────────────


class TextContentPart(BaseModel):
    """Text content part in a multimodal message."""

    type: Literal["text"]  # required; only the literal "text" validates
    text: str  # required


class ImageUrl(BaseModel):
    """Image URL object — supports data URIs and remote URLs."""

    # Required plain string; no URL format validation is applied by this schema.
    url: str = Field(
        ...,
        description = "data:image/png;base64,... or https://...",
    )
    # One of the three literals or None; defaults to "auto".
    detail: Optional[Literal["auto", "low", "high"]] = "auto"


class ImageContentPart(BaseModel):
    """Image content part in a multimodal message."""

    type: Literal["image_url"]  # required; only the literal "image_url" validates
    image_url: ImageUrl  # required nested object


def _content_part_discriminator(v):
    if isinstance(v, dict):
        return v.get("type")
    return getattr(v, "type", None)


# Tagged union of the two content-part models. The callable discriminator
# extracts the "type" value from the input, and that value is matched
# against the Tag(...) attached to each member.
ContentPart = Annotated[
    Union[
        Annotated[TextContentPart, Tag("text")],
        Annotated[ImageContentPart, Tag("image_url")],
    ],
    Discriminator(_content_part_discriminator),
]
"""Union type for multimodal content parts, discriminated by the 'type' field."""


# ── Messages ─────────────────────────────────────────────────────


# Both fields are required.
class ChatMessage(BaseModel):
    """
    A single message in the conversation.

    ``content`` may be a plain string (text-only) or a list of
    content parts for multimodal messages (OpenAI vision format).
    """

    # Only these three roles validate.
    role: Literal["system", "user", "assistant"] = Field(
        ...,
        description = "Message role",
    )
    content: Union[str, list[ContentPart]] = Field(
        ...,
        description = "Message content (string or multimodal parts)",
    )


# Only `messages` is required; every other field has a default.
class ChatCompletionRequest(BaseModel):
    """
    OpenAI-compatible chat completion request.

    Extensions (non-OpenAI fields) are marked with 'x-unsloth'.
    """

    model: str = Field(
        default = "default",
        description = "Model identifier (informational; the active model is used)",
    )
    messages: list[ChatMessage] = Field(
        ...,
        description = "Conversation messages",
    )
    # Streaming is on by default.
    stream: bool = Field(
        default = True,
        description = "Whether to stream the response via SSE",
    )
    temperature: float = Field(
        default = 0.6,
        ge = 0.0,
        le = 2.0,
    )
    top_p: float = Field(
        default = 0.95,
        ge = 0.0,
        le = 1.0,
    )
    max_tokens: Optional[int] = Field(
        default = None,
        ge = 1,
        description = "Maximum tokens to generate (None = until EOS)",
    )
    presence_penalty: float = Field(
        default = 0.0,
        ge = 0.0,
        le = 2.0,
        description = "Presence penalty",
    )

    # ── Unsloth extensions (ignored by standard OpenAI clients) ──
    top_k: int = Field(
        default = 20,
        ge = -1,
        le = 100,
        description = "[x-unsloth] Top-k sampling",
    )
    min_p: float = Field(
        default = 0.01,
        ge = 0.0,
        le = 1.0,
        description = "[x-unsloth] Min-p sampling threshold",
    )
    repetition_penalty: float = Field(
        default = 1.1,
        ge = 1.0,
        le = 2.0,
        description = "[x-unsloth] Repetition penalty",
    )
    image_base64: Optional[str] = Field(
        default = None,
        description = "[x-unsloth] Base64-encoded image for vision models",
    )
    audio_base64: Optional[str] = Field(
        default = None,
        description = "[x-unsloth] Base64-encoded WAV for audio-input models (ASR)",
    )
    # Tri-state plus name: None, a bool, or an adapter name string.
    use_adapter: Optional[Union[bool, str]] = Field(
        default = None,
        description = (
            "[x-unsloth] Adapter control for compare mode. "
            "null = no change (default), "
            "false = disable adapters (base model), "
            "true = enable the current adapter, "
            "string = enable a specific adapter by name."
        ),
    )
    enable_thinking: Optional[bool] = Field(
        default = None,
        description = "[x-unsloth] Enable/disable thinking/reasoning mode for supported models",
    )
    enable_tools: Optional[bool] = Field(
        default = None,
        description = "[x-unsloth] Enable tool calling for supported models",
    )
    enabled_tools: Optional[list[str]] = Field(
        default = None,
        description = "[x-unsloth] List of enabled tool names (e.g. ['web_search', 'python', 'terminal']). If None, all tools are enabled.",
    )
    auto_heal_tool_calls: Optional[bool] = Field(
        default = True,
        description = "[x-unsloth] Auto-detect and fix malformed tool calls from model output.",
    )
    # Validation only enforces the lower bound; there is no upper limit.
    max_tool_calls_per_message: Optional[int] = Field(
        default = 10,
        ge = 0,
        description = "[x-unsloth] Maximum number of tool call iterations per message (0 = disabled, 9999 = unlimited).",
    )
    tool_call_timeout: Optional[int] = Field(
        default = 300,
        ge = 1,
        description = "[x-unsloth] Timeout in seconds for each tool call execution (9999 = no limit).",
    )
    session_id: Optional[str] = Field(
        default = None,
        description = "[x-unsloth] Session/thread ID for scoping tool execution sandbox.",
    )


# ── Streaming response chunks ────────────────────────────────────


class ChoiceDelta(BaseModel):
    """Delta content for a streaming chunk."""

    # Both parts are optional, so a delta may carry only a role, only content, or neither.
    role: Optional[str] = None
    content: Optional[str] = None


class ChunkChoice(BaseModel):
    """A single choice in a streaming chunk."""

    index: int = 0
    delta: ChoiceDelta  # required
    # None by default; when set, only "stop" or "length" validate.
    finish_reason: Optional[Literal["stop", "length"]] = None


class ChatCompletionChunk(BaseModel):
    """A single SSE chunk in OpenAI streaming format."""

    # Generated per instance: "chatcmpl-" followed by the first 12 hex chars of a random UUID4.
    id: str = Field(default_factory = lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    # Generated per instance: current Unix time truncated to whole seconds.
    created: int = Field(default_factory = lambda: int(time.time()))
    model: str = "default"
    choices: list[ChunkChoice]  # required


# ── Non-streaming response ───────────────────────────────────────


class CompletionMessage(BaseModel):
    """The assistant's complete response message."""

    role: Literal["assistant"] = "assistant"  # fixed to "assistant"
    content: str  # required


class CompletionChoice(BaseModel):
    """A single choice in a non-streaming response."""

    index: int = 0
    message: CompletionMessage  # required
    # Non-optional here (the streaming ChunkChoice allows None); defaults to "stop".
    finish_reason: Literal["stop", "length"] = "stop"


class CompletionUsage(BaseModel):
    """Token usage statistics (approximate)."""

    # All counters default to 0; nothing here enforces total == prompt + completion.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


class ChatCompletion(BaseModel):
    """Non-streaming chat completion response."""

    # Generated per instance: "chatcmpl-" followed by the first 12 hex chars of a random UUID4.
    id: str = Field(default_factory = lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: Literal["chat.completion"] = "chat.completion"
    # Generated per instance: current Unix time truncated to whole seconds.
    created: int = Field(default_factory = lambda: int(time.time()))
    model: str = "default"
    choices: list[CompletionChoice]  # required
    # Defaults to a zeroed CompletionUsage built per instance.
    usage: CompletionUsage = Field(default_factory = CompletionUsage)


================================================
FILE: studio/backend/models/models.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic schemas for Model Management API
"""

from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Any, Literal

# Closed set of modality labels; used below as the type of ModelDetails.model_type.
ModelType = Literal["text", "vision", "audio", "embeddings"]


# `display_name` and `path` are required; `loss` is optional.
class CheckpointInfo(BaseModel):
    """Information about a discovered checkpoint directory."""

    display_name: str = Field(
        ...,
        description = "User-friendly checkpoint name (folder name)",
    )
    path: str = Field(
        ...,
        description = "Full path to the checkpoint directory",
    )
    loss: Optional[float] = Field(
        default = None,
        description = "Training loss at this checkpoint",
    )


# Only `name` is required.
class ModelCheckpoints(BaseModel):
    """A training run and its associated checkpoints."""

    name: str = Field(
        ...,
        description = "Training run folder name",
    )
    checkpoints: List[CheckpointInfo] = Field(
        default_factory = list,
        description = "List of checkpoints for this training run (final + intermediate)",
    )
    base_model: Optional[str] = Field(
        default = None,
        description = "Base model name from adapter_config.json or config.json",
    )
    peft_type: Optional[str] = Field(
        default = None,
        description = "PEFT type (e.g. LORA) if adapter training, None for full fine-tune",
    )
    lora_rank: Optional[int] = Field(
        default = None,
        description = "LoRA rank (r) if applicable",
    )


# `outputs_dir` is required; `models` defaults to an empty list.
class CheckpointListResponse(BaseModel):
    """Response for listing available checkpoints in an outputs directory."""

    outputs_dir: str = Field(
        ...,
        description = "Directory that was scanned",
    )
    models: List[ModelCheckpoints] = Field(
        default_factory = list,
        description = "List of training runs with their checkpoints",
    )


# Only `id` is required; every other field has a default.
class ModelDetails(BaseModel):
    """Detailed model configuration and metadata - can be used for both list and detail views"""

    id: str = Field(
        ...,
        description = "Model identifier",
    )
    model_name: Optional[str] = Field(
        default = None,
        description = "Model identifier (alias for id, for backward compatibility)",
    )
    name: Optional[str] = Field(
        default = None,
        description = "Display name for the model",
    )
    config: Optional[Dict[str, Any]] = Field(
        default = None,
        description = "Model configuration dictionary",
    )
    # Capability flags; all default to False.
    is_vision: bool = Field(
        default = False,
        description = "Whether model is a vision model",
    )
    is_embedding: bool = Field(
        default = False,
        description = "Whether model is an embedding/sentence-transformer model",
    )
    is_lora: bool = Field(
        default = False,
        description = "Whether model is a LoRA adapter",
    )
    is_gguf: bool = Field(
        default = False,
        description = "Whether model is a GGUF model (llama.cpp format)",
    )
    is_audio: bool = Field(
        default = False,
        description = "Whether model is a TTS audio model",
    )
    audio_type: Optional[str] = Field(
        default = None,
        description = "Audio codec type: snac, csm, bicodec, dac",
    )
    has_audio_input: bool = Field(
        default = False,
        description = "Whether model accepts audio input (ASR)",
    )
    # Restricted to the ModelType literals when set.
    model_type: Optional[ModelType] = Field(
        default = None,
        description = "Collapsed model modality: text, vision, audio, or embeddings",
    )
    base_model: Optional[str] = Field(
        default = None,
        description = "Base model if this is a LoRA adapter",
    )
    max_position_embeddings: Optional[int] = Field(
        default = None,
        description = "Maximum context length supported by the model",
    )
    model_size_bytes: Optional[int] = Field(
        default = None,
        description = "Total size of model weight files in bytes",
    )


# `display_name` and `adapter_path` are required.
class LoRAInfo(BaseModel):
    """LoRA adapter or exported model information"""

    display_name: str = Field(
        ...,
        description = "Display name for the LoRA",
    )
    adapter_path: str = Field(
        ...,
        description = "Path to the LoRA adapter or exported model",
    )
    base_model: Optional[str] = Field(
        default = None,
        description = "Base model identifier",
    )
    # Free-form strings; the values named in the descriptions are not enforced by validation.
    source: Optional[str] = Field(
        default = None,
        description = "'training' or 'exported'",
    )
    export_type: Optional[str] = Field(
        default = None,
        description = "'lora', 'merged', or 'gguf' (for exports)",
    )


# `outputs_dir` is required; `loras` defaults to an empty list.
class LoRAScanResponse(BaseModel):
    """Response schema for scanning trained LoRA adapters"""

    loras: List[LoRAInfo] = Field(
        default_factory = list,
        description = "List of found LoRA adapters",
    )
    outputs_dir: str = Field(
        ...,
        description = "Directory that was scanned",
    )


# Both lists default to empty, so an empty payload validates.
class ModelListResponse(BaseModel):
    """Response schema for listing models"""

    models: List[ModelDetails] = Field(
        default_factory = list,
        description = "List of models",
    )
    default_models: List[str] = Field(
        default_factory = list,
        description = "List of default model IDs",
    )


# `filename` and `quant` are required.
class GgufVariantDetail(BaseModel):
    """A single GGUF quantization variant in a HuggingFace repo."""

    filename: str = Field(
        ...,
        description = "GGUF filename (e.g., 'gemma-3-4b-it-Q4_K_M.gguf')",
    )
    quant: str = Field(
        ...,
        description = "Quantization label (e.g., 'Q4_K_M')",
    )
    # Defaults to 0 when not supplied.
    size_bytes: int = Field(
        default = 0,
        description = "File size in bytes",
    )
    downloaded: bool = Field(
        default = False,
        description = "Whether this variant is already in the local HF cache",
    )


# Only `repo_id` is required.
class GgufVariantsResponse(BaseModel):
    """Response for listing GGUF quantization variants in a HuggingFace repo."""

    repo_id: str = Field(
        ...,
        description = "HuggingFace repo ID",
    )
    variants: List[GgufVariantDetail] = Field(
        default_factory = list,
        description = "Available GGUF variants",
    )
    has_vision: bool = Field(
        default = False,
        description = "Whether the model has vision support (mmproj files)",
    )
    default_variant: Optional[str] = Field(
        default = None,
        description = "Recommended default quantization variant",
    )


# `id`, `display_name`, `path` and `source` are required.
class LocalModelInfo(BaseModel):
    """Discovered local model candidate."""

    id: str = Field(
        ...,
        description = "Identifier to use for loading/training",
    )
    display_name: str = Field(
        ...,
        description = "Display label",
    )
    path: str = Field(
        ...,
        description = "Local path where model data was discovered",
    )
    # Only these two literal values validate.
    source: Literal["models_dir", "hf_cache"] = Field(
        ...,
        description = "Discovery source",
    )
    model_id: Optional[str] = Field(
        default = None,
        description = "HF repo id for cached models, e.g. org/model",
    )
    updated_at: Optional[float] = Field(
        default = None,
        description = "Unix timestamp of latest observed update",
    )


# Only `models_dir` is required.
class LocalModelListResponse(BaseModel):
    """Response schema for listing local/cached models."""

    models_dir: str = Field(
        ...,
        description = "Directory scanned for custom local models",
    )
    hf_cache_dir: Optional[str] = Field(
        default = None,
        description = "HF cache root that was scanned",
    )
    models: List[LocalModelInfo] = Field(
        default_factory = list,
        description = "Discovered local/cached models",
    )


================================================
FILE: studio/backend/models/responses.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic response schemas for endpoints that previously returned raw dicts.
These are small response models for training and model management routes.
"""

from pydantic import BaseModel, Field
from typing import Optional, List


# --- Training route response models ---


# Both fields are required. `status` is a plain str: the values named in its
# description are not enforced by validation.
class TrainingStopResponse(BaseModel):
    """Response for stopping a training job"""

    status: str = Field(
        ...,
        description = "Current status: 'stopped' or 'idle'",
    )
    message: str = Field(
        ...,
        description = "Human-readable status message",
    )


# Every field has a default: histories start as empty lists, "current" values as None.
class TrainingMetricsResponse(BaseModel):
    """Response for training metrics history"""

    loss_history: List[float] = Field(
        default_factory = list,
        description = "Loss values per step",
    )
    lr_history: List[float] = Field(
        default_factory = list,
        description = "Learning rate per step",
    )
    step_history: List[int] = Field(
        default_factory = list,
        description = "Step numbers",
    )
    # Gradient norms carry their own step axis, separate from `step_history`.
    grad_norm_history: List[float] = Field(
        default_factory = list,
        description = "Gradient norm values",
    )
    grad_norm_step_history: List[int] = Field(
        default_factory = list,
        description = "Step numbers for gradient norm values",
    )
    current_loss: Optional[float] = Field(
        default = None,
        description = "Most recent loss value",
    )
    current_lr: Optional[float] = Field(
        default = None,
        description = "Most recent learning rate",
    )
    current_step: Optional[int] = Field(
        default = None,
        description = "Most recent step number",
    )


# --- Model management route response models ---


# Both fields are required.
class LoRABaseModelResponse(BaseModel):
    """Response for getting a LoRA's base model"""

    lora_path: str = Field(
        ...,
        description = "Path to the LoRA adapter",
    )
    base_model: str = Field(
        ...,
        description = "Base model identifier",
    )


# Both fields are required; `is_vision` has no default and must be set explicitly.
class VisionCheckResponse(BaseModel):
    """Response for checking if a model is a vision model"""

    model_name: str = Field(
        ...,
        description = "Model identifier",
    )
    is_vision: bool = Field(
        ...,
        description = "Whether the model is a vision model",
    )


# Both fields are required; `is_embedding` has no default and must be set explicitly.
class EmbeddingCheckResponse(BaseModel):
    """Response for checking if a model is an embedding model"""

    model_name: str = Field(
        ...,
        description = "Model identifier",
    )
    is_embedding: bool = Field(
        ...,
        description = "Whether the model is an embedding/sentence-transformer model",
    )


================================================
FILE: studio/backend/models/training.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Pydantic schemas for Training API
"""

from pydantic import BaseModel, Field, model_validator
from typing import Any, Optional, List, Dict, Literal


class TrainingStartRequest(BaseModel):
    """Request schema for starting training"""

    # Required fields (declared with `...`): model_name, training_type, format_type.
    # Every other field has a default and may be omitted by the client.

    # Model parameters
    model_name: str = Field(
        ..., description = "Model identifier (e.g., 'unsloth/llama-3-8b-bnb-4bit')"
    )
    training_type: str = Field(
        ..., description = "Training type: 'LoRA/QLoRA' or 'Full Finetuning'"
    )
    hf_token: Optional[str] = Field(None, description = "HuggingFace token")
    load_in_4bit: bool = Field(True, description = "Load model in 4-bit quantization")
    max_seq_length: int = Field(2048, description = "Maximum sequence length")
    trust_remote_code: bool = Field(
        False,
        description = "Allow loading models with custom code (e.g. NVIDIA Nemotron). Only enable for repos you trust.",
    )

    # Dataset parameters
    hf_dataset: Optional[str] = Field(
        None, description = "HuggingFace dataset identifier"
    )
    local_datasets: List[str] = Field(
        default_factory = list, description = "List of local dataset paths"
    )
    local_eval_datasets: List[str] = Field(
        default_factory = list, description = "List of local eval dataset paths"
    )
    format_type: str = Field(..., description = "Dataset format type")
    subset: Optional[str] = None
    train_split: Optional[str] = Field("train", description = "Training split name")
    eval_split: Optional[str] = Field(
        None, description = "Eval split name. None = auto-detect"
    )
    eval_steps: float = Field(
        0.00, description = "Fraction of total steps between evals (0-1)"
    )
    dataset_slice_start: Optional[int] = Field(
        None, description = "Inclusive start row index for dataset slicing"
    )
    dataset_slice_end: Optional[int] = Field(
        None, description = "Inclusive end row index for dataset slicing"
    )

    @model_validator(mode = "before")
    @classmethod
    def _compat_split(cls, values: Any) -> Any:
        """Accept legacy 'split' field as alias for 'train_split'.

        Runs before field validation. If the raw input is a dict containing
        'split', that key is removed and its value becomes 'train_split'
        unless 'train_split' was supplied explicitly (the explicit value wins).
        Non-dict inputs are returned untouched.
        """
        if isinstance(values, dict) and "split" in values:
            # Rewrite a shallow copy so the caller's dict is never mutated
            # as a side effect of validation.
            values = dict(values)
            values.setdefault("train_split", values.pop("split"))
        return values

    custom_format_mapping: Optional[Dict[str, Any]] = Field(
        None,
        description = (
            "User-provided column-to-role mapping, e.g. {'image': 'image', 'caption': 'text'} "
            "for VLM or {'instruction': 'user', 'output': 'assistant'} for LLM. "
            "Enhanced format includes __system_prompt, __user_template, "
            "__assistant_template, __label_mapping metadata keys."
        ),
    )
    # Training parameters
    num_epochs: int = Field(1, description = "Number of training epochs")
    # NOTE: learning_rate is carried as a string (e.g. "2e-4"), not a float.
    learning_rate: str = Field("2e-4", description = "Learning rate")
    batch_size: int = Field(1, description = "Batch size")
    gradient_accumulation_steps: int = Field(
        1, description = "Gradient accumulation steps"
    )
    warmup_steps: Optional[int] = Field(None, description = "Warmup steps")
    warmup_ratio: Optional[float] = Field(None, description = "Warmup ratio")
    max_steps: Optional[int] = Field(None, description = "Maximum training steps")
    save_steps: int = Field(100, description = "Steps between checkpoints")
    weight_decay: float = Field(0.01, description = "Weight decay")
    random_seed: int = Field(42, description = "Random seed")
    packing: bool = Field(False, description = "Enable sequence packing")
    optim: str = Field("adamw_8bit", description = "Optimizer")
    lr_scheduler_type: str = Field("linear", description = "Learning rate scheduler type")

    # LoRA parameters
    use_lora: bool = Field(True, description = "Use LoRA (derived from training_type)")
    lora_r: int = Field(16, description = "LoRA rank")
    lora_alpha: int = Field(16, description = "LoRA alpha")
    lora_dropout: float = Field(0.0, description = "LoRA dropout")
    target_modules: List[str] = Field(
        default_factory = list, description = "Target modules for LoRA"
    )
    gradient_checkpointing: str = Field(
        "", description = "Gradient checkpointing setting"
    )
    use_rslora: bool = Field(False, description = "Use RSLoRA")
    use_loftq: bool = Field(False, description = "Use LoftQ")
    train_on_completions: bool = Field(False, description = "Train on completions only")

    # Vision-specific LoRA parameters
    finetune_vision_layers: bool = Field(False, description = "Finetune vision layers")
    finetune_language_layers: bool = Field(
        False, description = "Finetune language layers"
    )
    finetune_attention_modules: bool = Field(
        False, description = "Finetune attention modules"
    )
    finetune_mlp_modules: bool = Field(False, description = "Finetune MLP modules")
    is_dataset_image: bool = Field(
        False, description = "Whether the dataset contains image data"
    )
    is_dataset_audio: bool = Field(
        False, description = "Whether the dataset contains audio data"
    )
    is_embedding: bool = Field(
        False, description = "Whether model is an embedding/sentence-transformer model"
    )

    # Logging parameters
    enable_wandb: bool = Field(False, description = "Enable Weights & Biases logging")
    wandb_token: Optional[str] = Field(None, description = "W&B token")
    wandb_project: Optional[str] = Field(None, description = "W&B project name")
    enable_tensorboard: bool = Field(False, description = "Enable TensorBoard logging")
    tensorboard_dir: Optional[str] = Field(None, description = "TensorBoard directory")


class TrainingJobResponse(BaseModel):
    """Immediate response when training is initiated"""

    job_id: str = Field(..., description = "Unique training job identifier")
    # Only two initial states are representable here: accepted ("queued") or rejected ("error").
    status: Literal["queued", "error"] = Field(..., description = "Initial job status")
    message: str = Field(..., description = "Human-readable status message")
    # Optional; defaults to None when there is nothing to report.
    error: Optional[str] = Field(None, description = "Error details if status is 'error'")


class TrainingStatus(BaseModel):
    """Current training job status - works for streaming or polling"""

    job_id: str = Field(..., description = "Training job identifier")
    # Closed set of phase names; any other value fails validation.
    phase: Literal[
        "idle",
        "loading_model",
        "loading_dataset",
        "configuring",
        "training",
        "completed",
        "error",
        "stopped",
    ] = Field(..., description = "Current phase of training pipeline")
    is_training_running: bool = Field(
        ..., description = "True if training loop is actively running"
    )
    # Defaults to False, so a payload that omits it reports evaluation as disabled.
    eval_enabled: bool = Field(
        False,
        description = "True if evaluation dataset is configured for this training run",
    )
    message: str = Field(..., description = "Human-readable status message")
    error: Optional[str] = Field(None, description = "Error details if phase is 'error'")
    # Free-form dict: its keys are not validated by this schema.
    details: Optional[dict] = Field(
        None, description = "Phase-specific info, e.g. {'model_size': '8B'}"
    )
    # Free-form dict: the keys listed in the description are a convention, not enforced here.
    metric_history: Optional[dict] = Field(
        None,
        description = "Full metric history arrays for chart recovery after SSE reconnection. "
        "Keys: 'steps', 'loss', 'lr', 'grad_norm', 'grad_norm_steps' — each a list of numeric values.",
    )


class TrainingProgress(BaseModel):
    """Training progress metrics - for streaming or polling"""

    # Required core metrics (declared with `...`).
    job_id: str = Field(..., description = "Training job identifier")
    step: int = Field(..., description = "Current training step")
    total_steps: int = Field(..., description = "Total training steps")
    loss: float = Field(..., description = "Current loss value")
    learning_rate: float = Field(..., description = "Current learning rate")
    # The 0-100 range is documented only; this schema does not enforce it.
    progress_percent: float = Field(
        ..., description = "Progress percentage (0.0 to 100.0)"
    )
    # Optional metrics: each defaults to None when not supplied.
    epoch: Optional[float] = Field(None, description = "Current epoch")
    elapsed_seconds: Optional[float] = Field(
        None, description = "Time elapsed since training started"
    )
    eta_seconds: Optional[float] = Field(None, description = "Estimated time remaining")
    grad_norm: Optional[float] = Field(
        None, description = "L2 norm of gradients, computed before gradient clipping"
    )
    num_tokens: Optional[int] = Field(
        None, description = "Total number of tokens processed so far"
    )
    eval_loss: Optional[float] = Field(
        None, description = "Eval loss from the most recent evaluation step"
    )


================================================
FILE: studio/backend/models/users.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Pydantic models for authentication tokens.

This module defines the Token response model used by auth routes.
"""

from pydantic import BaseModel, Field


class Token(BaseModel):
    """Authentication response model for session credentials."""

    # All four fields are required (`...`); there are no defaults.
    access_token: str = Field(
        ..., description = "Session access credential used for authenticated API requests"
    )
    refresh_token: str = Field(
        ...,
        description = "Session refresh credential used to renew an expired access credential",
    )
    # Typed as a plain str: the 'bearer' value is documented but not enforced by the schema.
    token_type: str = Field(
        ..., description = "Credential type for the Authorization header, always 'bearer'"
    )
    must_change_password: bool = Field(
        ..., description = "True when the user must change the seeded default password"
    )


================================================
FILE: studio/backend/plugins/__init__.py
================================================


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/pyproject.toml
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "data-designer-unstructured-seed"
version = "0.1.0"
description = "Local Data Designer unstructured seed reader plugin"
requires-python = ">=3.11"
dependencies = [
  "data-designer-engine>=0.5.1,<0.6",
  "pandas>=2,<3",
]

[project.entry-points."data_designer.plugins"]
unstructured = "data_designer_unstructured_seed.plugin:unstructured_seed_plugin"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/src/data_designer_unstructured_seed/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from .chunking import (
    DEFAULT_CHUNK_OVERLAP,
    DEFAULT_CHUNK_SIZE,
    build_unstructured_preview_rows,
    materialize_unstructured_seed_dataset,
    resolve_chunking,
)
from .config import UnstructuredSeedSource
from .impl import UnstructuredSeedReader
from .plugin import unstructured_seed_plugin

# Public API of the package: exactly the names re-exported by the imports above
# (chunking helpers and defaults, the config model, the reader, and the plugin object).
__all__ = [
    "DEFAULT_CHUNK_OVERLAP",
    "DEFAULT_CHUNK_SIZE",
    "build_unstructured_preview_rows",
    "materialize_unstructured_seed_dataset",
    "resolve_chunking",
    "UnstructuredSeedSource",
    "UnstructuredSeedReader",
    "unstructured_seed_plugin",
]


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/src/data_designer_unstructured_seed/chunking.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import hashlib
import re
from pathlib import Path
from typing import Any

from utils.paths import ensure_dir, unstructured_seed_cache_root

# Chunk sizes and overlaps are measured in characters (the splitter uses len() and slicing).
# Fallbacks used by resolve_chunking() when a value cannot be parsed as an integer.
DEFAULT_CHUNK_SIZE = 1200
DEFAULT_CHUNK_OVERLAP = 200
# Upper clamp applied to the chunk size by resolve_chunking().
MAX_CHUNK_SIZE = 20000
# A soft break (blank line, newline, space) is only accepted if it falls at or after
# this fraction of chunk_size inside the window.
_MIN_BREAK_RATIO = 0.6
# Directory holding the cached parquet files; resolved once, at import time.
_CACHE_DIR = unstructured_seed_cache_root()


def resolve_chunking(
    chunk_size: Any,
    chunk_overlap: Any,
) -> tuple[int, int]:
    """Coerce and clamp user-supplied chunking parameters.

    Values that cannot be parsed as integers fall back to the module defaults.
    The size is clamped to ``[1, MAX_CHUNK_SIZE]`` and the overlap to
    ``[0, size - 1]``, so the overlap is always strictly smaller than the size.

    Returns:
        ``(size, overlap)`` after clamping.
    """
    requested_size = _to_int(chunk_size, DEFAULT_CHUNK_SIZE)
    bounded_size = min(requested_size, MAX_CHUNK_SIZE)
    if bounded_size < 1:
        bounded_size = 1

    requested_overlap = _to_int(chunk_overlap, DEFAULT_CHUNK_OVERLAP)
    overlap_ceiling = max(0, bounded_size - 1)
    bounded_overlap = max(0, min(requested_overlap, overlap_ceiling))
    return bounded_size, bounded_overlap


def build_unstructured_preview_rows(
    *,
    source_path: Path,
    preview_size: int,
    chunk_size: Any,
    chunk_overlap: Any,
) -> list[dict[str, str]]:
    """Return up to ``preview_size`` chunk rows for the given source file.

    The dataset is materialized (or found in the cache) first. When it was
    freshly built, the in-memory rows are sliced directly; otherwise the cached
    parquet file is read back and blank ``chunk_text`` values are dropped.

    Raises:
        RuntimeError: if pandas cannot be imported when the cache must be read.
    """
    cached_file, fresh_rows = materialize_unstructured_seed_dataset(
        source_path = source_path,
        chunk_size = chunk_size,
        chunk_overlap = chunk_overlap,
    )
    limit = max(0, int(preview_size))
    if fresh_rows:
        return fresh_rows[:limit]

    # Cache hit: no rows came back in memory, so load them from disk.
    try:
        import pandas as pd
    except ImportError as exc:  # pragma: no cover
        raise RuntimeError(
            f"pandas is required for unstructured seed processing: {exc}"
        ) from exc

    records = pd.read_parquet(cached_file).head(limit).to_dict(orient = "records")
    preview: list[dict[str, str]] = []
    for record in records:
        chunk_text = str(record.get("chunk_text", "")).strip()
        if chunk_text:
            preview.append({"chunk_text": chunk_text})
    return preview


def materialize_unstructured_seed_dataset(
    *,
    source_path: Path,
    chunk_size: Any,
    chunk_overlap: Any,
) -> tuple[Path, list[dict[str, str]]]:
    """Chunk a text file into a cached parquet dataset.

    The cache file name is derived from the resolved source path, its size and
    mtime, and the effective chunking parameters.

    Returns:
        ``(parquet_path, rows)``. ``rows`` is an empty list when the parquet
        file already existed (cache hit); otherwise it holds the rows just written.

    Raises:
        FileNotFoundError: if the resolved source path is not a file.
        ValueError: if the source yields no chunks.
        RuntimeError: if pandas cannot be imported.
    """
    source_file = source_path.expanduser().resolve()
    if not source_file.is_file():
        raise FileNotFoundError(f"unstructured seed file not found: {source_file}")

    effective_size, effective_overlap = resolve_chunking(chunk_size, chunk_overlap)
    cache_key = _compute_cache_key(
        source_path = source_file,
        chunk_size = effective_size,
        chunk_overlap = effective_overlap,
    )
    cached_file = _CACHE_DIR / f"{cache_key}.parquet"
    if cached_file.exists():
        return cached_file, []

    pieces = split_text_into_chunks(
        text = load_unstructured_text_file(source_file),
        chunk_size = effective_size,
        chunk_overlap = effective_overlap,
    )
    if not pieces:
        raise ValueError("No text found in unstructured seed source.")

    records = [{"chunk_text": piece} for piece in pieces]
    ensure_dir(_CACHE_DIR)
    try:
        import pandas as pd
    except ImportError as exc:  # pragma: no cover
        raise RuntimeError(
            f"pandas is required for unstructured seed processing: {exc}"
        ) from exc

    # Write to a staging file first, then rename it over the final name so the
    # final path never holds a partially written file.
    staging_file = _CACHE_DIR / f"{cache_key}.tmp.parquet"
    pd.DataFrame(records).to_parquet(staging_file, index = False)
    staging_file.replace(cached_file)
    return cached_file, records


def load_unstructured_text_file(path: Path) -> str:
    """Read a ``.txt`` or ``.md`` file and return its normalized text.

    The file is decoded as UTF-8; undecodable bytes are dropped rather than
    raising. The suffix check is case-insensitive.

    Raises:
        ValueError: if the file suffix is not ``.txt`` or ``.md``.
    """
    suffix = path.suffix.lower()
    if suffix in {".txt", ".md"}:
        contents = path.read_text(encoding = "utf-8", errors = "ignore")
        return normalize_unstructured_text(contents)
    raise ValueError(f"Unsupported unstructured seed file type: {suffix}")


def normalize_unstructured_text(text: str) -> str:
    """Normalize newlines and trim surrounding whitespace.

    CRLF and lone CR become LF, runs of three or more newlines collapse to a
    single blank line, and leading/trailing whitespace is stripped.
    """
    unix_newlines = text.replace("\r\n", "\n")
    unix_newlines = unix_newlines.replace("\r", "\n")
    collapsed = re.sub(r"\n{3,}", "\n\n", unix_newlines)
    return collapsed.strip()


def split_text_into_chunks(
    *,
    text: str,
    chunk_size: int,
    chunk_overlap: int,
) -> list[str]:
    """Split ``text`` into stripped, non-empty chunks of at most ``chunk_size`` characters.

    A chunk that does not reach the end of the text is shortened to the last
    soft break reported by ``_find_break_index`` when one exists. The next chunk
    starts ``chunk_overlap`` characters before the previous end, unless that
    would not advance past the previous start, in which case it starts at the
    previous end.

    Empty text yields ``[]``; a non-positive ``chunk_size`` yields ``[text]``.
    """
    if not text:
        return []
    if chunk_size <= 0:
        return [text]

    total = len(text)
    earliest_break = int(chunk_size * _MIN_BREAK_RATIO)
    pieces: list[str] = []
    cursor = 0
    while cursor < total:
        hard_stop = min(total, cursor + chunk_size)
        stop = hard_stop
        if stop < total:
            # Not the final chunk: try to end on a soft break instead of mid-word.
            offset = _find_break_index(text[cursor:stop], earliest_break)
            if offset is not None and offset > 0:
                stop = cursor + offset

        # Safety net: never let a window be empty.
        if stop <= cursor:
            stop = hard_stop

        piece = text[cursor:stop].strip()
        if piece:
            pieces.append(piece)
        if stop >= total:
            break

        # Step back by the overlap, but always make forward progress.
        resume = stop - chunk_overlap
        cursor = max(0, resume if resume > cursor else stop)

    return pieces


def _find_break_index(window: str, min_index: int) -> int | None:
    """Return the cut position just after the best soft break in ``window``.

    Separators are tried in priority order: blank line, newline, space. For
    each, only its last occurrence is considered, and it is accepted when it
    starts at or after ``min_index``. Returns ``None`` when no separator qualifies.
    """
    for separator in ("\n\n", "\n", " "):
        position = window.rfind(separator)
        if position < min_index:
            continue
        return position + len(separator)
    return None


def _to_int(value: Any, fallback: int) -> int:
    """Parse ``value`` as an integer, returning ``fallback`` when that fails.

    Booleans are rejected outright (they would otherwise fail to parse via
    their ``"True"``/``"False"`` text anyway). Everything else is converted
    through its stripped string form, so ``" 7 "`` parses while ``3.5`` and
    ``None`` fall back.
    """
    if isinstance(value, bool):
        return fallback
    try:
        number = int(str(value).strip())
    except (TypeError, ValueError):
        number = fallback
    return number


def _compute_cache_key(
    *,
    source_path: Path,
    chunk_size: int,
    chunk_overlap: int,
) -> str:
    """Build the cache key for a source file and chunking configuration.

    The key is the SHA-256 hex digest of the path, file size, modification
    time in nanoseconds, chunk size and chunk overlap, joined with ``|``.
    Editing the file or changing either parameter therefore yields a new key.
    """
    file_info = source_path.stat()
    components = (
        source_path,
        file_info.st_size,
        file_info.st_mtime_ns,
        chunk_size,
        chunk_overlap,
    )
    hasher = hashlib.sha256()
    hasher.update("|".join(map(str, components)).encode("utf-8"))
    return hasher.hexdigest()


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/src/data_designer_unstructured_seed/config.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

from pathlib import Path
from typing import Literal

from pydantic import Field, field_validator

from data_designer.config.seed_source import SeedSource

from .chunking import DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE, resolve_chunking


class UnstructuredSeedSource(SeedSource):
    # Seed source configuration pointing at a local text file that is split
    # into chunks. Chunking values are normalized by the validators below via
    # resolve_chunking(), so stored values are always within the clamped ranges.
    seed_type: Literal["unstructured"] = "unstructured"
    path: str = Field(..., min_length = 1)
    chunk_size: int = DEFAULT_CHUNK_SIZE
    chunk_overlap: int = DEFAULT_CHUNK_OVERLAP

    @field_validator("path", mode = "after")
    @classmethod
    def _validate_path(cls, value: str) -> str:
        """Require that the (user-expanded) path names an existing file.

        The original string is returned unchanged; expansion is only used for the check.
        """
        candidate = Path(value).expanduser()
        if candidate.is_file():
            return value
        raise ValueError(f"Unstructured seed path is not a file: {candidate}")

    @field_validator("chunk_size", mode = "after")
    @classmethod
    def _validate_chunk_size(cls, value: int) -> int:
        """Clamp the chunk size into the range enforced by resolve_chunking()."""
        clamped_size = resolve_chunking(value, 0)[0]
        return clamped_size

    @field_validator("chunk_overlap", mode = "after")
    @classmethod
    def _validate_chunk_overlap(cls, value: int, info) -> int:
        """Clamp the overlap against the chunk size.

        Uses the already-validated chunk_size when it is available in
        ``info.data``; otherwise falls back to the field's declared default.
        """
        default_size = cls.model_fields["chunk_size"].default
        validated = info.data
        effective_size = validated["chunk_size"] if "chunk_size" in validated else default_size
        clamped_overlap = resolve_chunking(effective_size, value)[1]
        return clamped_overlap


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/src/data_designer_unstructured_seed/impl.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

from pathlib import Path

import data_designer.lazy_heavy_imports as lazy
from data_designer.engine.resources.seed_reader import SeedReader

from .chunking import materialize_unstructured_seed_dataset
from .config import UnstructuredSeedSource


class UnstructuredSeedReader(SeedReader[UnstructuredSeedSource]):
    """Seed reader that serves an unstructured text file as a chunked parquet dataset."""

    def create_duckdb_connection(self):
        """Open a DuckDB connection using default ``connect()`` arguments."""
        connection = lazy.duckdb.connect()
        return connection

    def get_dataset_uri(self) -> str:
        """Materialize the chunked dataset (or reuse its cache) and return the parquet path."""
        materialized = materialize_unstructured_seed_dataset(
            source_path = Path(self.source.path),
            chunk_size = self.source.chunk_size,
            chunk_overlap = self.source.chunk_overlap,
        )
        # materialized is (parquet_path, rows); only the path is needed here.
        return str(materialized[0])


================================================
FILE: studio/backend/plugins/data-designer-unstructured-seed/src/data_designer_unstructured_seed/plugin.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from data_designer.plugins.plugin import Plugin, PluginType

# Plugin descriptor for the unstructured seed reader. The implementation and
# config classes are referenced by dotted name (strings) rather than imported here.
unstructured_seed_plugin = Plugin(
    impl_qualified_name = "data_designer_unstructured_seed.impl.UnstructuredSeedReader",
    config_qualified_name = "data_designer_unstructured_seed.config.UnstructuredSeedSource",
    plugin_type = PluginType.SEED_READER,
)


================================================
FILE: studio/backend/requirements/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/requirements/base.txt
================================================
# Core unsloth packages
unsloth-zoo
unsloth


================================================
FILE: studio/backend/requirements/extras-no-deps.txt
================================================
# Audio extras (installed with --no-deps --no-cache-dir)
descript-audio-codec
descript-audiotools
julius
torchcodec
snac

# TRL and related packages
trl==0.23.1
git+https://github.com/meta-pytorch/OpenEnv.git
# executorch>=1.0.1               # 41.5 MB - no imports in unsloth/zoo/studio
torch-c-dlpack-ext
sentence_transformers==5.2.0
transformers==4.57.6


================================================
FILE: studio/backend/requirements/extras.txt
================================================
# OpenEnv dependencies
tomli
tomli-w

# ExecuTorch dependencies
ruamel.yaml
# coremltools                    # 10.2 MB - Apple CoreML, no imports in unsloth/zoo/studio
expecttest
flatbuffers
hydra-core
hypothesis
kgb
parameterized
pytest<9.0
pytest-json-report
pytest-rerunfailures==15.1
pytest-xdist
# Also needed by sentence_transformers (installed with --no-deps in extras-no-deps.txt)
scikit-learn==1.7.1

# Additional extras
pybind11
langid
jiwer
omegaconf
einx
pyloudnorm
openai-whisper
uroman                           # 4.0 MB - used for OuteTTS.
MeCab                            # 19.9 MB - used for OuteTTS.
inflect                          # number-to-words, required by OuteTTS
loguru
flatten_dict
ffmpy
randomname
argbind
tiktoken
ftfy
importlib-resources
librosa
markdown2
matplotlib
pystoi
soundfile
tensorboard
torch-stoi
evaluate
timm
transformers-cfg
open_spiel
addict
easydict
einops
tabulate
fastmcp>=3.0.2
openai>=2.7.2
websockets>=15.0.1


================================================
FILE: studio/backend/requirements/overrides.txt
================================================
# Torch AO overrides (installed with --force-reinstall --no-cache-dir)
torchao==0.14.0
pytorch_tokenizers

# Kernel packages
kernels


================================================
FILE: studio/backend/requirements/single-env/constraints.txt
================================================
# Single-env pins for unsloth + studio + data-designer
# Keep compatible with unsloth transformers bounds.
transformers==4.57.6
trl==0.23.1
huggingface-hub==0.36.2

# Studio stack
datasets==4.3.0
pyarrow==23.0.1

# FastMCP/OpenEnv compat
fastmcp>=3.0.2
mcp>=1.24,<2
websockets>=15.0.1

pandas==2.3.3


================================================
FILE: studio/backend/requirements/single-env/data-designer-deps.txt
================================================
# Data Designer runtime deps installed explicitly (single-env mode).
# DuckDB 1.5 removed Relation.record_batch(); keep <1.5 until upstream ships the fix.
anyascii<1,>=0.3.3
duckdb<1.5,>=1.1.3
faker<21,>=20.1.0
httpx<1,>=0.27.2
httpx-retries<1,>=0.4.2
json-repair<1,>=0.48.0
jsonpath-rust-bindings<2,>=1.0
jsonschema<5,>=4.0.0
litellm<1.80.12,>=1.73.6
lxml<7,>=6.0.2
marko<3,>=2.1.2
networkx<4,>=3.0
python-json-logger<4,>=3
ruff<1,>=0.14.10
scipy<2,>=1.11.0
sqlfluff<4,>=3.2.0
tiktoken<1,>=0.8.0


================================================
FILE: studio/backend/requirements/single-env/data-designer.txt
================================================
# Install Data Designer in same env as Unsloth.
data-designer==0.5.2
data-designer-config==0.5.2
data-designer-engine==0.5.2
prompt-toolkit>=3,<4


================================================
FILE: studio/backend/requirements/single-env/patch_metadata.py
================================================
#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Relax strict metadata pins so pip check matches known working single-env stack.

Why:
- data-designer pins huggingface-hub>=1.0.1 and pyarrow<20.
- unsloth/transformers pins huggingface-hub<1.
- studio datasets pins pyarrow>=21.

Runtime works in this app with hub 0.36.x + pyarrow 23.x, but metadata conflicts.
"""

from __future__ import annotations

import importlib.metadata as im
import re
from pathlib import Path

# Distributions whose installed METADATA files are candidates for patching.
TARGETS = (
    "data-designer",
    "data-designer-engine",
    "data-designer-config",
)

# (pattern, replacement) pairs applied to each METADATA file. Each pattern is
# anchored with ^...$ under re.MULTILINE, so it only matches one exact, whole
# Requires-Dist line; a differently worded pin is left untouched.
PATCHES: tuple[tuple[re.Pattern[str], str], ...] = (
    (
        # Lower the huggingface-hub floor from 1.0.1 to 0.34.0.
        re.compile(r"^Requires-Dist: huggingface-hub<2,>=1\.0\.1$", re.MULTILINE),
        "Requires-Dist: huggingface-hub<2,>=0.34.0",
    ),
    (
        # Replace the pyarrow <20 pin with a >=21.0.0 floor.
        re.compile(r"^Requires-Dist: pyarrow<20,>=19\.0\.1$", re.MULTILINE),
        "Requires-Dist: pyarrow>=21.0.0",
    ),
)


def metadata_path(dist_name: str) -> Path | None:
    """Locate the ``.dist-info/METADATA`` file of an installed distribution.

    Returns ``None`` when the distribution is not installed or when its file
    list is unavailable or contains no such entry.
    """
    try:
        dist = im.distribution(dist_name)
    except im.PackageNotFoundError:
        return None

    recorded_files = dist.files or []
    metadata_entry = next(
        (entry for entry in recorded_files if str(entry).endswith(".dist-info/METADATA")),
        None,
    )
    if metadata_entry is None:
        return None
    return Path(dist.locate_file(metadata_entry))


def patch_file(path: Path) -> bool:
    """Apply every entry of ``PATCHES`` to the file at ``path``.

    The file is rewritten only when at least one substitution changed its text.

    Returns:
        ``True`` if the file was modified, ``False`` if it was left as is.
    """
    before = path.read_text(encoding = "utf-8")
    after = before
    for pattern, replacement in PATCHES:
        after = pattern.sub(replacement, after)

    if after != before:
        path.write_text(after, encoding = "utf-8")
        return True
    return False


def main() -> int:
    """Patch the METADATA of every installed target and print a summary.

    Targets that are not installed (or whose METADATA cannot be located) are
    skipped. Always returns ``0``.
    """
    checked = changed = 0
    # Lazy generator: each target is located and patched before the next is looked up.
    located = (metadata_path(name) for name in TARGETS)
    for metadata_file in located:
        if metadata_file is None:
            continue
        checked += 1
        changed += 1 if patch_file(metadata_file) else 0
    print(f"single-env metadata patch: checked={checked}, changed={changed}")
    return 0


# Run the patch when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())


================================================
FILE: studio/backend/requirements/studio.txt
================================================
# Studio UI backend dependencies
typer
fastapi
uvicorn
pydantic
matplotlib
pandas
nest_asyncio
datasets==4.3.0
pyjwt
easydict
addict
# gradio>=4.0.0                  # 148 MB - Studio uses React + FastAPI, not Gradio
huggingface-hub==0.36.2
structlog>=24.1.0
diceware
ddgs


================================================
FILE: studio/backend/requirements/triton-kernels.txt
================================================
# Triton kernels (installed with --no-deps, from source)
triton_kernels @ git+https://github.com/triton-lang/triton.git@release/3.6.x#subdirectory=python/triton_kernels


================================================
FILE: studio/backend/routes/.gitkeep
================================================


================================================
FILE: studio/backend/routes/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
API Routes
"""

from routes.training import router as training_router
from routes.models import router as models_router
from routes.inference import router as inference_router
from routes.datasets import router as datasets_router
from routes.auth import router as auth_router
from routes.data_recipe import router as data_recipe_router
from routes.export import router as export_router

# Public API of the routes package: one router per feature area, matching the
# aliases imported above.
__all__ = [
    "training_router",
    "models_router",
    "inference_router",
    "datasets_router",
    "auth_router",
    "data_recipe_router",
    "export_router",
]


================================================
FILE: studio/backend/routes/auth.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Authentication API routes
"""

from fastapi import APIRouter, Depends, HTTPException, status

from models.auth import (
    AuthLoginRequest,
    RefreshTokenRequest,
    AuthStatusResponse,
    ChangePasswordRequest,
)
from models.users import Token
from auth import storage, hashing
from auth.authentication import (
    create_access_token,
    create_refresh_token,
    get_current_subject,
    get_current_subject_allow_password_change,
    refresh_access_token,
)

# Router that the auth endpoints below register themselves on; no prefix is set here.
router = APIRouter()


@router.get("/status", response_model = AuthStatusResponse)
async def auth_status() -> AuthStatusResponse:
    """
    Check whether auth has already been initialized.

    - initialized = False -> frontend should wait for the seeded admin bootstrap.
    - initialized = True  -> frontend should show login or force the first password change.

    While auth is not initialized, requires_password_change is reported as True
    without querying storage for the default admin.
    """
    # Read the flag once so both response fields describe the same snapshot
    # (and storage is not queried twice for the same answer).
    initialized = storage.is_initialized()
    if initialized:
        requires_password_change = storage.requires_password_change(
            storage.DEFAULT_ADMIN_USERNAME
        )
    else:
        requires_password_change = True

    return AuthStatusResponse(
        initialized = initialized,
        default_username = storage.DEFAULT_ADMIN_USERNAME,
        requires_password_change = requires_password_change,
    )


@router.post("/login", response_model = Token)
async def login(payload: AuthLoginRequest) -> Token:
    """
    Login with username/password and receive access + refresh tokens.

    An unknown username and a wrong password produce the same 401 response.
    """
    record = storage.get_user_and_secret(payload.username)

    credentials_ok = False
    must_change_password = False
    if record is not None:
        salt, pwd_hash, _jwt_secret, must_change_password = record
        credentials_ok = hashing.verify_password(payload.password, salt, pwd_hash)

    if not credentials_ok:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Incorrect password. Run 'unsloth studio reset-password' in your terminal to reset it.",
        )

    subject = payload.username
    issued_access = create_access_token(subject = subject)
    issued_refresh = create_refresh_token(subject = subject)
    return Token(
        access_token = issued_access,
        refresh_token = issued_refresh,
        token_type = "bearer",
        must_change_password = must_change_password,
    )


@router.post("/refresh", response_model = Token)
async def refresh(payload: RefreshTokenRequest) -> Token:
    """
    Exchange a valid refresh token for a new access token.

    The refresh token itself is reusable until it expires (7 days).
    """
    issued = refresh_access_token(payload.refresh_token)
    renewed_access, subject = issued
    if renewed_access is None or subject is None:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Invalid or expired refresh token",
        )

    # The same refresh token is handed back; only the access token is renewed.
    still_must_change = storage.requires_password_change(subject)
    return Token(
        access_token = renewed_access,
        refresh_token = payload.refresh_token,
        token_type = "bearer",
        must_change_password = still_must_change,
    )


@router.post("/change-password", response_model = Token)
async def change_password(
    payload: ChangePasswordRequest,
    current_subject: str = Depends(get_current_subject_allow_password_change),
) -> Token:
    """Allow the authenticated user to replace the default password.

    Responds 401 when the session's user is unknown or the current password is
    wrong, and 400 when the new password equals the current one. On success the
    user's refresh tokens are revoked and a fresh token pair is returned.
    """
    record = storage.get_user_and_secret(current_subject)
    if record is None:
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "User session is invalid",
        )

    salt, pwd_hash, _jwt_secret, _must_change_password = record
    current_password = payload.current_password
    if not hashing.verify_password(current_password, salt, pwd_hash):
        raise HTTPException(
            status_code = status.HTTP_401_UNAUTHORIZED,
            detail = "Current password is incorrect",
        )
    if current_password == payload.new_password:
        raise HTTPException(
            status_code = status.HTTP_400_BAD_REQUEST,
            detail = "New password must be different from the current password",
        )

    # Persist the new password first, then invalidate existing refresh tokens
    # before issuing the replacement pair.
    storage.update_password(current_subject, payload.new_password)
    storage.revoke_user_refresh_tokens(current_subject)
    new_access = create_access_token(subject = current_subject)
    new_refresh = create_refresh_token(subject = current_subject)
    return Token(
        access_token = new_access,
        refresh_token = new_refresh,
        token_type = "bearer",
        must_change_password = False,
    )


================================================
FILE: studio/backend/routes/data_recipe/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Data Recipe route package."""

from __future__ import annotations

import sys
from pathlib import Path

from fastapi import APIRouter, Depends

from auth.authentication import get_current_subject

# Resolve the directory three levels above this __init__.py and make sure it
# is on sys.path (inserted at the front when absent).
# NOTE(review): `auth.authentication` is imported above, before this insertion
# runs, so that import cannot rely on it - confirm the ordering is intended.
backend_path = Path(__file__).parent.parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

from .jobs import router as jobs_router
from .mcp import router as mcp_router
from .seed import router as seed_router
from .validate import router as validate_router

# Parent router for the package: the get_current_subject dependency is
# declared once here and the four sub-routers are mounted beneath it.
router = APIRouter(dependencies = [Depends(get_current_subject)])
router.include_router(seed_router)
router.include_router(validate_router)
router.include_router(jobs_router)
router.include_router(mcp_router)

# Only the combined router is exported.
__all__ = ["router"]


================================================
FILE: studio/backend/routes/data_recipe/jobs.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Job lifecycle endpoints for data recipe."""

from __future__ import annotations

from typing import Any

from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import ValidationError

from core.data_recipe.huggingface import (
    RecipeDatasetPublishError,
    publish_recipe_dataset,
)
from core.data_recipe.jobs import get_job_manager
from models.data_recipe import (
    JobCreateResponse,
    PublishDatasetRequest,
    PublishDatasetResponse,
    RecipePayload,
)

router = APIRouter()


def _normalize_run_name(value: Any) -> str | None:
    if value is None:
        return None
    if not isinstance(value, str):
        raise HTTPException(
            status_code = 400, detail = "invalid run_name: must be a string"
        )
    trimmed = value.strip()
    if not trimmed:
        return None
    return trimmed[:120]


@router.post("/jobs", response_class = JSONResponse, response_model = JobCreateResponse)
def create_job(payload: RecipePayload):
    """
    Validate a recipe/run request and start a data-recipe job.

    ``artifact_path`` and ``dataset_name`` are dropped from the run options,
    ``execution_type`` is normalized to ``"preview"`` or ``"full"`` (default
    ``"full"``), ``run_name`` is normalized, and an optional ``run_config`` is
    checked against ``RunConfig`` before the job manager is asked to start.

    Raises:
        HTTPException: 400 for a recipe without columns, a bad
            ``execution_type``, an invalid ``run_config``, or a ``ValueError``
            from the job manager; 409 when the manager raises ``RuntimeError``.
    """
    recipe = payload.recipe
    if not recipe.get("columns"):
        raise HTTPException(status_code = 400, detail = "Recipe must include columns.")

    run: dict[str, Any] = payload.run or {}
    # These two keys are never taken from the client request.
    for client_only_key in ("artifact_path", "dataset_name"):
        run.pop(client_only_key, None)

    requested_type = run.get("execution_type") or "full"
    execution_type = str(requested_type).strip().lower()
    if execution_type not in {"preview", "full"}:
        raise HTTPException(
            status_code = 400,
            detail = "invalid execution_type: must be 'preview' or 'full'",
        )
    run["execution_type"] = execution_type
    run["run_name"] = _normalize_run_name(run.get("run_name"))

    raw_run_config = run.get("run_config")
    if raw_run_config is not None:
        try:
            # Imported lazily; an ImportError is reported as an invalid run_config.
            from data_designer.config.run_config import RunConfig

            RunConfig.model_validate(raw_run_config)
        except (ImportError, ValidationError, TypeError, ValueError) as exc:
            raise HTTPException(
                status_code = 400, detail = f"invalid run_config: {exc}"
            ) from exc

    manager = get_job_manager()
    try:
        started_job_id = manager.start(recipe = recipe, run = run)
    except RuntimeError as exc:
        raise HTTPException(status_code = 409, detail = str(exc)) from exc
    except ValueError as exc:
        raise HTTPException(status_code = 400, detail = str(exc)) from exc

    return {"job_id": started_job_id}


@router.get("/jobs/{job_id}/status")
def job_status(job_id: str):
    """Return the job manager's status for ``job_id``, or 404 when it has none."""
    state = get_job_manager().get_status(job_id)
    if state is not None:
        return state
    raise HTTPException(status_code = 404, detail = "job not found")


@router.get("/jobs/current")
def current_job():
    """Return the job manager's current status, or 404 when there is none."""
    state = get_job_manager().get_current_status()
    if state is not None:
        return state
    raise HTTPException(status_code = 404, detail = "no job")


@router.post("/jobs/{job_id}/cancel")
def cancel_job(job_id: str):
    """
    Ask the job manager to cancel ``job_id`` and return its status afterwards.

    Raises:
        HTTPException: 404 when the manager reports the cancel as unsuccessful.
    """
    manager = get_job_manager()
    if manager.cancel(job_id):
        return manager.get_status(job_id)
    raise HTTPException(status_code = 404, detail = "job not found")


@router.get("/jobs/{job_id}/analysis")
def job_analysis(job_id: str):
    """Return the analysis for ``job_id``, or 404 while none is available."""
    analysis = get_job_manager().get_analysis(job_id)
    if analysis is not None:
        return analysis
    raise HTTPException(status_code = 404, detail = "analysis not ready")


@router.get("/jobs/{job_id}/dataset")
def job_dataset(
    job_id: str,
    limit: int = Query(default = 20, ge = 1, le = 500),
    offset: int = Query(default = 0, ge = 0),
):
    """
    Return one page of a job's dataset together with the paging parameters.

    Raises:
        HTTPException: 404 when the manager has no dataset for the job;
            422 when the manager's result carries an ``"error"`` entry.
    """
    page = get_job_manager().get_dataset(job_id, limit = limit, offset = offset)
    if page is None:
        raise HTTPException(status_code = 404, detail = "dataset not ready")
    if "error" in page:
        raise HTTPException(status_code = 422, detail = page["error"])

    response = {"dataset": page["dataset"], "total": page["total"]}
    response["limit"] = limit
    response["offset"] = offset
    return response


@router.post(
    "/jobs/{job_id}/publish",
    response_class = JSONResponse,
    response_model = PublishDatasetResponse,
)
def publish_job_dataset(job_id: str, payload: PublishDatasetRequest):
    """
    Publish a job's dataset artifacts via ``publish_recipe_dataset``.

    When the job manager knows the job, it must be a completed ``"full"`` run,
    and its recorded ``artifact_path`` (if a non-blank string) overrides the
    one in the request. When the manager does not know the job, the request's
    ``artifact_path`` is used as-is.

    Raises:
        HTTPException: 400 for a missing ``repo_id``/``description``, a
            missing artifact path, or a ``RecipeDatasetPublishError``; 409 for
            a job that is not a completed full run; 500 for any other error
            raised while publishing.
    """

    def _stripped_or_none(candidate):
        # Non-string values (including None) are treated as absent.
        return candidate.strip() if isinstance(candidate, str) else None

    repo_id = payload.repo_id.strip()
    description = payload.description.strip()
    if not repo_id:
        raise HTTPException(status_code = 400, detail = "repo_id is required")
    if not description:
        raise HTTPException(status_code = 400, detail = "description is required")

    hf_token = _stripped_or_none(payload.hf_token)
    artifact_path = _stripped_or_none(payload.artifact_path)

    job_state = get_job_manager().get_status(job_id)
    if job_state is not None:
        is_completed_full_run = (
            job_state.get("status") == "completed"
            and job_state.get("execution_type") == "full"
        )
        if not is_completed_full_run:
            raise HTTPException(
                status_code = 409,
                detail = "Only completed full runs can be published.",
            )
        recorded_artifact = job_state.get("artifact_path")
        if isinstance(recorded_artifact, str) and recorded_artifact.strip():
            artifact_path = recorded_artifact.strip()

    if not artifact_path:
        raise HTTPException(
            status_code = 400,
            detail = "This execution does not have publishable dataset artifacts.",
        )

    try:
        published_url = publish_recipe_dataset(
            artifact_path = artifact_path,
            repo_id = repo_id,
            description = description,
            hf_token = hf_token or None,
            private = payload.private,
        )
    except RecipeDatasetPublishError as exc:
        raise HTTPException(status_code = 400, detail = str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code = 500, detail = str(exc)) from exc

    return {
        "success": True,
        "url": published_url,
        "message": f"Published dataset to {repo_id}.",
    }


@router.get("/jobs/{job_id}/events")
async def job_events(request: Request, job_id: str):
    """
    Stream a job's events as ``text/event-stream``.

    The resume point comes from the ``Last-Event-ID`` header; a parseable
    ``after`` query parameter takes precedence over it. Replayed events are
    sent first, then live events until the client disconnects. The
    subscription is released when the generator finishes.

    Raises:
        HTTPException: 404 when the job manager returns no subscription.
    """

    def _as_seq(raw: Any) -> int | None:
        # Unparseable values are treated as "no sequence supplied".
        try:
            return int(str(raw).strip())
        except (TypeError, ValueError):
            return None

    mgr = get_job_manager()
    after_seq: int | None = None

    header_value = request.headers.get("last-event-id")
    if header_value:
        after_seq = _as_seq(header_value)

    query_value = request.query_params.get("after")
    if query_value:
        from_query = _as_seq(query_value)
        if from_query is not None:
            # A bad `after` value leaves the header-derived value untouched.
            after_seq = from_query

    sub = mgr.subscribe(job_id, after_seq = after_seq)
    if sub is None:
        raise HTTPException(status_code = 404, detail = "job not found")

    async def gen():
        try:
            for replayed in sub.replay:
                yield sub.format_sse(replayed)

            while not await request.is_disconnected():
                # A None result means no event arrived within the timeout;
                # loop again so the disconnect check is re-evaluated.
                live = await sub.next_event(timeout_sec = 1.0)
                if live is not None:
                    yield sub.format_sse(live)
        finally:
            mgr.unsubscribe(sub)

    return StreamingResponse(gen(), media_type = "text/event-stream")


================================================
FILE: studio/backend/routes/data_recipe/mcp.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""MCP helper endpoints for data recipe."""

from __future__ import annotations

from collections import defaultdict

from fastapi import APIRouter

from core.data_recipe.service import build_mcp_providers
from models.data_recipe import (
    McpToolsListRequest,
    McpToolsListResponse,
    McpToolsProviderResult,
)

router = APIRouter()


@router.post("/mcp/tools", response_model = McpToolsListResponse)
def list_mcp_tools(payload: McpToolsListRequest) -> McpToolsListResponse:
    """
    List the tool names exposed by each MCP provider in the request.

    Each provider is built and queried on its own, so one failing provider
    produces an error entry without affecting the others. Tool names offered
    by more than one provider are reported in ``duplicate_tools``.

    If the MCP helper module cannot be imported, a single unnamed provider
    entry carrying the import error is returned instead.
    """
    try:
        from data_designer.engine.mcp import io as mcp_io
    except ImportError as exc:
        unavailable = McpToolsProviderResult(
            name = "",
            error = f"MCP dependencies unavailable: {exc}",
        )
        return McpToolsListResponse(providers = [unavailable])

    results: list[McpToolsProviderResult] = []
    owners_by_tool: dict[str, list[str]] = defaultdict(list)

    for raw_provider in payload.mcp_providers:
        requested_name = str(raw_provider.get("name", "")).strip()
        built = build_mcp_providers({"mcp_providers": [raw_provider]})
        if len(built) != 1:
            results.append(
                McpToolsProviderResult(
                    name = requested_name,
                    error = "Unsupported MCP provider config.",
                )
            )
            continue

        (provider,) = built
        try:
            listed = mcp_io.list_tools(provider, timeout_sec = payload.timeout_sec)
            # De-duplicate and drop tools without a usable name.
            tool_names = sorted(
                {tool.name for tool in listed if getattr(tool, "name", "")}
            )
            for tool_name in tool_names:
                owners_by_tool[tool_name].append(provider.name)
            results.append(
                McpToolsProviderResult(
                    name = provider.name,
                    tools = tool_names,
                )
            )
        except Exception as exc:
            results.append(
                McpToolsProviderResult(
                    name = provider.name or requested_name,
                    error = str(exc).strip() or "Failed to load tools.",
                )
            )

    duplicate_tools: dict[str, list[str]] = {}
    for tool_name, provider_names in sorted(owners_by_tool.items()):
        if len(provider_names) > 1:
            duplicate_tools[tool_name] = provider_names

    return McpToolsListResponse(
        providers = results,
        duplicate_tools = duplicate_tools,
    )


================================================
FILE: studio/backend/routes/data_recipe/seed.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Seed inspect endpoints for data recipe."""

from __future__ import annotations

import base64
import binascii
from itertools import islice
from pathlib import Path
from typing import Any
from uuid import uuid4

from fastapi import APIRouter, HTTPException
from data_designer_unstructured_seed.chunking import (
    build_unstructured_preview_rows,
    resolve_chunking,
)
from core.data_recipe.jsonable import to_preview_jsonable
from utils.paths import ensure_dir, seed_uploads_root

from models.data_recipe import (
    SeedInspectRequest,
    SeedInspectResponse,
    SeedInspectUploadRequest,
)

router = APIRouter()

# Suffixes used to filter a Hugging Face repo file listing down to data files
# (a tuple so it can be passed straight to str.endswith).
DATA_EXTS = (".parquet", ".jsonl", ".json", ".csv")
# Split name used when a request does not supply one.
DEFAULT_SPLIT = "train"
# Suffixes accepted by /seed/inspect-upload for non-"unstructured" uploads.
LOCAL_UPLOAD_EXTS = {".csv", ".json", ".jsonl"}
# Suffixes accepted by /seed/inspect-upload when seed_source_type is "unstructured".
UNSTRUCTURED_UPLOAD_EXTS = {".txt", ".md"}
# Directory uploaded seed files are written to; resolved once at import time.
SEED_UPLOAD_DIR = seed_uploads_root()


def _serialize_preview_value(value: Any) -> Any:
    """Convert a single preview cell by delegating to ``to_preview_jsonable``."""
    converted = to_preview_jsonable(value)
    return converted


def _serialize_preview_rows(rows: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return new row dicts with string keys and per-cell serialized values."""
    serialized: list[dict[str, Any]] = []
    for row in rows:
        converted_row: dict[str, Any] = {}
        for key, cell in row.items():
            converted_row[str(key)] = _serialize_preview_value(cell)
        serialized.append(converted_row)
    return serialized


def _normalize_optional_text(value: str | None) -> str | None:
    if value is None:
        return None
    trimmed = value.strip()
    return trimmed if trimmed else None


def _list_hf_data_files(*, dataset_name: str, token: str | None) -> list[str]:
    """
    List the files of a Hugging Face dataset repo whose names end in ``DATA_EXTS``.

    Best effort: returns an empty list when ``huggingface_hub`` is not
    importable or when the listing raises ``HfHubHTTPError``, ``OSError`` or
    ``ValueError``.
    """
    try:
        from huggingface_hub import HfApi
        from huggingface_hub.utils import HfHubHTTPError
    except ImportError:
        return []

    try:
        listing = HfApi().list_repo_files(
            dataset_name, repo_type = "dataset", token = token
        )
        # Suffix match is case-insensitive; original names are returned.
        return [name for name in listing if name.lower().endswith(DATA_EXTS)]
    except (HfHubHTTPError, OSError, ValueError):
        return []


def _select_best_file(data_files: list[str], split: str = DEFAULT_SPLIT) -> str | None:
    """
    Pick the data file that best matches ``split``.

    Files are ranked in three tiers (case-insensitively): a ``/<split>/``
    directory component first, then a looser ``_<split>.``, ``-<split>.``,
    ``/<split>.``, ``/<split>_`` or ``/<split>-`` marker, then everything
    else. Within a tier the shortest path wins; remaining ties keep input
    order. Returns ``None`` for an empty list.
    """
    if not data_files:
        return None

    wanted = split.lower()
    directory_marker = f"/{wanted}/"
    loose_markers = (
        f"_{wanted}.",
        f"-{wanted}.",
        f"/{wanted}.",
        f"/{wanted}_",
        f"/{wanted}-",
    )

    def rank(candidate: str) -> tuple[int, int]:
        lowered = candidate.lower()
        if directory_marker in lowered:
            tier = 0
        elif any(marker in lowered for marker in loose_markers):
            tier = 1
        else:
            tier = 2
        return (tier, len(candidate))

    # min() returns the first minimal element, matching a stable sort's head.
    return min(data_files, key = rank)


def _resolve_seed_hf_path(
    dataset_name: str, data_files: list[str], split: str = DEFAULT_SPLIT
) -> str | None:
    """
    Build a ``datasets/<repo>/...`` path for the best-matching data file.

    For a file whose suffix is in ``DATA_EXTS`` the result is a recursive glob
    (``**/*<ext>``) rooted at the file's parent directory, or at the repo root
    when the file has no parent. Any other suffix yields the direct file path.
    Returns ``None`` when no file can be selected.
    """
    chosen = _select_best_file(data_files, split)
    if not chosen:
        return None

    chosen_path = Path(chosen)
    suffix = chosen_path.suffix.lower()
    repo_prefix = f"datasets/{dataset_name}"
    if suffix not in DATA_EXTS:
        return f"{repo_prefix}/{chosen}"

    folder = chosen_path.parent.as_posix()
    if folder in ("", "."):
        return f"{repo_prefix}/**/*{suffix}"
    return f"{repo_prefix}/{folder}/**/*{suffix}"


def _build_stream_load_kwargs(
    *,
    dataset_name: str,
    split: str,
    subset: str | None,
    token: str | None,
    data_file: str | None = None,
) -> dict[str, Any]:
    kwargs: dict[str, Any] = {
        "path": dataset_name,
        "split": split,
        "streaming": True,
        "trust_remote_code": False,
    }
    if data_file:
        kwargs["data_files"] = [data_file]
    if subset:
        kwargs["name"] = subset
    if token:
        kwargs["token"] = token
    return kwargs


def _load_preview_rows(
    *,
    load_dataset_fn,
    load_kwargs: dict[str, Any],
    preview_size: int,
) -> list[dict[str, Any]]:
    streamed_ds = load_dataset_fn(**load_kwargs)
    return [row for row in islice(streamed_ds, preview_size)]


def _extract_columns(rows: list[dict[str, Any]]) -> list[str]:
    columns_seen: dict[str, None] = {}
    for row in rows:
        for key in row.keys():
            columns_seen[str(key)] = None
    return list(columns_seen.keys())


def _sanitize_filename(filename: str) -> str:
    name = Path(filename).name.strip().replace("\x00", "")
    if not name:
        return "seed_upload"
    return name


def _decode_base64_payload(content_base64: str) -> bytes:
    raw = content_base64.strip()
    if "," in raw and raw.lower().startswith("data:"):
        raw = raw.split(",", 1)[1]
    try:
        return base64.b64decode(raw, validate = True)
    except binascii.Error as exc:
        raise HTTPException(status_code = 400, detail = "invalid base64 payload") from exc


def _read_preview_rows_from_local_file(
    path: Path, preview_size: int
) -> list[dict[str, Any]]:
    """
    Read up to ``preview_size`` rows from a CSV/JSON/JSONL file via pandas.

    A ``.json`` file is first parsed as a regular JSON document and, if that
    raises ``ValueError``, retried as JSON Lines. Rows are returned as
    serialized preview dicts.

    Raises:
        HTTPException: 500 when pandas cannot be imported; 422 for an
            unsupported suffix or when reading raises ``ValueError``/``OSError``.
    """
    try:
        import pandas as pd
    except ImportError as exc:
        raise HTTPException(
            status_code = 500, detail = f"seed inspect dependencies unavailable: {exc}"
        ) from exc

    suffix = path.suffix.lower()
    if suffix not in (".csv", ".jsonl", ".json"):
        raise HTTPException(status_code = 422, detail = f"unsupported file type: {suffix}")

    try:
        if suffix == ".csv":
            frame = pd.read_csv(path, nrows = preview_size)
        elif suffix == ".jsonl":
            frame = pd.read_json(path, lines = True).head(preview_size)
        else:
            try:
                frame = pd.read_json(path).head(preview_size)
            except ValueError:
                # Fall back to line-delimited parsing for the same file.
                frame = pd.read_json(path, lines = True).head(preview_size)
    except (ValueError, OSError) as exc:
        raise HTTPException(
            status_code = 422, detail = f"seed inspect failed: {exc}"
        ) from exc

    records = frame.to_dict(orient = "records")
    return _serialize_preview_rows(records)


def _read_preview_rows_from_unstructured_file(
    *,
    path: Path,
    preview_size: int,
    chunk_size: int | None,
    chunk_overlap: int | None,
) -> list[dict[str, Any]]:
    """
    Build serialized preview rows for an unstructured text file.

    The chunk size/overlap are resolved with ``resolve_chunking`` and the rows
    are produced by ``build_unstructured_preview_rows``.

    Raises:
        HTTPException: 422 when row building raises ``FileNotFoundError``,
            ``RuntimeError``, ``ValueError`` or ``OSError``.
    """
    resolved_size, resolved_overlap = resolve_chunking(chunk_size, chunk_overlap)
    try:
        raw_rows = build_unstructured_preview_rows(
            source_path = path,
            preview_size = preview_size,
            chunk_size = resolved_size,
            chunk_overlap = resolved_overlap,
        )
    except (FileNotFoundError, RuntimeError, ValueError, OSError) as exc:
        raise HTTPException(
            status_code = 422, detail = f"seed inspect failed: {exc}"
        ) from exc
    else:
        return _serialize_preview_rows(raw_rows)


@router.post("/seed/inspect", response_model = SeedInspectResponse)
def inspect_seed_dataset(payload: SeedInspectRequest) -> SeedInspectResponse:
    """
    Preview a Hugging Face dataset and resolve the path used to seed from it.

    Rows are streamed first from the single best-matching data file; if that
    yields nothing (or fails), the whole split is streamed instead. The
    resolved path is derived from the repo's data-file listing, or defaults to
    a parquet glob when the listing is empty.

    Raises:
        HTTPException: 400 for a ``dataset_name`` without a ``/``; 500 when
            ``datasets`` cannot be imported; 422 when the fallback load fails,
            no rows are read, or no path can be resolved.
    """
    dataset_name = payload.dataset_name.strip()
    # An empty name contains no "/", so this covers both rejection cases.
    if "/" not in dataset_name:
        raise HTTPException(
            status_code = 400,
            detail = "dataset_name must be a Hugging Face repo id like org/repo",
        )

    try:
        from datasets import load_dataset
    except ImportError as exc:
        raise HTTPException(
            status_code = 500, detail = f"seed inspect dependencies unavailable: {exc}"
        ) from exc

    split = _normalize_optional_text(payload.split) or DEFAULT_SPLIT
    subset = _normalize_optional_text(payload.subset)
    token = _normalize_optional_text(payload.hf_token)
    preview_size = int(payload.preview_size)

    def _stream_preview(data_file: str | None = None) -> list[dict[str, Any]]:
        # One streaming attempt, optionally pinned to a single repo file.
        load_kwargs = _build_stream_load_kwargs(
            dataset_name = dataset_name,
            split = split,
            subset = subset,
            token = token,
            data_file = data_file,
        )
        return _load_preview_rows(
            load_dataset_fn = load_dataset,
            load_kwargs = load_kwargs,
            preview_size = preview_size,
        )

    preview_rows: list[dict[str, Any]] = []
    data_files = _list_hf_data_files(dataset_name = dataset_name, token = token)

    selected_file = _select_best_file(data_files, split)
    if selected_file:
        try:
            preview_rows = _stream_preview(selected_file)
        except (ValueError, OSError, RuntimeError):
            # Best effort only; the split-level load below is the fallback.
            preview_rows = []

    if not preview_rows:
        try:
            preview_rows = _stream_preview()
        except (ValueError, OSError, RuntimeError) as exc:
            raise HTTPException(
                status_code = 422, detail = f"seed inspect failed: {exc}"
            ) from exc

    if not preview_rows:
        raise HTTPException(
            status_code = 422, detail = "dataset appears empty or unreadable"
        )
    preview_rows = _serialize_preview_rows(preview_rows)
    columns = _extract_columns(preview_rows)

    if data_files:
        resolved_path = _resolve_seed_hf_path(dataset_name, data_files, split)
        if not resolved_path:
            raise HTTPException(
                status_code = 422, detail = "unable to resolve seed dataset path"
            )
    else:
        resolved_path = f"datasets/{dataset_name}/**/*.parquet"

    return SeedInspectResponse(
        dataset_name = dataset_name,
        resolved_path = resolved_path,
        columns = columns,
        preview_rows = preview_rows,
        split = split,
        subset = subset,
    )


@router.post("/seed/inspect-upload", response_model = SeedInspectResponse)
def inspect_seed_upload(payload: SeedInspectUploadRequest) -> SeedInspectResponse:
    """
    Store an uploaded seed file and return a preview of its rows.

    The file suffix is checked against the set allowed for the request's
    ``seed_source_type`` (``"unstructured"`` or the default ``"local"``). The
    decoded payload must be non-empty and at most 50 MB. The file is written
    under ``SEED_UPLOAD_DIR`` with a random hex prefix, then previewed.

    If writing or previewing fails, the stored file is removed again. Its
    path is only returned to the client on success, so a file left behind by
    a failed request would be unreachable and would accumulate on disk.

    Raises:
        HTTPException: 400 for an unsupported suffix, invalid base64 or an
            empty payload; 413 for a payload over 50 MB; 422 when the file
            yields no preview rows, plus whatever the preview readers raise.
    """
    seed_source_type = _normalize_optional_text(payload.seed_source_type) or "local"
    is_unstructured = seed_source_type == "unstructured"
    filename = _sanitize_filename(payload.filename)
    ext = Path(filename).suffix.lower()

    allowed_exts = UNSTRUCTURED_UPLOAD_EXTS if is_unstructured else LOCAL_UPLOAD_EXTS
    if ext not in allowed_exts:
        allowed = ", ".join(sorted(allowed_exts))
        raise HTTPException(
            status_code = 400,
            detail = f"unsupported file type: {ext}. allowed: {allowed}",
        )

    file_bytes = _decode_base64_payload(payload.content_base64)
    if not file_bytes:
        raise HTTPException(status_code = 400, detail = "empty upload payload")
    max_size_bytes = 50 * 1024 * 1024
    if len(file_bytes) > max_size_bytes:
        raise HTTPException(status_code = 413, detail = "file too large (max 50MB)")

    ensure_dir(SEED_UPLOAD_DIR)
    stored_name = f"{uuid4().hex}_{filename}"
    stored_path = SEED_UPLOAD_DIR / stored_name

    try:
        stored_path.write_bytes(file_bytes)

        if is_unstructured:
            preview_rows = _read_preview_rows_from_unstructured_file(
                path = stored_path,
                preview_size = int(payload.preview_size),
                chunk_size = payload.unstructured_chunk_size,
                chunk_overlap = payload.unstructured_chunk_overlap,
            )
        else:
            preview_rows = _read_preview_rows_from_local_file(
                stored_path,
                int(payload.preview_size),
            )
        if not preview_rows:
            raise HTTPException(
                status_code = 422, detail = "dataset appears empty or unreadable"
            )
    except Exception:
        # Drop the orphaned upload, then surface the original error unchanged.
        try:
            stored_path.unlink()
        except OSError:
            # Cleanup is best effort (the file may never have been created);
            # it must not mask the error being re-raised.
            pass
        raise

    columns = _extract_columns(preview_rows)

    return SeedInspectResponse(
        dataset_name = filename,
        resolved_path = str(stored_path),
        columns = columns,
        preview_rows = preview_rows,
        split = None,
        subset = None,
    )


================================================
FILE: studio/backend/routes/data_recipe/validate.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Validation endpoints for data recipe."""

from __future__ import annotations

from typing import Any

from fastapi import APIRouter, HTTPException

from core.data_recipe.service import (
    build_config_builder,
    create_data_designer,
    validate_recipe,
)
from models.data_recipe import RecipePayload, ValidateError, ValidateResponse

router = APIRouter()


def _collect_validation_errors(recipe: dict[str, Any]) -> list[ValidateError]:
    """
    Re-run configuration validation to obtain structured, per-column errors.

    Best effort: returns an empty list when the ``data_designer`` internals
    cannot be imported, or when preparing the configuration raises
    ``TypeError``, ``ValueError`` or ``AttributeError``. Only violations at
    ``ViolationLevel.ERROR`` are reported.
    """
    try:
        from data_designer.engine.compiler import (
            _add_internal_row_id_column_if_needed,
            _get_allowed_references,
            _resolve_and_add_seed_columns,
        )
        from data_designer.engine.validation import (
            ViolationLevel,
            validate_data_designer_config,
        )
    except ImportError:
        return []

    try:
        builder = build_config_builder(recipe)
        designer = create_data_designer(recipe)
        resource_provider = designer._create_resource_provider(  # type: ignore[attr-defined]
            "validate-configuration",
            builder,
        )
        config = builder.build()
        _resolve_and_add_seed_columns(config, resource_provider.seed_reader)
        _add_internal_row_id_column_if_needed(config)
        violations = validate_data_designer_config(
            columns = config.columns,
            processor_configs = config.processors or [],
            allowed_references = _get_allowed_references(config),
        )
    except (TypeError, ValueError, AttributeError):
        return []

    collected: list[ValidateError] = []
    for violation in violations:
        if violation.level == ViolationLevel.ERROR:
            # `type` may or may not expose `.value`; a missing one maps to None.
            code = getattr(violation.type, "value", None)
            path = violation.column or None
            message = str(violation.message).strip() or "Validation failed."
            collected.append(
                ValidateError(
                    message = message,
                    path = path,
                    code = code,
                )
            )
    return collected


@router.post("/validate", response_model = ValidateResponse)
def validate(payload: RecipePayload) -> ValidateResponse:
    """
    Validate a recipe and report the outcome as a ``ValidateResponse``.

    A recipe without columns, or one that ``validate_recipe`` rejects, yields
    ``valid = False`` with error entries. Structured errors are preferred;
    otherwise the exception text is used as the single error.

    Raises:
        HTTPException: 503 when ``validate_recipe`` raises ``RuntimeError``.
    """
    recipe = payload.recipe
    if not recipe.get("columns"):
        missing_columns = ValidateError(message = "Recipe must include columns.")
        return ValidateResponse(valid = False, errors = [missing_columns])

    try:
        validate_recipe(recipe)
    except RuntimeError as exc:
        raise HTTPException(status_code = 503, detail = str(exc)) from exc
    except Exception as exc:
        raw_detail = str(exc).strip() or "Validation failed."
        structured = _collect_validation_errors(recipe)
        if not structured:
            structured = [ValidateError(message = raw_detail)]
        return ValidateResponse(
            valid = False,
            errors = structured,
            raw_detail = raw_detail,
        )
    else:
        return ValidateResponse(valid = True)


================================================
FILE: studio/backend/routes/datasets.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Datasets API routes
"""

import base64
import io
import json
import sys
from pathlib import Path
from uuid import uuid4
from fastapi import APIRouter, Depends, HTTPException, UploadFile
import structlog
from loggers import get_logger

# Put the directory three levels above this file on sys.path if it is absent.
# NOTE(review): for a file at studio/backend/routes/datasets.py, three
# `.parent` hops land on `studio/`, one level above `backend/` - confirm which
# directory is meant. `loggers` is also imported above, before this runs.
backend_path = Path(__file__).parent.parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

# Import dataset utilities
from utils.datasets import check_dataset_format
from auth.authentication import get_current_subject

router = APIRouter()
logger = get_logger(__name__)


from models.datasets import (
    AiAssistMappingRequest,
    AiAssistMappingResponse,
    CheckFormatRequest,
    CheckFormatResponse,
    LocalDatasetItem,
    LocalDatasetsResponse,
    UploadDatasetResponse,
)
from utils.paths import (
    dataset_uploads_root,
    ensure_dir,
    recipe_datasets_root,
    resolve_dataset_path,
)


def _serialize_preview_value(value):
    """make it json safe for client preview ⊂(◉‿◉)つ"""
    if value is None or isinstance(value, (str, int, float, bool)):
        return value

    try:
        from PIL.Image import Image as PILImage

        if isinstance(value, PILImage):
            buffer = io.BytesIO()
            value.convert("RGB").save(buffer, format = "JPEG", quality = 85)
            return {
                "type": "image",
                "mime": "image/jpeg",
                "width": value.width,
                "height": value.height,
                "data": base64.b64encode(buffer.getvalue()).decode("ascii"),
            }
    except Exception:
        pass

    if isinstance(value, dict):
        return {str(key): _serialize_preview_value(item) for key, item in value.items()}

    if isinstance(value, (list, tuple)):
        return [_serialize_preview_value(item) for item in value]

    return str(value)


def _serialize_preview_rows(rows):
    """Return one JSON-safe dict per row, with string keys and serialized values."""
    serialized = []
    for row in rows:
        # dict(row) accepts any mapping-like row, not only plain dicts.
        converted = {}
        for key, cell in dict(row).items():
            converted[str(key)] = _serialize_preview_value(cell)
        serialized.append(converted)
    return serialized


# --- Endpoints ---

# Recognized data-file extensions for the single-file fallback approach.
# Tabular formats are preferred over archives for Tier 1 preview because
# archives (e.g. images.zip) may be loaded as ImageFolder datasets with
# synthetic columns (image/label) that don't match the real dataset schema.
# Tabular data-file suffixes.
_TABULAR_EXTS = (".parquet", ".json", ".jsonl", ".csv", ".tsv", ".arrow")
# Archive/compressed suffixes.
# NOTE(review): ".txt" is grouped with the archives here - confirm intended.
_ARCHIVE_EXTS = (".tar", ".tar.gz", ".tgz", ".gz", ".zst", ".zip", ".txt")
# All recognized suffixes, tabular ones first.
DATA_EXTS = _TABULAR_EXTS + _ARCHIVE_EXTS
# Suffixes for local files, as a tuple and as a set.
# NOTE(review): both hold the same four suffixes; presumably one serves
# str.endswith and the other membership tests - verify against the callers.
LOCAL_FILE_EXTS = (".json", ".jsonl", ".csv", ".parquet")
LOCAL_UPLOAD_EXTS = {".csv", ".json", ".jsonl", ".parquet"}
# Root directories, resolved once at import time by the utils.paths helpers.
LOCAL_DATASETS_ROOT = recipe_datasets_root()
DATASET_UPLOAD_DIR = dataset_uploads_root()


def _safe_read_metadata(path: Path) -> dict | None:
    try:
        payload = json.loads(path.read_text(encoding = "utf-8"))
    except (OSError, ValueError, TypeError):
        return None
    if not isinstance(payload, dict):
        return None
    return payload


def _safe_read_rows_from_metadata(payload: dict | None) -> int | None:
    if not payload:
        return None
    for key in ("actual_num_records", "target_num_records"):
        value = payload.get(key)
        if isinstance(value, int):
            return value
    return None


def _safe_read_metadata_summary(payload: dict | None) -> dict | None:
    if not payload:
        return None

    actual_num_records = (
        payload.get("actual_num_records")
        if isinstance(payload.get("actual_num_records"), int)
        else None
    )
    target_num_records = (
        payload.get("target_num_records")
        if isinstance(payload.get("target_num_records"), int)
        else actual_num_records
    )

    columns: list[str] | None = None
    schema = payload.get("schema")
    if isinstance(schema, dict):
        columns = [str(key) for key in schema.keys()]
    if not columns:
        stats = payload.get("column_statistics")
        if isinstance(stats, list):
            derived = [
                str(item.get("column_name"))
                for item in stats
                if isinstance(item, dict) and item.get("column_name")
            ]
            columns = derived or None

    parquet_files_count = None
    file_paths = payload.get("file_paths")
    if isinstance(file_paths, dict):
        parquet_files = file_paths.get("parquet-files")
        if isinstance(parquet_files, list):
            parquet_files_count = len(parquet_files)

    total_num_batches = (
        payload.get("total_num_batches")
        if isinstance(payload.get("total_num_batches"), int)
        else parquet_files_count
    )
    num_completed_batches = (
        payload.get("num_completed_batches")
        if isinstance(payload.get("num_completed_batches"), int)
        else total_num_batches
    )

    return {
        "actual_num_records": actual_num_records,
        "target_num_records": target_num_records,
        "total_num_batches": total_num_batches,
        "num_completed_batches": num_completed_batches,
        "columns": columns,
    }


def _build_local_dataset_items() -> list[LocalDatasetItem]:
    """List recipe datasets found under ``LOCAL_DATASETS_ROOT``.

    A directory qualifies when its name starts with ``recipe_`` and it has a
    ``parquet-files`` sub-directory containing at least one ``*.parquet``
    file. Row count and summary come from ``metadata.json`` when present.
    The result is ordered by directory mtime, newest first (entries whose
    mtime could not be read sort as 0).
    """
    if not LOCAL_DATASETS_ROOT.exists():
        return []

    collected: list[LocalDatasetItem] = []
    for recipe_dir in LOCAL_DATASETS_ROOT.iterdir():
        if not (recipe_dir.is_dir() and recipe_dir.name.startswith("recipe_")):
            continue

        shard_dir = recipe_dir / "parquet-files"
        has_shards = shard_dir.exists() and any(shard_dir.glob("*.parquet"))
        if not has_shards:
            continue

        # Metadata is optional; both values stay None without it.
        row_count = None
        summary = None
        manifest = recipe_dir / "metadata.json"
        if manifest.exists():
            manifest_payload = _safe_read_metadata(manifest)
            row_count = _safe_read_rows_from_metadata(manifest_payload)
            summary = _safe_read_metadata_summary(manifest_payload)

        try:
            modified = recipe_dir.stat().st_mtime
        except OSError:
            modified = None

        dataset_item = LocalDatasetItem(
            id = recipe_dir.name,
            label = recipe_dir.name,
            path = str(shard_dir.resolve()),
            rows = row_count,
            updated_at = modified,
            metadata = summary,
        )
        collected.append(dataset_item)

    return sorted(collected, key = lambda item: item.updated_at or 0, reverse = True)


def _load_local_preview_slice(
    *, dataset_path: Path, train_split: str, preview_size: int
):
    """Load a local dataset and return ``(preview_slice, total_rows)``.

    For a directory, all ``*.parquet`` files under ``parquet-files/`` (or the
    directory itself when that sub-directory is absent) are loaded together.
    If there are none, the first file in the directory matching
    ``LOCAL_FILE_EXTS`` is loaded instead. For a file, the loader is chosen
    from its suffix.

    Raises:
        HTTPException: 400 when the directory holds no supported file or the
            file suffix is not json/jsonl/csv/parquet.
    """
    from datasets import load_dataset

    dataset = None

    if dataset_path.is_dir():
        nested = dataset_path / "parquet-files"
        shard_root = nested if nested.exists() else dataset_path
        shards = sorted(shard_root.glob("*.parquet"))
        if shards:
            dataset = load_dataset(
                "parquet",
                data_files = [str(shard) for shard in shards],
                split = train_split,
            )
        else:
            # No parquet shards: fall back to the first supported file found
            # directly inside the directory, in LOCAL_FILE_EXTS order.
            found: list[Path] = [
                match
                for ext in LOCAL_FILE_EXTS
                for match in sorted(dataset_path.glob(f"*{ext}"))
            ]
            if not found:
                raise HTTPException(
                    status_code = 400,
                    detail = "Unsupported local dataset directory (expected parquet/json/jsonl/csv files)",
                )
            dataset_path = found[0]

    if dataset is None:
        # Single-file case: map the suffix to the datasets builder name.
        builder_by_suffix = {
            ".json": "json",
            ".jsonl": "json",
            ".csv": "csv",
            ".parquet": "parquet",
        }
        builder = builder_by_suffix.get(dataset_path.suffix)
        if builder is None:
            raise HTTPException(
                status_code = 400,
                detail = f"Unsupported file format: {dataset_path.suffix}",
            )
        dataset = load_dataset(
            builder, data_files = str(dataset_path), split = train_split
        )

    total_rows = len(dataset)
    head_count = min(preview_size, total_rows)
    return dataset.select(range(head_count)), total_rows


def _sanitize_filename(filename: str) -> str:
    name = Path(filename).name.strip().replace("\x00", "")
    if not name:
        return "dataset_upload"
    return name


@router.post("/upload", response_model = UploadDatasetResponse)
async def upload_dataset(
    file: UploadFile,
    current_subject: str = Depends(get_current_subject),
) -> UploadDatasetResponse:
    """
    Store an uploaded dataset file under DATASET_UPLOAD_DIR.

    The file is saved as ``<uuid hex>_<stem><ext>`` so repeated uploads of the
    same filename never collide. Only extensions in LOCAL_UPLOAD_EXTS are
    accepted (compared case-insensitively).

    Raises:
        HTTPException: 400 for an unsupported extension or an empty payload.
    """
    filename = _sanitize_filename(file.filename or "dataset_upload")
    ext = Path(filename).suffix.lower()
    if ext not in LOCAL_UPLOAD_EXTS:
        allowed = ", ".join(sorted(LOCAL_UPLOAD_EXTS))
        raise HTTPException(
            status_code = 400,
            detail = f"Unsupported file type: {ext}. Allowed: {allowed}",
        )

    ensure_dir(DATASET_UPLOAD_DIR)
    stem = Path(filename).stem
    stored_name = f"{uuid4().hex}_{stem}{ext}"
    stored_path = DATASET_UPLOAD_DIR / stored_name

    # Stream file to disk in chunks to avoid holding entire file in memory
    bytes_written = 0
    try:
        with open(stored_path, "wb") as f:
            while chunk := await file.read(1024 * 1024):
                f.write(chunk)
                bytes_written += len(chunk)
    except BaseException:
        # Includes task cancellation (client disconnect): never leave a
        # partially written file behind in the uploads directory.
        stored_path.unlink(missing_ok = True)
        raise

    if bytes_written == 0:
        stored_path.unlink(missing_ok = True)
        raise HTTPException(status_code = 400, detail = "Empty upload payload")

    return UploadDatasetResponse(filename = filename, stored_path = str(stored_path))


@router.get("/local", response_model = LocalDatasetsResponse)
def list_local_datasets(
    current_subject: str = Depends(get_current_subject),
) -> LocalDatasetsResponse:
    """Return the local datasets collected by ``_build_local_dataset_items``."""
    local_items = _build_local_dataset_items()
    return LocalDatasetsResponse(datasets = local_items)


@router.post("/check-format", response_model = CheckFormatResponse)
def check_format(
    request: CheckFormatRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Check if a dataset requires manual column mapping.

    Strategy for HuggingFace datasets:
      1. list_repo_files → pick the first data file → load_dataset(data_files=[…])
         Avoids resolving thousands of files; typically ~2-4 s.
      2. Full streaming load_dataset as a last-resort fallback.

    Local files are loaded directly.

    Using a plain `def` (not async) so FastAPI runs this in a thread-pool,
    preventing any blocking IO from freezing the event loop.

    Returns:
        CheckFormatResponse with the detection result, up to PREVIEW_SIZE
        serialized preview rows, and ``total_rows`` (only known for local
        datasets; ``None`` for streamed Hub datasets).

    Raises:
        HTTPException: 400 when the dataset yields no rows or a local path is
            unsupported; 500 for any other failure.
    """
    try:
        from itertools import islice
        from datasets import Dataset, load_dataset
        from utils.datasets import format_dataset

        PREVIEW_SIZE = 10

        logger.info(f"Checking format for dataset: {request.dataset_name}")

        dataset_path = resolve_dataset_path(request.dataset_name)
        # Only the local branch can report a row count; streamed Hub datasets
        # leave this as None.
        total_rows = None
        # Same default for the local and the Hub (Tier 2) paths, so an unset
        # split never reaches load_dataset() as None.
        train_split = request.train_split or "train"

        if dataset_path.exists():
            # ── Local file ──────────────────────────────────────────
            preview_slice, total_rows = _load_local_preview_slice(
                dataset_path = dataset_path,
                train_split = train_split,
                preview_size = PREVIEW_SIZE,
            )
        else:
            # ── HuggingFace dataset ─────────────────────────────────
            # Tier 1: list_repo_files → load only the first data file
            preview_slice = None

            try:
                from huggingface_hub import HfApi

                api = HfApi()
                repo_files = api.list_repo_files(
                    request.dataset_name,
                    repo_type = "dataset",
                    token = request.hf_token or None,
                )
                data_files = [f for f in repo_files if f.endswith(DATA_EXTS)]

                # Prefer tabular formats over archives (e.g. images.zip → ImageFolder
                # with synthetic image/label columns that don't match the real schema).
                tabular_files = [f for f in data_files if f.endswith(_TABULAR_EXTS)]
                candidates = tabular_files or data_files

                # When a subset is specified, narrow to files whose name matches
                # (e.g. subset="testmini" → prefer "testmini.parquet").
                if request.subset and candidates:
                    subset_matches = [
                        f for f in candidates if request.subset in Path(f).stem
                    ]
                    if subset_matches:
                        candidates = subset_matches

                if candidates:
                    first_file = candidates[0]
                    logger.info(f"Tier 1: loading single file {first_file}")
                    # With explicit data_files the loaded split is named
                    # "train", independent of the requested split.
                    load_kwargs = {
                        "path": request.dataset_name,
                        "data_files": [first_file],
                        "split": "train",
                        "streaming": True,
                    }
                    if request.hf_token:
                        load_kwargs["token"] = request.hf_token

                    streamed_ds = load_dataset(**load_kwargs)
                    rows = list(islice(streamed_ds, PREVIEW_SIZE))
                    if rows:
                        preview_slice = Dataset.from_list(rows)
            except Exception as e:
                # Tier 1 is an optimisation only; any failure falls through
                # to the Tier 2 full streaming load below.
                logger.warning(f"Tier 1 (single-file) failed: {e}")

            if preview_slice is None:
                # Tier 2: full streaming (resolves all files — slow for large repos)
                logger.info("Tier 2: falling back to full streaming load_dataset")
                load_kwargs = {
                    "path": request.dataset_name,
                    "split": train_split,
                    "streaming": True,
                }
                if request.subset:
                    load_kwargs["name"] = request.subset
                if request.hf_token:
                    load_kwargs["token"] = request.hf_token

                streamed_ds = load_dataset(**load_kwargs)

                rows = list(islice(streamed_ds, PREVIEW_SIZE))
                if not rows:
                    raise HTTPException(
                        status_code = 400,
                        detail = "Dataset appears to be empty or could not be streamed",
                    )

                preview_slice = Dataset.from_list(rows)

        # Run lightweight format check on the preview slice
        result = check_dataset_format(preview_slice, is_vlm = request.is_vlm)

        logger.info(
            f"Format check result: requires_mapping={result['requires_manual_mapping']}, format={result['detected_format']}, is_image={result.get('is_image', False)}"
        )

        # Generate preview samples
        preview_samples = None
        if not result["requires_manual_mapping"]:
            if result.get("suggested_mapping"):
                # Heuristic-detected: show raw data so columns match the API response.
                # Processing (column stripping) happens at training time, not preview.
                preview_samples = _serialize_preview_rows(preview_slice)
            else:
                try:
                    format_result = format_dataset(
                        preview_slice,
                        format_type = "auto",
                        num_proc = 1,  # Only 10 preview rows — no need for multiprocessing
                    )
                    processed = format_result["dataset"]
                    preview_samples = _serialize_preview_rows(processed)
                except Exception as e:
                    logger.warning(
                        f"Processed preview generation failed (non-fatal): {e}"
                    )
                    preview_samples = _serialize_preview_rows(preview_slice)
        else:
            preview_samples = _serialize_preview_rows(preview_slice)

        # Collect warnings: from check_dataset_format + URL-based image detection
        warning = result.get("warning")
        image_col = result.get("detected_image_column")
        if image_col and image_col in (result.get("columns") or []):
            try:
                sample_val = preview_slice[0][image_col]
                if isinstance(sample_val, str) and sample_val.startswith(
                    ("http://", "https://")
                ):
                    url_warning = (
                        "This dataset contains image URLs instead of embedded images. "
                        "Images will be downloaded during training, which may be slow for large datasets."
                    )
                    logger.info(f"URL-based image column detected: {image_col}")
                    warning = f"{warning} {url_warning}" if warning else url_warning
            except Exception as url_err:
                # Best-effort hint only: never fail the request over it, but
                # leave a trace instead of swallowing the error silently.
                logger.debug(f"URL-based image detection skipped: {url_err}")

        return CheckFormatResponse(
            requires_manual_mapping = result["requires_manual_mapping"],
            detected_format = result["detected_format"],
            columns = result["columns"],
            is_image = result.get("is_image", False),
            is_audio = result.get("is_audio", False),
            multimodal_columns = result.get("multimodal_columns"),
            suggested_mapping = result.get("suggested_mapping"),
            detected_image_column = result.get("detected_image_column"),
            detected_audio_column = result.get("detected_audio_column"),
            detected_text_column = result.get("detected_text_column"),
            detected_speaker_column = result.get("detected_speaker_column"),
            preview_samples = preview_samples,
            total_rows = total_rows,
            warning = warning,
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error checking dataset format: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to check dataset format: {str(e)}"
        ) from e


@router.post("/ai-assist-mapping", response_model = AiAssistMappingResponse)
def ai_assist_mapping(
    request: AiAssistMappingRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Ask the LLM conversion advisor for a column mapping (user-triggered).

    The advisor performs a multi-pass analysis with a 7B helper model:
      Pass 1: Classify dataset type from HF card + samples
      Pass 2: Generate conversion strategy (system prompt, templates)
      Pass 3: Validate conversion quality

    When the advisor reports no success, a ``success = False`` response with
    a warning asking for manual assignment is returned. Any exception is
    logged and surfaced as HTTP 500.
    """
    try:
        from utils.datasets.llm_assist import llm_conversion_advisor

        # Keep the prompt small: at most five samples, each cell stringified
        # and clipped to 200 characters.
        truncated = []
        for sample in request.samples[:5]:
            clipped = {}
            for column in request.columns:
                clipped[column] = str(sample.get(column, ""))[:200]
            truncated.append(clipped)

        advice = llm_conversion_advisor(
            column_names = request.columns,
            samples = truncated,
            dataset_name = request.dataset_name,
            hf_token = request.hf_token,
            model_name = request.model_name,
            model_type = request.model_type,
        )

        if not (advice and advice.get("success")):
            return AiAssistMappingResponse(
                success = False,
                warning = "AI could not determine column roles. Please assign them manually.",
            )

        # Forward the advisor's fields verbatim; absent keys become None.
        forwarded_fields = (
            "suggested_mapping",
            "system_prompt",
            "user_template",
            "assistant_template",
            "label_mapping",
            "dataset_type",
            "is_conversational",
            "user_notification",
        )
        forwarded = {field: advice.get(field) for field in forwarded_fields}
        return AiAssistMappingResponse(success = True, **forwarded)

    except Exception as e:
        logger.error(f"AI assist mapping failed: {e}", exc_info = True)
        raise HTTPException(status_code = 500, detail = f"AI assist failed: {str(e)}")


================================================
FILE: studio/backend/routes/export.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Export API routes: checkpoint discovery and model export operations.
"""

import sys
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Query
import structlog
from loggers import get_logger

# Add backend directory to path
# NOTE(review): this resolves three levels above this file. For a module
# located at studio/backend/routes/ that is the studio/ directory rather than
# backend/ — confirm which directory is meant to be importable.
backend_path = Path(__file__).parent.parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

# Auth
from auth.authentication import get_current_subject

# Import backend functions
try:
    from core.export import get_export_backend
except ImportError:
    parent_backend = backend_path.parent / "backend"
    if str(parent_backend) not in sys.path:
        sys.path.insert(0, str(parent_backend))
    from core.export import get_export_backend

# Import Pydantic models
from models import (
    LoadCheckpointRequest,
    ExportStatusResponse,
    ExportOperationResponse,
    ExportMergedModelRequest,
    ExportBaseModelRequest,
    ExportGGUFRequest,
    ExportLoRAAdapterRequest,
)

router = APIRouter()
logger = get_logger(__name__)


@router.post("/load-checkpoint", response_model = ExportOperationResponse)
async def load_checkpoint(
    request: LoadCheckpointRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Load a checkpoint into the export backend.

    Wraps ExportBackend.load_checkpoint. Before loading, any active inference
    model is unloaded and any active training run is stopped (best-effort;
    failures there are logged and do not abort the load).

    Raises:
        HTTPException: 400 when the backend reports failure, 500 for any
            unexpected error.
    """
    # Local import: used to wait for training shutdown without blocking the
    # event loop (this handler is a coroutine).
    import asyncio

    try:
        # Version switching is handled automatically by the subprocess-based
        # export backend — no need for ensure_transformers_version() here.

        # Free GPU memory: shut down any running inference/training subprocesses
        # before loading the export checkpoint (they'd compete for VRAM).
        try:
            from core.inference import get_inference_backend

            inf = get_inference_backend()
            if inf.active_model_name:
                logger.info(
                    "Unloading inference model '%s' to free GPU memory for export",
                    inf.active_model_name,
                )
                inf._shutdown_subprocess()
                inf.active_model_name = None
                inf.models.clear()
        except Exception as e:
            logger.warning("Could not unload inference model: %s", e)

        try:
            from core.training import get_training_backend

            trn = get_training_backend()
            if trn.is_training_active():
                logger.info("Stopping active training to free GPU memory for export")
                trn.stop_training()
                # Wait for training subprocess to actually exit before proceeding,
                # otherwise it may still hold GPU memory when export tries to load.
                for _ in range(60):  # up to 30s
                    if not trn.is_training_active():
                        break
                    # Yield to the event loop while waiting; time.sleep() here
                    # would stall every other request for up to 30 seconds.
                    await asyncio.sleep(0.5)
                else:
                    logger.warning(
                        "Training subprocess did not exit within 30s, proceeding anyway"
                    )
        except Exception as e:
            logger.warning("Could not stop training: %s", e)

        backend = get_export_backend()
        success, message = backend.load_checkpoint(
            checkpoint_path = request.checkpoint_path,
            max_seq_length = request.max_seq_length,
            load_in_4bit = request.load_in_4bit,
            trust_remote_code = request.trust_remote_code,
        )

        if not success:
            raise HTTPException(status_code = 400, detail = message)

        return ExportOperationResponse(success = True, message = message)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error loading checkpoint: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to load checkpoint: {str(e)}",
        ) from e


@router.post("/cleanup", response_model = ExportOperationResponse)
async def cleanup_export_memory(
    current_subject: str = Depends(get_current_subject),
):
    """
    Release export-related models from memory (GPU/CPU).

    Delegates to the export backend's ``cleanup_memory``; a falsy result or
    any unexpected exception is reported as HTTP 500.
    """
    try:
        cleaned = get_export_backend().cleanup_memory()
        if cleaned:
            return ExportOperationResponse(
                success = True,
                message = "Memory cleanup completed successfully",
            )
        raise HTTPException(
            status_code = 500,
            detail = "Memory cleanup failed. See server logs for details.",
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error during export memory cleanup: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to cleanup export memory: {str(e)}",
        )


@router.get("/status", response_model = ExportStatusResponse)
async def get_export_status(
    current_subject: str = Depends(get_current_subject),
):
    """
    Report the export backend's state: the loaded checkpoint plus the
    vision and PEFT flags (each defaults to False when the backend lacks
    the attribute).
    """
    try:
        export_backend = get_export_backend()
        checkpoint = export_backend.current_checkpoint
        vision_flag = bool(getattr(export_backend, "is_vision", False))
        peft_flag = bool(getattr(export_backend, "is_peft", False))
        return ExportStatusResponse(
            current_checkpoint = checkpoint,
            is_vision = vision_flag,
            is_peft = peft_flag,
        )
    except Exception as e:
        logger.error(f"Error getting export status: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to get export status: {str(e)}",
        )


@router.post("/export/merged", response_model = ExportOperationResponse)
async def export_merged_model(
    request: ExportMergedModelRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Export a merged PEFT model (e.g., 16-bit or 4-bit), optionally pushing
    it to the Hub.

    Delegates to the export backend's ``export_merged_model``. A reported
    failure becomes HTTP 400 with the backend's message; unexpected errors
    become HTTP 500.
    """
    try:
        ok, outcome = get_export_backend().export_merged_model(
            save_directory = request.save_directory,
            format_type = request.format_type,
            push_to_hub = request.push_to_hub,
            repo_id = request.repo_id,
            hf_token = request.hf_token,
            private = request.private,
        )
        if ok:
            return ExportOperationResponse(success = True, message = outcome)
        raise HTTPException(status_code = 400, detail = outcome)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error exporting merged model: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to export merged model: {str(e)}",
        )


@router.post("/export/base", response_model = ExportOperationResponse)
async def export_base_model(
    request: ExportBaseModelRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Export a non-PEFT base model, optionally pushing it to the Hub.

    Delegates to the export backend's ``export_base_model``. A reported
    failure becomes HTTP 400 with the backend's message; unexpected errors
    become HTTP 500.
    """
    try:
        ok, outcome = get_export_backend().export_base_model(
            save_directory = request.save_directory,
            push_to_hub = request.push_to_hub,
            repo_id = request.repo_id,
            hf_token = request.hf_token,
            private = request.private,
            base_model_id = request.base_model_id,
        )
        if ok:
            return ExportOperationResponse(success = True, message = outcome)
        raise HTTPException(status_code = 400, detail = outcome)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error exporting base model: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to export base model: {str(e)}",
        )


@router.post("/export/gguf", response_model = ExportOperationResponse)
async def export_gguf(
    request: ExportGGUFRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Export the current model to GGUF format, optionally pushing it to the Hub.

    Delegates to the export backend's ``export_gguf``. A reported failure
    becomes HTTP 400 with the backend's message; unexpected errors become
    HTTP 500.
    """
    try:
        ok, outcome = get_export_backend().export_gguf(
            save_directory = request.save_directory,
            quantization_method = request.quantization_method,
            push_to_hub = request.push_to_hub,
            repo_id = request.repo_id,
            hf_token = request.hf_token,
        )
        if ok:
            return ExportOperationResponse(success = True, message = outcome)
        raise HTTPException(status_code = 400, detail = outcome)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error exporting GGUF model: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to export GGUF model: {str(e)}",
        )


@router.post("/export/lora", response_model = ExportOperationResponse)
async def export_lora_adapter(
    request: ExportLoRAAdapterRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Export only the LoRA adapter (if the loaded model is PEFT).

    Delegates to the export backend's ``export_lora_adapter``. A reported
    failure becomes HTTP 400 with the backend's message; unexpected errors
    become HTTP 500.
    """
    try:
        ok, outcome = get_export_backend().export_lora_adapter(
            save_directory = request.save_directory,
            push_to_hub = request.push_to_hub,
            repo_id = request.repo_id,
            hf_token = request.hf_token,
            private = request.private,
        )
        if ok:
            return ExportOperationResponse(success = True, message = outcome)
        raise HTTPException(status_code = 400, detail = outcome)
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error exporting LoRA adapter: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to export LoRA adapter: {str(e)}",
        )


================================================
FILE: studio/backend/routes/inference.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Inference API routes for model loading and text generation.
"""

import sys
import time
import uuid
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse, JSONResponse
from typing import Optional
import json
import structlog
from loggers import get_logger
import asyncio
import threading


# Add backend directory to path
# NOTE(review): this resolves three levels above this file. For a module
# located at studio/backend/routes/ that is the studio/ directory rather than
# backend/ — confirm which directory is meant to be importable.
backend_path = Path(__file__).parent.parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

# Import backend functions
try:
    from core.inference import get_inference_backend
    from core.inference.llama_cpp import LlamaCppBackend
    from utils.models import ModelConfig
    from utils.inference import load_inference_config
    from utils.models.model_config import load_model_defaults
except ImportError:
    parent_backend = backend_path.parent / "backend"
    if str(parent_backend) not in sys.path:
        sys.path.insert(0, str(parent_backend))
    from core.inference import get_inference_backend
    from core.inference.llama_cpp import LlamaCppBackend
    from utils.models import ModelConfig
    from utils.inference import load_inference_config
    from utils.models.model_config import load_model_defaults

from models.inference import (
    LoadRequest,
    UnloadRequest,
    GenerateRequest,
    LoadResponse,
    UnloadResponse,
    InferenceStatusResponse,
    ChatCompletionRequest,
    ChatCompletionChunk,
    ChatCompletion,
    ChunkChoice,
    ChoiceDelta,
    CompletionChoice,
    CompletionMessage,
    ValidateModelRequest,
    ValidateModelResponse,
)
from auth.authentication import get_current_subject

import io
import wave
import base64
import numpy as np

router = APIRouter()
logger = get_logger(__name__)


# GGUF inference backend (llama-server)
# Instantiated once when this module is imported; get_llama_cpp_backend()
# hands out this same object on every call.
_llama_cpp_backend = LlamaCppBackend()


def get_llama_cpp_backend() -> LlamaCppBackend:
    """Return the module-level ``LlamaCppBackend`` instance."""
    return _llama_cpp_backend


def _resolve_adapter_load_in_4bit(config, load_in_4bit: bool) -> bool:
    """
    Return the effective ``load_in_4bit`` flag for a LoRA adapter.

    For non-LoRA configs, configs without a local ``path``, or adapters without
    an ``adapter_config.json``, the requested flag is returned unchanged.

    Otherwise the flag is reconciled with the ``unsloth_training_method`` key of
    ``adapter_config.json`` (the training pipeline patches this file with
    ``'qlora'`` or ``'lora'``): ``'lora'`` forces ``False``, ``'qlora'`` forces
    ``True``. When the key is absent, a base model name without the
    ``-bnb-4bit`` suffix turns a requested 4-bit load off.

    Reading the file is best-effort: any error is logged as a warning and the
    requested flag is returned.
    """
    if not (config.is_lora and config.path):
        return load_in_4bit

    adapter_cfg_path = Path(config.path) / "adapter_config.json"
    if not adapter_cfg_path.exists():
        return load_in_4bit

    try:
        with open(adapter_cfg_path, encoding = "utf-8") as f:
            adapter_cfg = json.load(f)
        training_method = adapter_cfg.get("unsloth_training_method")

        if training_method == "lora" and load_in_4bit:
            logger.info(
                "adapter_config.json says unsloth_training_method='lora' — "
                "setting load_in_4bit=False to match 16-bit training"
            )
            return False

        if training_method == "qlora" and not load_in_4bit:
            logger.info(
                "adapter_config.json says unsloth_training_method='qlora' — "
                "setting load_in_4bit=True to match QLoRA training"
            )
            return True

        if training_method:
            # Training method already agrees with the request (or is unknown).
            logger.info(
                f"Training method: {training_method}, load_in_4bit={load_in_4bit}"
            )
            return load_in_4bit

        # No unsloth_training_method — fallback to base model name
        if (
            config.base_model
            and "-bnb-4bit" not in config.base_model.lower()
            and load_in_4bit
        ):
            logger.info(
                f"No unsloth_training_method in adapter_config.json. "
                f"Base model '{config.base_model}' has no -bnb-4bit suffix — "
                f"setting load_in_4bit=False"
            )
            return False
    except Exception as e:
        logger.warning(f"Could not read adapter_config.json: {e}")

    return load_in_4bit


def _chat_template_for_loaded_model(backend, model_name) -> Optional[str]:
    """
    Return the chat template recorded for ``model_name`` in ``backend.models``.

    The lookup is best-effort: a missing entry yields ``None`` and any error is
    logged as a warning (never raised), so a load response is still produced.
    """
    try:
        model_info = backend.models.get(model_name, {})
        return model_info.get("chat_template_info", {}).get("template")
    except Exception as e:
        logger.warning(f"Could not retrieve chat template for {model_name}: {e}")
        return None


@router.post("/load", response_model = LoadResponse)
async def load_model(
    request: LoadRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Load a model for inference.

    The model_path should be a clean identifier from GET /models/list.
    Returns inference configuration parameters (temperature, top_p, top_k, min_p)
    from the model's YAML config, falling back to default.yaml for missing values.

    GGUF models are loaded via llama-server (llama.cpp) instead of Unsloth.

    If the requested model is already the active one, no reload happens and the
    response carries ``status="already_loaded"``.

    Raises:
        HTTPException(400): the identifier cannot be resolved, or the load
            failed and the model's YAML defaults say it needs
            ``trust_remote_code`` while the request did not enable it.
        HTTPException(500): the backend reported a failed load or raised.
    """
    try:
        # Version switching is handled automatically by the subprocess-based
        # inference backend — no need for ensure_transformers_version() here.

        # ── Already-loaded check: skip reload if the exact model is active ──
        backend = get_inference_backend()
        llama_backend = get_llama_cpp_backend()

        if request.gguf_variant:
            if (
                llama_backend.is_loaded
                and llama_backend.hf_variant
                and llama_backend.hf_variant.lower() == request.gguf_variant.lower()
                and llama_backend.model_identifier
                and llama_backend.model_identifier.lower() == request.model_path.lower()
            ):
                logger.info(
                    f"Model already loaded (GGUF): {request.model_path} variant={request.gguf_variant}, skipping reload"
                )
                from utils.models import is_audio_input_type

                inference_config = load_inference_config(llama_backend.model_identifier)
                # _is_audio / _audio_type are only set on the backend after a
                # load through this route, hence the getattr defaults.
                _gguf_audio = getattr(llama_backend, "_audio_type", None)
                _gguf_is_audio = getattr(llama_backend, "_is_audio", False)
                return LoadResponse(
                    status = "already_loaded",
                    model = llama_backend.model_identifier,
                    display_name = llama_backend.model_identifier,
                    is_vision = llama_backend._is_vision,
                    is_lora = False,
                    is_gguf = True,
                    is_audio = _gguf_is_audio,
                    audio_type = _gguf_audio,
                    has_audio_input = is_audio_input_type(_gguf_audio)
                    if _gguf_audio
                    else False,
                    inference = inference_config,
                    context_length = llama_backend.context_length,
                    supports_reasoning = llama_backend.supports_reasoning,
                    # Report the same capability fields as a fresh GGUF load so
                    # clients see identical metadata on both paths.
                    supports_tools = llama_backend.supports_tools,
                    cache_type_kv = llama_backend.cache_type_kv,
                    chat_template = llama_backend.chat_template,
                )
        elif (
            backend.active_model_name
            and backend.active_model_name.lower() == request.model_path.lower()
        ):
            logger.info(
                f"Model already loaded (Unsloth): {request.model_path}, skipping reload"
            )
            inference_config = load_inference_config(backend.active_model_name)
            _model_info = backend.models.get(backend.active_model_name, {})
            return LoadResponse(
                status = "already_loaded",
                model = backend.active_model_name,
                display_name = backend.active_model_name,
                is_vision = _model_info.get("is_vision", False),
                is_lora = _model_info.get("is_lora", False),
                is_gguf = False,
                is_audio = _model_info.get("is_audio", False),
                audio_type = _model_info.get("audio_type"),
                has_audio_input = _model_info.get("has_audio_input", False),
                inference = inference_config,
                chat_template = _chat_template_for_loaded_model(
                    backend, backend.active_model_name
                ),
            )

        # Create config using clean factory method
        # is_lora is auto-detected from adapter_config.json on disk/HF
        config = ModelConfig.from_identifier(
            model_id = request.model_path,
            hf_token = request.hf_token,
            gguf_variant = request.gguf_variant,
        )

        if not config:
            raise HTTPException(
                status_code = 400,
                detail = f"Invalid model identifier: {request.model_path}",
            )

        # ── GGUF path: load via llama-server ──────────────────────
        if config.is_gguf:
            # Unload any active Unsloth model first to free VRAM
            if backend.active_model_name:
                logger.info(
                    f"Unloading Unsloth model '{backend.active_model_name}' before loading GGUF"
                )
                backend.unload_model(backend.active_model_name)

            # Arguments shared by both llama-server load modes.
            gguf_load_kwargs = dict(
                model_identifier = config.identifier,
                is_vision = config.is_vision,
                n_ctx = request.max_seq_length,
                chat_template_override = request.chat_template_override,
                cache_type_kv = request.cache_type_kv,
            )
            if config.gguf_hf_repo:
                # HF mode: download via huggingface_hub then start llama-server
                gguf_load_kwargs.update(
                    hf_repo = config.gguf_hf_repo,
                    hf_variant = config.gguf_variant,
                    hf_token = request.hf_token,
                )
            else:
                # Local mode: llama-server loads via -m <path>
                gguf_load_kwargs.update(
                    gguf_path = config.gguf_file,
                    mmproj_path = config.gguf_mmproj_file,
                )

            # Run in a thread so the event loop stays free for progress
            # polling and other requests during the (potentially long)
            # GGUF download + llama-server startup.
            success = await asyncio.to_thread(
                llama_backend.load_model, **gguf_load_kwargs
            )

            if not success:
                raise HTTPException(
                    status_code = 500,
                    detail = f"Failed to load GGUF model: {config.display_name}",
                )

            logger.info(f"Loaded GGUF model via llama-server: {config.identifier}")

            # Detect TTS audio by probing the loaded model's vocabulary
            from utils.models import is_audio_input_type

            _gguf_audio = llama_backend.detect_audio_type()
            _gguf_is_audio = _gguf_audio in ("snac", "bicodec", "dac")
            # Remember the result on the backend; the status and audio routes
            # read these attributes back.
            llama_backend._is_audio = _gguf_is_audio
            llama_backend._audio_type = _gguf_audio
            if _gguf_is_audio:
                logger.info(f"GGUF model detected as audio: audio_type={_gguf_audio}")
                await asyncio.to_thread(llama_backend.init_audio_codec, _gguf_audio)

            inference_config = load_inference_config(config.identifier)

            return LoadResponse(
                status = "loaded",
                model = config.identifier,
                display_name = config.display_name,
                is_vision = config.is_vision,
                is_lora = False,
                is_gguf = True,
                is_audio = _gguf_is_audio,
                audio_type = _gguf_audio,
                has_audio_input = is_audio_input_type(_gguf_audio),
                inference = inference_config,
                context_length = llama_backend.context_length,
                supports_reasoning = llama_backend.supports_reasoning,
                supports_tools = llama_backend.supports_tools,
                cache_type_kv = llama_backend.cache_type_kv,
                chat_template = llama_backend.chat_template,
            )

        # ── Standard path: load via Unsloth/transformers ──────────

        # Unload any active GGUF model first
        if llama_backend.is_loaded:
            logger.info("Unloading GGUF model before loading Unsloth model")
            llama_backend.unload_model()

        # Shut down any export subprocess to free VRAM (best-effort)
        try:
            from core.export import get_export_backend

            exp_backend = get_export_backend()
            if exp_backend.current_checkpoint:
                logger.info(
                    "Shutting down export subprocess to free GPU memory for inference"
                )
                exp_backend._shutdown_subprocess()
                exp_backend.current_checkpoint = None
                exp_backend.is_vision = False
                exp_backend.is_peft = False
        except Exception as e:
            logger.warning("Could not shut down export subprocess: %s", e)

        # Auto-detect quantization for LoRA adapters from adapter_config.json
        load_in_4bit = _resolve_adapter_load_in_4bit(config, request.load_in_4bit)

        # Load the model in a thread so the event loop stays free
        # for download progress polling and other requests.
        success = await asyncio.to_thread(
            backend.load_model,
            config = config,
            max_seq_length = request.max_seq_length,
            load_in_4bit = load_in_4bit,
            hf_token = request.hf_token,
            trust_remote_code = request.trust_remote_code,
        )

        if not success:
            # Check if YAML says this model needs trust_remote_code
            if not request.trust_remote_code:
                model_defaults = load_model_defaults(config.identifier)
                yaml_trust = model_defaults.get("inference", {}).get(
                    "trust_remote_code", False
                )
                if yaml_trust:
                    raise HTTPException(
                        status_code = 400,
                        detail = (
                            f"Model '{config.display_name}' requires trust_remote_code to be enabled. "
                            f"Please enable 'Trust remote code' in Chat Settings and try again."
                        ),
                    )
            raise HTTPException(
                status_code = 500, detail = f"Failed to load model: {config.display_name}"
            )

        logger.info(f"Loaded model: {config.identifier}")

        # Load inference configuration parameters
        inference_config = load_inference_config(config.identifier)

        return LoadResponse(
            status = "loaded",
            model = config.identifier,
            display_name = config.display_name,
            is_vision = config.is_vision,
            is_lora = config.is_lora,
            is_gguf = False,
            is_audio = config.is_audio,
            audio_type = config.audio_type,
            has_audio_input = config.has_audio_input,
            inference = inference_config,
            # Get chat template from the backend's record of the loaded model
            chat_template = _chat_template_for_loaded_model(
                backend, config.identifier
            ),
        )

    except HTTPException:
        # Already carries the intended status code and detail.
        raise
    except Exception as e:
        logger.error(f"Error loading model: {e}", exc_info = True)
        msg = str(e)
        # Surface a friendlier message for models that Unsloth cannot load
        not_supported_hints = (
            "No config file found",
            "not yet supported",
            "is not supported",
            "does not support",
        )
        lowered = msg.lower()
        if any(hint.lower() in lowered for hint in not_supported_hints):
            msg = f"This model is not supported yet. Try a different model. (Original error: {msg})"
        raise HTTPException(
            status_code = 500, detail = f"Failed to load model: {msg}"
        ) from e


@router.post("/validate", response_model = ValidateModelResponse)
async def validate_model(
    request: ValidateModelRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Check that a model identifier can be resolved, without loading the model.

    The identifier is passed through ``ModelConfig.from_identifier()``; no model
    weights are loaded into GPU memory by this endpoint.

    Raises:
        HTTPException(400): the identifier resolves to nothing, or resolving it
            raised an error.
    """
    try:
        resolved = ModelConfig.from_identifier(
            model_id = request.model_path,
            hf_token = request.hf_token,
            gguf_variant = request.gguf_variant,
        )

        if resolved:
            identifier = resolved.identifier
            # Optional attributes fall back to neutral defaults when absent.
            return ValidateModelResponse(
                valid = True,
                message = "Model identifier is valid.",
                identifier = identifier,
                display_name = getattr(resolved, "display_name", identifier),
                is_gguf = getattr(resolved, "is_gguf", False),
                is_lora = getattr(resolved, "is_lora", False),
                is_vision = getattr(resolved, "is_vision", False),
            )

        raise HTTPException(
            status_code = 400,
            detail = f"Invalid model identifier: {request.model_path}",
        )

    except HTTPException:
        # Pass the 400 above through unchanged.
        raise
    except Exception as e:
        logger.error(
            f"Error validating model identifier '{request.model_path}': {e}",
            exc_info = True,
        )
        raise HTTPException(
            status_code = 400,
            detail = f"Invalid model: {e!s}",
        )


@router.post("/unload", response_model = UnloadResponse)
async def unload_model(
    request: UnloadRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Unload a model from memory.

    The request goes to llama-server when the GGUF backend is active and either
    holds this exact model or has not finished loading yet; in every other case
    it goes to the Unsloth backend.

    Raises:
        HTTPException(500): either backend raised while unloading.
    """
    try:
        target = request.model_path
        gguf = get_llama_cpp_backend()

        # GGUF backend owns the request if it is active AND (it holds this
        # model OR it is still mid-load).
        handled_by_gguf = gguf.is_active and (
            gguf.model_identifier == target or not gguf.is_loaded
        )

        if handled_by_gguf:
            gguf.unload_model()
            logger.info(f"Unloaded GGUF model: {target}")
        else:
            get_inference_backend().unload_model(target)
            logger.info(f"Unloaded model: {target}")

        return UnloadResponse(status = "unloaded", model = target)

    except Exception as e:
        logger.error(f"Error unloading model: {e}", exc_info = True)
        raise HTTPException(status_code = 500, detail = f"Failed to unload model: {e!s}")


@router.post("/generate/stream")
async def generate_stream(
    request: GenerateRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Generate a chat response with Server-Sent Events (SSE) streaming.

    For vision models, provide image_base64 with the base64-encoded image.

    Each generated chunk is sent as ``data: {"content": ...}``; the stream ends
    with ``data: [DONE]``. If generation fails mid-stream, a single
    ``data: {"error": ...}`` event is sent instead.

    Raises:
        HTTPException(400): no model is loaded, an image was supplied for a
            text-only model, or the image could not be decoded.
    """
    backend = get_inference_backend()

    if not backend.active_model_name:
        raise HTTPException(
            status_code = 400, detail = "No model loaded. Call POST /inference/load first."
        )

    # Decode image if provided (for vision models)
    image = None
    if request.image_base64:
        # Check if current model supports vision before doing any decoding
        model_info = backend.models.get(backend.active_model_name, {})
        if not model_info.get("is_vision"):
            raise HTTPException(
                status_code = 400,
                detail = "Image provided but current model is text-only. Load a vision model.",
            )

        try:
            from PIL import Image

            image_data = base64.b64decode(request.image_base64)
            image = Image.open(io.BytesIO(image_data))
            image = backend.resize_image(image)
        except Exception as e:
            raise HTTPException(
                status_code = 400, detail = f"Failed to decode image: {str(e)}"
            ) from e

    def stream():
        # Deliberately a plain (synchronous) generator: the backend generator
        # blocks between chunks, and StreamingResponse iterates synchronous
        # iterators in a worker thread, so the event loop stays free for other
        # requests while tokens are produced.
        try:
            for chunk in backend.generate_chat_response(
                messages = request.messages,
                system_prompt = request.system_prompt,
                image = image,
                temperature = request.temperature,
                top_p = request.top_p,
                top_k = request.top_k,
                max_new_tokens = request.max_new_tokens,
                repetition_penalty = request.repetition_penalty,
            ):
                yield f"data: {json.dumps({'content': chunk})}\n\n"
            yield "data: [DONE]\n\n"

        except Exception as e:
            # Reset the backend's generation state, then report a generic error
            # to the client (details go to the log only).
            backend.reset_generation_state()
            logger.error(f"Error during generation: {e}", exc_info = True)
            yield f"data: {json.dumps({'error': 'An internal error occurred'})}\n\n"

    return StreamingResponse(
        stream(),
        media_type = "text/event-stream",
        headers = {
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )


@router.get("/status", response_model = InferenceStatusResponse)
async def get_status(
    current_subject: str = Depends(get_current_subject),
):
    """
    Get current inference backend status.
    Reports whichever backend (Unsloth or llama-server) is currently active.

    A GGUF model loaded in llama-server takes precedence; otherwise the Unsloth
    backend's active model (if any) is described.

    Raises:
        HTTPException(500): collecting the status raised an error.
    """
    try:
        llama_backend = get_llama_cpp_backend()

        # If a GGUF model is loaded via llama-server, report that
        if llama_backend.is_loaded:
            from utils.models import is_audio_input_type

            _model_id = llama_backend.model_identifier
            _inference_cfg = load_inference_config(_model_id) if _model_id else None
            # _is_audio / _audio_type are attached to the backend by the load
            # route, so they may be missing: default them.
            _audio_type = getattr(llama_backend, "_audio_type", None)
            return InferenceStatusResponse(
                active_model = _model_id,
                is_vision = llama_backend.is_vision,
                is_gguf = True,
                gguf_variant = llama_backend.hf_variant,
                is_audio = getattr(llama_backend, "_is_audio", False),
                audio_type = _audio_type,
                # Derived the same way as in the /load response so both
                # endpoints agree on whether the model accepts audio input.
                has_audio_input = is_audio_input_type(_audio_type)
                if _audio_type
                else False,
                loading = [],
                loaded = [_model_id],
                inference = _inference_cfg,
                supports_reasoning = llama_backend.supports_reasoning,
                supports_tools = llama_backend.supports_tools,
                context_length = llama_backend.context_length,
            )

        # Otherwise, report Unsloth backend status
        backend = get_inference_backend()

        is_vision = False
        is_audio = False
        audio_type = None
        has_audio_input = False
        if backend.active_model_name:
            model_info = backend.models.get(backend.active_model_name, {})
            is_vision = model_info.get("is_vision", False)
            is_audio = model_info.get("is_audio", False)
            audio_type = model_info.get("audio_type")
            has_audio_input = model_info.get("has_audio_input", False)

        # gpt-oss safetensors models support reasoning via harmony channels
        supports_reasoning = False
        if backend.active_model_name and hasattr(backend, "_is_gpt_oss_model"):
            supports_reasoning = backend._is_gpt_oss_model()

        return InferenceStatusResponse(
            active_model = backend.active_model_name,
            is_vision = is_vision,
            is_gguf = False,
            is_audio = is_audio,
            audio_type = audio_type,
            has_audio_input = has_audio_input,
            loading = list(getattr(backend, "loading_models", set())),
            loaded = list(backend.models.keys()),
            supports_reasoning = supports_reasoning,
        )

    except Exception as e:
        logger.error(f"Error getting status: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to get status: {str(e)}"
        ) from e


# =====================================================================
# Audio (TTS) Generation  (/audio/generate)
# =====================================================================


@router.post("/audio/generate")
async def generate_audio(
    payload: ChatCompletionRequest,
    request: Request,
    current_subject: str = Depends(get_current_subject),
):
    """
    Generate audio (TTS) from the latest user message.
    Returns a JSON response with base64-encoded WAV audio.
    Works with both GGUF (llama-server) and Unsloth/transformers backends.

    The llama-server backend is used when it has a model loaded that was
    detected as an audio model; otherwise the Unsloth backend's active model
    must be an audio model.

    Raises:
        HTTPException(400): no messages, no user message, a user message with
            no text, no model loaded, or the active model is not an audio model.
        HTTPException(500): the backend raised during generation.
    """
    # Extract text from the last user message
    _, chat_messages, _ = _extract_content_parts(payload.messages)
    if not chat_messages:
        raise HTTPException(status_code = 400, detail = "No messages provided.")
    last_user_msg = next(
        (m for m in reversed(chat_messages) if m["role"] == "user"), None
    )
    if not last_user_msg:
        raise HTTPException(status_code = 400, detail = "No user message found.")
    text = last_user_msg["content"]
    if not text or not text.strip():
        # e.g. a multimodal message that carried only an image: nothing to speak.
        raise HTTPException(
            status_code = 400, detail = "User message contains no text to synthesize."
        )

    # Sampling arguments accepted by both backends.
    gen_kwargs = dict(
        text = text,
        temperature = payload.temperature,
        top_p = payload.top_p,
        top_k = payload.top_k,
        min_p = payload.min_p,
        max_new_tokens = payload.max_tokens or 2048,
        repetition_penalty = payload.repetition_penalty,
    )

    # Pick backend — both return (wav_bytes, sample_rate)
    llama_backend = get_llama_cpp_backend()
    if llama_backend.is_loaded and getattr(llama_backend, "_is_audio", False):
        model_name = llama_backend.model_identifier
        synthesize = llama_backend.generate_audio_response
        gen_kwargs["audio_type"] = llama_backend._audio_type
    else:
        backend = get_inference_backend()
        if not backend.active_model_name:
            raise HTTPException(status_code = 400, detail = "No model loaded.")
        model_info = backend.models.get(backend.active_model_name, {})
        if not model_info.get("is_audio"):
            raise HTTPException(
                status_code = 400, detail = "Active model is not an audio model."
            )
        model_name = backend.active_model_name
        synthesize = backend.generate_audio_response
        gen_kwargs["use_adapter"] = payload.use_adapter

    try:
        # Blocking generation runs in a worker thread so the event loop stays
        # free, matching how model loading is offloaded elsewhere in this file.
        wav_bytes, sample_rate = await asyncio.to_thread(synthesize, **gen_kwargs)
    except Exception as e:
        logger.error(f"Audio generation error: {e}", exc_info = True)
        raise HTTPException(status_code = 500, detail = str(e)) from e

    audio_b64 = base64.b64encode(wav_bytes).decode("ascii")
    return JSONResponse(
        content = {
            "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
            "object": "chat.completion.audio",
            "model": model_name,
            "audio": {"data": audio_b64, "format": "wav", "sample_rate": sample_rate},
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        # Short textual placeholder; the audio itself is in "audio".
                        "content": f'[Generated audio from: "{text[:100]}"]',
                    },
                    "finish_reason": "stop",
                }
            ],
        }
    )


# =====================================================================
# OpenAI-Compatible Chat Completions  (/chat/completions)
# =====================================================================


def _decode_audio_base64(b64: str) -> np.ndarray:
    """
    Decode base64 audio (any format) → float32 numpy array at 16kHz.

    The decoded bytes are written to a temporary file under ``tmp_root()`` so
    torchaudio can auto-detect the container format. Multi-channel audio is
    averaged down to one channel and anything not sampled at 16 kHz is
    resampled.

    Args:
        b64: Base64-encoded contents of an audio file.

    Returns:
        The waveform as a NumPy array with the channel dimension squeezed out.

    Raises:
        Whatever ``base64.b64decode`` or ``torchaudio.load`` raise for invalid
        input; the temporary file is removed in every case.
    """
    import torch
    import torchaudio
    import tempfile
    import os
    from utils.paths import ensure_dir, tmp_root

    raw = base64.b64decode(b64)

    # torchaudio.load needs a file path or file-like object with format hint.
    # Write to a temp file so torchaudio can auto-detect the format.
    # delete=False because the file is reopened by path after being closed.
    tmp = tempfile.NamedTemporaryFile(
        suffix = ".audio",
        delete = False,
        dir = str(ensure_dir(tmp_root())),
    )
    tmp_path = tmp.name
    try:
        # The write sits inside the try so a failed write (e.g. disk full)
        # still removes the file instead of leaking it.
        with tmp:
            tmp.write(raw)
        waveform, sr = torchaudio.load(tmp_path)
    finally:
        os.unlink(tmp_path)

    # Convert to mono if stereo
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim = 0, keepdim = True)

    # Resample to 16kHz if needed
    if sr != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq = sr, new_freq = 16000)
        waveform = resampler(waveform)

    return waveform.squeeze(0).numpy()


def _extract_content_parts(
    messages: list,
) -> tuple[str, list[dict], "Optional[str]"]:
    """
    Split OpenAI-style messages into the pieces the inference backend takes.

    ``content`` may be a plain string or a list of content parts
    (``[{type: "text", ...}, {type: "image_url", ...}]``). Messages whose
    content is neither a string nor a list are skipped.

    Returns:
        system_prompt:  Text of the system message ("" if there is none; with
                        several system messages the last one wins).
        chat_messages:  Non-system messages as ``{"role", "content"}`` dicts,
                        text parts joined with newlines.
        image_base64:   Base64 payload of the first ``data:`` image URL found,
                        or ``None``. Remote URLs are logged and ignored.
    """
    system_text = ""
    flattened: list[dict] = []
    image_b64: Optional[str] = None

    for message in messages:
        body = message.content
        body_is_text = isinstance(body, str)
        body_is_parts = isinstance(body, list)

        # ── System messages feed the system prompt only ───────
        if message.role == "system":
            if body_is_text:
                system_text = body
            elif body_is_parts:
                # Unlikely but handled: join the text parts
                system_text = "\n".join(
                    piece.text for piece in body if piece.type == "text"
                )
            continue

        # ── User / assistant messages ─────────────────────────
        if body_is_text:
            flattened.append({"role": message.role, "content": body})
            continue
        if not body_is_parts:
            continue

        # Multimodal content parts: gather text, capture the first image.
        texts: list[str] = []
        for piece in body:
            kind = piece.type
            if kind == "text":
                texts.append(piece.text)
                continue
            if kind != "image_url" or image_b64 is not None:
                continue

            url = piece.image_url.url
            if not url.startswith("data:"):
                logger.warning(
                    f"Remote image URLs not yet supported: {url[:80]}..."
                )
                continue

            # data:image/png;base64,<DATA> → keep <DATA> (None if no comma)
            _, comma, payload = url.partition(",")
            image_b64 = payload if comma else None

        flattened.append({"role": message.role, "content": "\n".join(texts)})

    return system_text, flattened, image_b64


@router.post("/chat/completions")
async def openai_chat_completions(
    payload: ChatCompletionRequest,
    request: Request,
    current_subject: str = Depends(get_current_subject),
):
    """
    OpenAI-compatible chat completions endpoint.

    Supports multimodal messages: ``content`` may be a plain string or a
    list of content parts (``text`` / ``image_url``).

    Streaming (default):  returns SSE chunks matching OpenAI's format.
    Non-streaming:        returns a single ChatCompletion JSON object.

    Automatically routes to the correct backend:
    - GGUF models → llama-server via LlamaCppBackend
    - Other models → Unsloth/transformers via InferenceBackend

    Routing order inside the handler:
    1. GGUF backend loaded and flagged as audio → ``generate_audio``.
    2. Unsloth audio (non-Whisper) model → ``generate_audio``.
    3. Unsloth model with audio input and ``payload.audio_base64`` →
       audio-input generation (Whisper or generic audio-input).
    4. GGUF backend → tool-calling loop (always answered as an SSE stream,
       regardless of ``payload.stream``) or plain completion.
    5. Otherwise → standard Unsloth generation (optionally adapter-controlled).

    Every synchronous model generator is driven from a worker thread so the
    event loop stays responsive for other requests and disconnect detection.

    Args:
        payload: Parsed request body (messages, sampling parameters, flags).
        request: Incoming request; polled for client disconnects while streaming.
        current_subject: Authenticated subject injected by the auth dependency.

    Returns:
        A ``StreamingResponse`` (``text/event-stream``) when streaming, else a
        ``JSONResponse`` holding a single ChatCompletion object.

    Raises:
        HTTPException: 400 when no model is loaded, when a Whisper model gets
            no audio, when only system messages were sent, when an image is
            sent to a non-vision model, or when the image cannot be decoded;
            500 when a non-streaming GGUF/Unsloth generation fails.
    """

    def _last_item(iterable, default = ""):
        # The GGUF and Unsloth generators yield *cumulative* text, so the
        # final item is the complete response.
        last = default
        for item in iterable:
            last = item
        return last

    llama_backend = get_llama_cpp_backend()
    using_gguf = llama_backend.is_loaded

    # ── Determine which backend is active ─────────────────────
    if using_gguf:
        model_name = llama_backend.model_identifier or payload.model
        if getattr(llama_backend, "_is_audio", False):
            return await generate_audio(payload, request)
    else:
        backend = get_inference_backend()
        if not backend.active_model_name:
            raise HTTPException(
                status_code = 400,
                detail = "No model loaded. Call POST /inference/load first.",
            )
        model_name = backend.active_model_name or payload.model

        # ── Audio TTS path: auto-route to audio generation ────
        # (Whisper is ASR not TTS — handled below in audio input path)
        model_info = backend.models.get(backend.active_model_name, {})
        if model_info.get("is_audio") and model_info.get("audio_type") != "whisper":
            return await generate_audio(payload, request)

        # ── Whisper without audio: return clear error ──
        if model_info.get("audio_type") == "whisper" and not payload.audio_base64:
            raise HTTPException(
                status_code = 400,
                detail = "Whisper models require audio input. Please upload an audio file.",
            )

        # ── Audio INPUT path: decode WAV and route to audio input generation ──
        if payload.audio_base64 and model_info.get("has_audio_input"):
            audio_array = _decode_audio_base64(payload.audio_base64)
            system_prompt, chat_messages, _ = _extract_content_parts(payload.messages)
            cancel_event = threading.Event()
            completion_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
            created = int(time.time())

            def audio_input_generate():
                if model_info.get("audio_type") == "whisper":
                    return backend.generate_whisper_response(
                        audio_array = audio_array,
                        cancel_event = cancel_event,
                    )
                return backend.generate_audio_input_response(
                    messages = chat_messages,
                    system_prompt = system_prompt,
                    audio_array = audio_array,
                    temperature = payload.temperature,
                    top_p = payload.top_p,
                    top_k = payload.top_k,
                    min_p = payload.min_p,
                    max_new_tokens = payload.max_tokens or 2048,
                    repetition_penalty = payload.repetition_penalty,
                    cancel_event = cancel_event,
                )

            _audio_sentinel = object()

            if payload.stream:

                async def audio_input_stream():
                    try:
                        first_chunk = ChatCompletionChunk(
                            id = completion_id,
                            created = created,
                            model = model_name,
                            choices = [
                                ChunkChoice(
                                    delta = ChoiceDelta(role = "assistant"),
                                    finish_reason = None,
                                )
                            ],
                        )
                        yield f"data: {first_chunk.model_dump_json(exclude_none = True)}\n\n"

                        # Pull each chunk in a worker thread (same pattern as
                        # the GGUF/Unsloth streams below); iterating the sync
                        # generator inline would block the event loop for the
                        # whole generation. Chunks here are incremental text,
                        # not cumulative.
                        gen = iter(audio_input_generate())
                        while True:
                            chunk_text = await asyncio.to_thread(
                                next, gen, _audio_sentinel
                            )
                            if chunk_text is _audio_sentinel:
                                break
                            if await request.is_disconnected():
                                cancel_event.set()
                                return
                            if chunk_text:
                                chunk = ChatCompletionChunk(
                                    id = completion_id,
                                    created = created,
                                    model = model_name,
                                    choices = [
                                        ChunkChoice(
                                            delta = ChoiceDelta(content = chunk_text),
                                            finish_reason = None,
                                        )
                                    ],
                                )
                                yield f"data: {chunk.model_dump_json(exclude_none = True)}\n\n"

                        final_chunk = ChatCompletionChunk(
                            id = completion_id,
                            created = created,
                            model = model_name,
                            choices = [
                                ChunkChoice(delta = ChoiceDelta(), finish_reason = "stop")
                            ],
                        )
                        yield f"data: {final_chunk.model_dump_json(exclude_none = True)}\n\n"
                        yield "data: [DONE]\n\n"
                    except asyncio.CancelledError:
                        cancel_event.set()
                        raise
                    except Exception as e:
                        logger.error(
                            f"Error during audio input streaming: {e}", exc_info = True
                        )
                        yield f"data: {json.dumps({'error': {'message': 'An internal error occurred', 'type': 'server_error'}})}\n\n"

                return StreamingResponse(
                    audio_input_stream(),
                    media_type = "text/event-stream",
                    headers = {
                        "Cache-Control": "no-cache",
                        "Connection": "keep-alive",
                        "X-Accel-Buffering": "no",
                    },
                )
            else:
                # Join the incremental chunks off the event loop.
                full_text = await asyncio.to_thread(
                    lambda: "".join(audio_input_generate())
                )
                response = ChatCompletion(
                    id = completion_id,
                    created = created,
                    model = model_name,
                    choices = [
                        CompletionChoice(
                            message = CompletionMessage(content = full_text),
                            finish_reason = "stop",
                        )
                    ],
                )
                return JSONResponse(content = response.model_dump())

    # ── Parse messages (handles multimodal content parts) ─────
    system_prompt, chat_messages, extracted_image_b64 = _extract_content_parts(
        payload.messages
    )

    if not chat_messages:
        raise HTTPException(
            status_code = 400,
            detail = "At least one non-system message is required.",
        )

    # ── GGUF path: proxy to llama-server /v1/chat/completions ──
    if using_gguf:
        # Reject images if this GGUF model doesn't support vision
        image_b64 = extracted_image_b64 or payload.image_base64
        if image_b64 and not llama_backend.is_vision:
            raise HTTPException(
                status_code = 400,
                detail = "Image provided but current GGUF model does not support vision.",
            )

        # Convert image to PNG for llama-server (stb_image has limited format support)
        if image_b64:
            try:
                import base64 as _b64
                from io import BytesIO as _BytesIO
                from PIL import Image as _Image

                raw = _b64.b64decode(image_b64)
                img = _Image.open(_BytesIO(raw))
                if img.mode == "RGBA":
                    img = img.convert("RGB")
                buf = _BytesIO()
                img.save(buf, format = "PNG")
                image_b64 = _b64.b64encode(buf.getvalue()).decode("ascii")
            except Exception as e:
                raise HTTPException(
                    status_code = 400, detail = f"Failed to process image: {e}"
                )

        # Build message list with system prompt prepended
        gguf_messages = []
        if system_prompt:
            gguf_messages.append({"role": "system", "content": system_prompt})
        gguf_messages.extend(chat_messages)

        cancel_event = threading.Event()

        completion_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
        created = int(time.time())

        # ── Tool-calling path (agentic loop) ──────────────────
        # Tools are skipped whenever an image is attached.
        use_tools = (
            payload.enable_tools and llama_backend.supports_tools and not image_b64
        )

        if use_tools:
            from core.inference.tools import ALL_TOOLS

            if payload.enabled_tools is not None:
                tools_to_use = [
                    t
                    for t in ALL_TOOLS
                    if t["function"]["name"] in payload.enabled_tools
                ]
            else:
                tools_to_use = ALL_TOOLS

            def gguf_generate_with_tools():
                return llama_backend.generate_chat_completion_with_tools(
                    messages = gguf_messages,
                    tools = tools_to_use,
                    temperature = payload.temperature,
                    top_p = payload.top_p,
                    top_k = payload.top_k,
                    min_p = payload.min_p,
                    max_tokens = payload.max_tokens,
                    repetition_penalty = payload.repetition_penalty,
                    presence_penalty = payload.presence_penalty,
                    cancel_event = cancel_event,
                    enable_thinking = payload.enable_thinking,
                    auto_heal_tool_calls = payload.auto_heal_tool_calls
                    if payload.auto_heal_tool_calls is not None
                    else True,
                    max_tool_iterations = payload.max_tool_calls_per_message
                    if payload.max_tool_calls_per_message is not None
                    else 10,
                    tool_call_timeout = payload.tool_call_timeout
                    if payload.tool_call_timeout is not None
                    else 300,
                    session_id = payload.session_id,
                )

            _tool_sentinel = object()

            async def gguf_tool_stream():
                try:
                    first_chunk = ChatCompletionChunk(
                        id = completion_id,
                        created = created,
                        model = model_name,
                        choices = [
                            ChunkChoice(
                                delta = ChoiceDelta(role = "assistant"),
                                finish_reason = None,
                            )
                        ],
                    )
                    yield f"data: {first_chunk.model_dump_json(exclude_none = True)}\n\n"

                    # Iterate the synchronous generator in a thread so
                    # the event loop stays free for disconnect detection.
                    gen = gguf_generate_with_tools()
                    prev_text = ""
                    while True:
                        if await request.is_disconnected():
                            cancel_event.set()
                            return

                        event = await asyncio.to_thread(next, gen, _tool_sentinel)
                        if event is _tool_sentinel:
                            break

                        if event["type"] == "status":
                            # Emit tool status as a custom SSE event
                            status_data = json.dumps(
                                {
                                    "type": "tool_status",
                                    "content": event["text"],
                                }
                            )
                            yield f"data: {status_data}\n\n"
                            continue

                        if event["type"] in ("tool_start", "tool_end"):
                            yield f"data: {json.dumps(event)}\n\n"
                            continue

                        # "content" type -- cumulative text; emit only the
                        # suffix that has not been sent yet.
                        cumulative = event.get("text", "")
                        new_text = cumulative[len(prev_text) :]
                        prev_text = cumulative
                        if not new_text:
                            continue
                        chunk = ChatCompletionChunk(
                            id = completion_id,
                            created = created,
                            model = model_name,
                            choices = [
                                ChunkChoice(
                                    delta = ChoiceDelta(content = new_text),
                                    finish_reason = None,
                                )
                            ],
                        )
                        yield f"data: {chunk.model_dump_json(exclude_none = True)}\n\n"

                    final_chunk = ChatCompletionChunk(
                        id = completion_id,
                        created = created,
                        model = model_name,
                        choices = [
                            ChunkChoice(
                                delta = ChoiceDelta(),
                                finish_reason = "stop",
                            )
                        ],
                    )
                    yield f"data: {final_chunk.model_dump_json(exclude_none = True)}\n\n"
                    yield "data: [DONE]\n\n"

                except asyncio.CancelledError:
                    cancel_event.set()
                    raise
                except Exception as e:
                    import traceback

                    tb = traceback.format_exc()
                    logger.error(f"Error during GGUF tool streaming: {e}\n{tb}")
                    error_chunk = {
                        "error": {
                            "message": "An internal error occurred",
                            "type": "server_error",
                        },
                    }
                    yield f"data: {json.dumps(error_chunk)}\n\n"

            return StreamingResponse(
                gguf_tool_stream(),
                media_type = "text/event-stream",
                headers = {
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "X-Accel-Buffering": "no",
                },
            )

        # ── Standard GGUF path (no tools) ─────────────────────

        def gguf_generate():
            return llama_backend.generate_chat_completion(
                messages = gguf_messages,
                image_b64 = image_b64,
                temperature = payload.temperature,
                top_p = payload.top_p,
                top_k = payload.top_k,
                min_p = payload.min_p,
                max_tokens = payload.max_tokens,
                repetition_penalty = payload.repetition_penalty,
                presence_penalty = payload.presence_penalty,
                cancel_event = cancel_event,
                enable_thinking = payload.enable_thinking,
            )

        _gguf_sentinel = object()

        if payload.stream:

            async def gguf_stream_chunks():
                try:
                    # First chunk: role
                    first_chunk = ChatCompletionChunk(
                        id = completion_id,
                        created = created,
                        model = model_name,
                        choices = [
                            ChunkChoice(
                                delta = ChoiceDelta(role = "assistant"),
                                finish_reason = None,
                            )
                        ],
                    )
                    yield f"data: {first_chunk.model_dump_json(exclude_none = True)}\n\n"

                    # Iterate the synchronous generator in a thread so
                    # the event loop stays free for disconnect detection.
                    gen = gguf_generate()
                    prev_text = ""
                    while True:
                        if await request.is_disconnected():
                            cancel_event.set()
                            return
                        cumulative = await asyncio.to_thread(next, gen, _gguf_sentinel)
                        if cumulative is _gguf_sentinel:
                            break
                        new_text = cumulative[len(prev_text) :]
                        prev_text = cumulative
                        if not new_text:
                            continue
                        chunk = ChatCompletionChunk(
                            id = completion_id,
                            created = created,
                            model = model_name,
                            choices = [
                                ChunkChoice(
                                    delta = ChoiceDelta(content = new_text),
                                    finish_reason = None,
                                )
                            ],
                        )
                        yield f"data: {chunk.model_dump_json(exclude_none = True)}\n\n"

                    # Final chunk
                    final_chunk = ChatCompletionChunk(
                        id = completion_id,
                        created = created,
                        model = model_name,
                        choices = [
                            ChunkChoice(
                                delta = ChoiceDelta(),
                                finish_reason = "stop",
                            )
                        ],
                    )
                    yield f"data: {final_chunk.model_dump_json(exclude_none = True)}\n\n"
                    yield "data: [DONE]\n\n"

                except asyncio.CancelledError:
                    cancel_event.set()
                    raise
                except Exception as e:
                    logger.error(f"Error during GGUF streaming: {e}", exc_info = True)
                    error_chunk = {
                        "error": {
                            "message": "An internal error occurred",
                            "type": "server_error",
                        },
                    }
                    yield f"data: {json.dumps(error_chunk)}\n\n"

            return StreamingResponse(
                gguf_stream_chunks(),
                media_type = "text/event-stream",
                headers = {
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "X-Accel-Buffering": "no",
                },
            )
        else:
            try:
                # Drain the generator in a worker thread so a long completion
                # does not stall every other request on the event loop.
                full_text = await asyncio.to_thread(
                    lambda: _last_item(gguf_generate())
                )

                response = ChatCompletion(
                    id = completion_id,
                    created = created,
                    model = model_name,
                    choices = [
                        CompletionChoice(
                            message = CompletionMessage(content = full_text),
                            finish_reason = "stop",
                        )
                    ],
                )
                return JSONResponse(content = response.model_dump())

            except Exception as e:
                logger.error(f"Error during GGUF completion: {e}", exc_info = True)
                raise HTTPException(status_code = 500, detail = str(e))

    # ── Standard Unsloth path ─────────────────────────────────

    # Decode image (from content parts OR legacy field)
    image_b64 = extracted_image_b64 or payload.image_base64
    image = None

    if image_b64:
        try:
            import base64
            from PIL import Image
            from io import BytesIO

            model_info = backend.models.get(backend.active_model_name, {})
            if not model_info.get("is_vision"):
                raise HTTPException(
                    status_code = 400,
                    detail = "Image provided but current model is text-only. Load a vision model.",
                )

            image_data = base64.b64decode(image_b64)
            image = Image.open(BytesIO(image_data))
            image = backend.resize_image(image)

        except HTTPException:
            # Keep the explicit 400 above from being re-wrapped below.
            raise
        except Exception as e:
            raise HTTPException(status_code = 400, detail = f"Failed to decode image: {e}")

    # Shared generation kwargs
    gen_kwargs = dict(
        messages = chat_messages,
        system_prompt = system_prompt,
        image = image,
        temperature = payload.temperature,
        top_p = payload.top_p,
        top_k = payload.top_k,
        min_p = payload.min_p,
        max_new_tokens = payload.max_tokens or 2048,
        repetition_penalty = payload.repetition_penalty,
    )

    # Choose generation path (adapter-controlled or standard)
    cancel_event = threading.Event()

    if payload.use_adapter is not None:

        def generate():
            return backend.generate_with_adapter_control(
                use_adapter = payload.use_adapter,
                cancel_event = cancel_event,
                **gen_kwargs,
            )
    else:

        def generate():
            return backend.generate_chat_response(
                cancel_event = cancel_event, **gen_kwargs
            )

    completion_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
    created = int(time.time())

    # ── Streaming response ────────────────────────────────────────
    if payload.stream:

        async def stream_chunks():
            try:
                first_chunk = ChatCompletionChunk(
                    id = completion_id,
                    created = created,
                    model = model_name,
                    choices = [
                        ChunkChoice(
                            delta = ChoiceDelta(role = "assistant"),
                            finish_reason = None,
                        )
                    ],
                )
                yield f"data: {first_chunk.model_dump_json(exclude_none = True)}\n\n"

                prev_text = ""
                # Run sync generator in thread pool to avoid blocking
                # the event loop. Critical for compare mode: two SSE
                # requests arrive concurrently but the orchestrator
                # serializes them via _gen_lock. Without run_in_executor
                # the second request's blocking lock acquisition would
                # freeze the entire event loop, stalling both streams.
                _DONE = object()  # sentinel for generator exhaustion
                # get_running_loop() is the supported way to obtain the loop
                # from inside a coroutine.
                loop = asyncio.get_running_loop()
                gen = generate()
                while True:
                    # next(gen, _DONE) returns _DONE instead of raising
                    # StopIteration — StopIteration cannot propagate
                    # through asyncio futures (Python limitation).
                    cumulative = await loop.run_in_executor(None, next, gen, _DONE)
                    if cumulative is _DONE:
                        break
                    if await request.is_disconnected():
                        cancel_event.set()
                        backend.reset_generation_state()
                        return
                    new_text = cumulative[len(prev_text) :]
                    prev_text = cumulative
                    if not new_text:
                        continue
                    chunk = ChatCompletionChunk(
                        id = completion_id,
                        created = created,
                        model = model_name,
                        choices = [
                            ChunkChoice(
                                delta = ChoiceDelta(content = new_text),
                                finish_reason = None,
                            )
                        ],
                    )
                    yield f"data: {chunk.model_dump_json(exclude_none = True)}\n\n"

                final_chunk = ChatCompletionChunk(
                    id = completion_id,
                    created = created,
                    model = model_name,
                    choices = [
                        ChunkChoice(
                            delta = ChoiceDelta(),
                            finish_reason = "stop",
                        )
                    ],
                )
                yield f"data: {final_chunk.model_dump_json(exclude_none = True)}\n\n"
                yield "data: [DONE]\n\n"

            except asyncio.CancelledError:
                cancel_event.set()
                backend.reset_generation_state()
                raise
            except Exception as e:
                backend.reset_generation_state()
                logger.error(f"Error during OpenAI streaming: {e}", exc_info = True)
                error_chunk = {
                    "error": {
                        "message": "An internal error occurred",
                        "type": "server_error",
                    },
                }
                yield f"data: {json.dumps(error_chunk)}\n\n"

        return StreamingResponse(
            stream_chunks(),
            media_type = "text/event-stream",
            headers = {
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no",
            },
        )

    # ── Non-streaming response ────────────────────────────────────
    else:
        try:
            # Same reasoning as the streaming branch: generation (and any
            # lock it waits on) must not run on the event loop thread.
            full_text = await asyncio.to_thread(lambda: _last_item(generate()))

            response = ChatCompletion(
                id = completion_id,
                created = created,
                model = model_name,
                choices = [
                    CompletionChoice(
                        message = CompletionMessage(content = full_text),
                        finish_reason = "stop",
                    )
                ],
            )
            return JSONResponse(content = response.model_dump())

        except Exception as e:
            backend.reset_generation_state()
            logger.error(f"Error during OpenAI completion: {e}", exc_info = True)
            raise HTTPException(status_code = 500, detail = str(e))


# =====================================================================
# OpenAI-Compatible Models Listing  (/models → /v1/models)
# =====================================================================


@router.get("/models")
async def openai_list_models(
    current_subject: str = Depends(get_current_subject),
):
    """
    List loaded models in the OpenAI ``GET /v1/models`` response shape.

    The GGUF (llama.cpp) backend is reported first when it has a model
    loaded, followed by the Unsloth backend's active model, if any. The
    result is ``{"object": "list", "data": [...]}`` and ``data`` may be empty.
    """

    def _as_entry(model_id):
        # One element of the OpenAI-style "data" array.
        return {"id": model_id, "object": "model", "owned_by": "local"}

    # GGUF backend entry (if loaded)
    gguf_backend = get_llama_cpp_backend()
    data = (
        [_as_entry(gguf_backend.model_identifier)] if gguf_backend.is_loaded else []
    )

    # Unsloth backend entry (if a model is active)
    unsloth_backend = get_inference_backend()
    if unsloth_backend.active_model_name:
        data.append(_as_entry(unsloth_backend.active_model_name))

    return {"object": "list", "data": data}


================================================
FILE: studio/backend/routes/models.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Model Management API routes
"""

import os
import sys
from pathlib import Path
from fastapi import APIRouter, Body, Depends, HTTPException, Query
from typing import List, Optional
import structlog
from loggers import get_logger

import re as _re

_VALID_REPO_ID = _re.compile(r"^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$")


def _is_valid_repo_id(repo_id: str) -> bool:
    return bool(_VALID_REPO_ID.fullmatch(repo_id))


# Put the directory three levels above this file at the front of sys.path
# (once), presumably so the absolute project imports below can resolve.
# NOTE(review): the variable is called ``backend_path``, but for a file located
# at studio/backend/routes/models.py three ``.parent`` hops land on ``studio/``,
# not ``studio/backend/`` — confirm which directory is intended.
backend_path = Path(__file__).parent.parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

from auth.authentication import get_current_subject

# Import backend functions
try:
    from utils.models import (
        scan_trained_loras,
        scan_exported_models,
        load_model_defaults,
        get_base_model_from_lora,
        is_vision_model,
        is_embedding_model,
        scan_checkpoints,
        list_gguf_variants,
        ModelConfig,
    )
    from utils.models.model_config import (
        _pick_best_gguf,
        _extract_quant_label,
        is_audio_input_type,
    )
    from core.inference import get_inference_backend
    from utils.paths import (
        outputs_root,
        exports_root,
        resolve_output_dir,
        resolve_export_dir,
    )
except ImportError:
    # Fallback: try to import from parent directory
    parent_backend = backend_path.parent / "backend"
    if str(parent_backend) not in sys.path:
        sys.path.insert(0, str(parent_backend))
    from utils.models import (
        scan_trained_loras,
        scan_exported_models,
        load_model_defaults,
        get_base_model_from_lora,
        is_vision_model,
        is_embedding_model,
        scan_checkpoints,
        list_gguf_variants,
        ModelConfig,
    )
    from utils.models.model_config import (
        _pick_best_gguf,
        _extract_quant_label,
        is_audio_input_type,
    )
    from core.inference import get_inference_backend
    from utils.paths import (
        outputs_root,
        exports_root,
        resolve_output_dir,
        resolve_export_dir,
    )

from models import (
    CheckpointInfo,
    CheckpointListResponse,
    LocalModelInfo,
    LocalModelListResponse,
    ModelCheckpoints,
    ModelDetails,
    LoRAScanResponse,
    LoRAInfo,
    ModelListResponse,
)
from models.models import GgufVariantDetail, GgufVariantsResponse, ModelType
from models.responses import (
    LoRABaseModelResponse,
    VisionCheckResponse,
    EmbeddingCheckResponse,
)

# Module-level APIRouter instance and a logger named after this module.
router = APIRouter()
logger = get_logger(__name__)


def derive_model_type(
    is_vision: bool, audio_type: Optional[str], is_embedding: bool = False
) -> ModelType:
    """Map capability flags onto one modality label.

    Precedence is embeddings > audio > vision > text. A model counts as audio
    whenever ``audio_type`` is anything other than ``None`` (an empty string
    still qualifies).
    """
    by_priority = (
        (is_embedding, "embeddings"),
        (audio_type is not None, "audio"),
        (is_vision, "vision"),
    )
    # First flag that is set wins; plain text is the fallback.
    return next((label for flag, label in by_priority if flag), "text")


def _resolve_hf_cache_dir() -> Path:
    """Resolve local HF cache root used by hub downloads."""
    try:
        from huggingface_hub.constants import HF_HUB_CACHE

        return Path(HF_HUB_CACHE)
    except Exception:
        return Path.home() / ".cache" / "huggingface" / "hub"


def _scan_models_dir(models_dir: Path) -> List[LocalModelInfo]:
    """Collect model candidates located directly inside ``models_dir``.

    Two kinds of entries are reported, both with ``source="models_dir"``:

    * immediate sub-directories holding ``config.json``,
      ``adapter_config.json`` or any ``*.safetensors`` / ``*.bin`` /
      ``*.gguf`` file (display name = directory name);
    * standalone ``*.gguf`` files (display name = file stem).

    Returns an empty list when ``models_dir`` is not an existing directory.
    """
    # is_dir() is already False for a missing path.
    if not models_dir.is_dir():
        return []

    def _mtime(entry: Path) -> Optional[float]:
        # Modification time, or None when the entry cannot be stat'ed.
        try:
            return entry.stat().st_mtime
        except OSError:
            return None

    def _has_model_files(folder: Path) -> bool:
        # Check the cheap marker files first, then the weight-file patterns.
        if (folder / "config.json").exists():
            return True
        if (folder / "adapter_config.json").exists():
            return True
        return any(
            any(folder.glob(pattern))
            for pattern in ("*.safetensors", "*.bin", "*.gguf")
        )

    def _describe(entry: Path, label: str):
        location = str(entry)
        return LocalModelInfo(
            id = location,
            display_name = label,
            path = location,
            source = "models_dir",
            updated_at = _mtime(entry),
        )

    results: List[LocalModelInfo] = []

    # Pass 1: sub-directories that look like model folders.
    for folder in models_dir.iterdir():
        if folder.is_dir() and _has_model_files(folder):
            results.append(_describe(folder, folder.name))

    # Pass 2: loose .gguf files sitting directly in the models directory.
    for weights in models_dir.glob("*.gguf"):
        if weights.is_file():
            results.append(_describe(weights, weights.stem))

    return results


def _scan_hf_cache(cache_dir: Path) -> List[LocalModelInfo]:
    """List the ``models--*`` repo folders found in a Hugging Face cache dir.

    The folder name minus its ``models--`` prefix, with every ``--`` turned
    into ``/``, is used as both ``id`` and ``model_id``. Entries carry
    ``source="hf_cache"``. Returns an empty list when ``cache_dir`` is not an
    existing directory.
    """
    if not cache_dir.is_dir():
        return []

    prefix_len = len("models--")
    entries: List[LocalModelInfo] = []
    for repo_dir in cache_dir.glob("models--*"):
        if not repo_dir.is_dir():
            continue

        encoded_name = repo_dir.name[prefix_len:]
        if not encoded_name:
            # A folder named exactly "models--" carries no repo name.
            continue
        hub_id = encoded_name.replace("--", "/")

        try:
            mtime = repo_dir.stat().st_mtime
        except OSError:
            mtime = None

        entries.append(
            LocalModelInfo(
                id = hub_id,
                model_id = hub_id,
                display_name = hub_id.rsplit("/", 1)[-1],
                path = str(repo_dir),
                source = "hf_cache",
                updated_at = mtime,
            )
        )
    return entries


@router.get("/local", response_model = LocalModelListResponse)
async def list_local_models(
    models_dir: str = Query(
        default = "./models", description = "Directory to scan for local model folders"
    ),
    current_subject: str = Depends(get_current_subject),
):
    """
    Return local model candidates from a trusted directory plus the HF cache.

    ``models_dir`` only selects one of the trusted roots (``./models``, the HF
    cache and, when ``utils.paths`` is importable, the studio and outputs
    roots). The scan always runs on the matched trusted root itself, never on
    the caller-supplied string. A request outside every trusted root is
    rejected with 403; unexpected scan failures are reported as 500.
    """
    hf_cache_dir = _resolve_hf_cache_dir()
    trusted_roots = [Path("./models").resolve(), hf_cache_dir]
    try:
        from utils.paths import studio_root, outputs_root

        trusted_roots += [studio_root(), outputs_root()]
    except Exception:
        pass

    requested = os.path.realpath(os.path.expanduser(models_dir))

    def _covers(root) -> bool:
        # True when the request is the root itself or lies underneath it.
        real_root = os.path.realpath(str(root))
        return requested == real_root or requested.startswith(real_root + os.sep)

    # First trusted root containing the request; the root itself (not the
    # user-supplied path) is what gets scanned.
    models_root = next((root for root in trusted_roots if _covers(root)), None)
    if models_root is None:
        raise HTTPException(
            status_code = 403,
            detail = "Directory not allowed",
        )

    try:
        candidates = _scan_models_dir(models_root) + _scan_hf_cache(hf_cache_dir)

        # Keep the first occurrence of every id.
        unique: dict[str, LocalModelInfo] = {}
        for candidate in candidates:
            unique.setdefault(candidate.id, candidate)

        # Newest first; entries without a timestamp sort as 0.
        ordered = sorted(
            unique.values(),
            key = lambda item: (item.updated_at or 0),
            reverse = True,
        )

        return LocalModelListResponse(
            models_dir = str(models_root),
            hf_cache_dir = str(hf_cache_dir),
            models = ordered,
        )
    except Exception as e:
        logger.error(f"Error listing local models: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to list local models: {str(e)}",
        )


@router.get("/list")
async def list_models(
    current_subject: str = Depends(get_current_subject),
):
    """
    Return the default models followed by any currently loaded models.

    Loaded models come from the inference backend's ``models`` mapping plus
    the active llama.cpp (GGUF) model, if one is loaded. Each id appears once;
    the default-model entry wins over a loaded entry with the same id.
    Failures are reported as a 500.
    """
    try:
        backend = get_inference_backend()
        default_models = backend.default_models

        # Models held by the inference backend.
        loaded = []
        for model_name, meta in backend.models.items():
            vision_flag = meta.get("is_vision", False)
            audio_kind = meta.get("audio_type")
            loaded.append(
                ModelDetails(
                    id = model_name,
                    # split() returns the whole string when there is no "/".
                    name = model_name.split("/")[-1],
                    is_vision = vision_flag,
                    is_lora = meta.get("is_lora", False),
                    is_audio = meta.get("is_audio", False),
                    audio_type = audio_kind,
                    has_audio_input = meta.get("has_audio_input", False),
                    model_type = derive_model_type(vision_flag, audio_kind),
                )
            )

        # Active GGUF model served through llama-server, if any.
        from routes.inference import get_llama_cpp_backend

        llama = get_llama_cpp_backend()
        if llama.is_loaded and llama.model_identifier:
            gguf_id = llama.model_identifier
            loaded.append(
                ModelDetails(
                    id = gguf_id,
                    name = gguf_id.split("/")[-1],
                    is_gguf = True,
                    is_vision = llama.is_vision,
                    is_audio = getattr(llama, "_is_audio", False),
                    audio_type = getattr(llama, "_audio_type", None),
                )
            )

        # Defaults first, then loaded models that were not already listed.
        combined = []
        seen_ids = set()

        for model_id in default_models:
            if model_id in seen_ids:
                continue
            combined.append(
                ModelDetails(
                    id = model_id,
                    name = model_id.split("/")[-1],
                    is_gguf = model_id.upper().endswith("-GGUF"),
                )
            )
            seen_ids.add(model_id)

        for details in loaded:
            if details.id in seen_ids:
                continue
            combined.append(details)
            seen_ids.add(details.id)

        return ModelListResponse(models = combined, default_models = default_models)

    except Exception as e:
        logger.error(f"Error listing models: {e}", exc_info = True)
        raise HTTPException(status_code = 500, detail = f"Failed to list models: {str(e)}")


def _get_max_position_embeddings(config) -> Optional[int]:
    """Extract ``max_position_embeddings`` from a model config object.

    The top-level attribute is preferred. When it is missing *or* ``None``,
    the nested ``config.text_config.max_position_embeddings`` is used
    instead.

    Args:
        config: Any object exposing the attributes above.

    Returns:
        The configured value, or ``None`` when neither location provides one.
    """
    top_level = getattr(config, "max_position_embeddings", None)
    if top_level is not None:
        return top_level
    # A top-level attribute that exists but is None must not block the nested
    # lookup: the caller in this module treats None as "not found".
    text_config = getattr(config, "text_config", None)
    return getattr(text_config, "max_position_embeddings", None)


def _get_model_size_bytes(
    model_name: str, hf_token: Optional[str] = None
) -> Optional[int]:
    """Return the total size of a Hub repo's weight files, in bytes.

    Only files ending in ``.safetensors``, ``.bin``, ``.pt``, ``.pth`` or
    ``.gguf`` are counted.

    Args:
        model_name: Hub repo id to look up.
        hf_token: Optional Hugging Face token passed to the Hub client.

    Returns:
        The summed size, or ``None`` when the repo lists no files, no weight
        file reports a size, or the lookup fails (failures are logged as a
        warning, never raised).
    """
    try:
        from huggingface_hub import HfApi

        api = HfApi(token = hf_token)
        # files_metadata=True is required: without it the Hub response omits
        # per-file sizes, every sibling.size is None and the total is always 0.
        info = api.repo_info(
            model_name,
            repo_type = "model",
            files_metadata = True,
            token = hf_token,
        )
        if not info.siblings:
            return None

        weight_exts = (".safetensors", ".bin", ".pt", ".pth", ".gguf")
        total = sum(
            sibling.size
            for sibling in info.siblings
            if sibling.rfilename
            and sibling.rfilename.endswith(weight_exts)
            and sibling.size is not None
        )

        return total if total > 0 else None
    except Exception as e:
        logger.warning(f"Could not get model size for {model_name}: {e}")
        return None


@router.get("/config/{model_name:path}")
async def get_model_config(
    model_name: str,
    hf_token: Optional[str] = Query(None),
    current_subject: str = Depends(get_current_subject),
):
    """
    Return defaults and detected capabilities for one model.

    Combines ``load_model_defaults`` with vision / embedding / audio
    detection, LoRA metadata, the maximum position embeddings and the size of
    the weight files on the Hub. Identifiers that ``is_local_path`` does not
    recognise are lowercased before any lookup. Failures are reported as 500.
    """
    try:
        from utils.models.model_config import is_local_path

        if not is_local_path(model_name):
            model_name = model_name.lower()

        logger.info(f"Getting model config for: {model_name}")
        from utils.models.model_config import detect_audio_type

        defaults = load_model_defaults(model_name)

        # Capability detection (the HF token is forwarded where accepted).
        is_vision = is_vision_model(model_name)
        is_embedding = is_embedding_model(model_name, hf_token = hf_token)
        audio_type = detect_audio_type(model_name, hf_token = hf_token)

        # LoRA metadata is best effort: any failure leaves these defaults.
        is_lora, base_model, max_position_embeddings = False, None, None
        try:
            resolved = ModelConfig.from_identifier(model_name)
            is_lora = resolved.is_lora
            if is_lora:
                base_model = resolved.base_model
            max_position_embeddings = _get_max_position_embeddings(resolved)
        except Exception:
            pass

        # Second attempt through transformers' AutoConfig, also best effort.
        if max_position_embeddings is None:
            try:
                from transformers import AutoConfig as _AutoConfig

                hf_config = _AutoConfig.from_pretrained(
                    model_name,
                    # Remote code is only trusted for repos under "unsloth/".
                    trust_remote_code = model_name.lower().startswith("unsloth/"),
                    token = hf_token,
                )
                max_position_embeddings = _get_max_position_embeddings(hf_config)
            except Exception:
                pass

        logger.info(
            f"Model config result for {model_name}: is_vision={is_vision}, is_embedding={is_embedding}, audio_type={audio_type}, is_lora={is_lora}, max_position_embeddings={max_position_embeddings}"
        )
        return ModelDetails(
            id = model_name,
            model_name = model_name,
            config = defaults,
            is_vision = is_vision,
            is_embedding = is_embedding,
            is_lora = is_lora,
            is_audio = audio_type is not None,
            audio_type = audio_type,
            has_audio_input = is_audio_input_type(audio_type),
            model_type = derive_model_type(is_vision, audio_type, is_embedding),
            base_model = base_model,
            max_position_embeddings = max_position_embeddings,
            model_size_bytes = _get_model_size_bytes(model_name, hf_token),
        )

    except Exception as e:
        logger.error(f"Error getting model config: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to get model config: {str(e)}"
        )


@router.get("/loras")
async def scan_loras(
    outputs_dir: str = Query(
        default = str(outputs_root()), description = "Directory to scan for LoRA adapters"
    ),
    exports_dir: str = Query(
        default = str(exports_root()), description = "Directory to scan for exported models"
    ),
    current_subject: str = Depends(get_current_subject),
):
    """
    List trained LoRA adapters together with exported models.

    Training outputs (``source="training"``) come first, followed by exported
    models (``source="exported"``, with their ``export_type``). Both
    directories are passed through ``resolve_output_dir`` /
    ``resolve_export_dir`` before scanning. Failures are reported as 500.
    """
    try:
        resolved_outputs_dir = str(resolve_output_dir(outputs_dir))
        resolved_exports_dir = str(resolve_export_dir(exports_dir))

        # Training outputs: the base model is looked up per adapter.
        entries = [
            LoRAInfo(
                display_name = label,
                adapter_path = location,
                base_model = get_base_model_from_lora(location),
                source = "training",
            )
            for label, location in scan_trained_loras(
                outputs_dir = resolved_outputs_dir
            )
        ]

        # Exported models already carry their base model and export type.
        entries.extend(
            LoRAInfo(
                display_name = label,
                adapter_path = location,
                base_model = base,
                source = "exported",
                export_type = kind,
            )
            for label, location, kind, base in scan_exported_models(
                exports_dir = resolved_exports_dir
            )
        )

        return LoRAScanResponse(loras = entries, outputs_dir = resolved_outputs_dir)

    except Exception as e:
        logger.error(f"Error scanning LoRAs: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to scan LoRA adapters: {str(e)}"
        )


@router.get("/loras/{lora_path:path}/base-model", response_model = LoRABaseModelResponse)
async def get_lora_base_model(
    lora_path: str,
    current_subject: str = Depends(get_current_subject),
):
    """
    Look up the base model of a LoRA adapter.

    Delegates to ``get_base_model_from_lora``. Responds with 404 when no base
    model can be determined and with 500 on any other failure.
    """
    try:
        resolved_base = get_base_model_from_lora(lora_path)

        if resolved_base is not None:
            return LoRABaseModelResponse(
                lora_path = lora_path,
                base_model = resolved_base,
            )

        raise HTTPException(
            status_code = 404,
            detail = f"Could not determine base model for LoRA: {lora_path}",
        )

    except HTTPException:
        # Let the 404 above (or any HTTP error from below) pass through as-is.
        raise
    except Exception as e:
        logger.error(f"Error getting LoRA base model: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to get base model: {str(e)}"
        )


@router.get("/check-vision/{model_name:path}", response_model = VisionCheckResponse)
async def check_vision_model(
    model_name: str,
    current_subject: str = Depends(get_current_subject),
):
    """
    Report whether ``model_name`` is a vision model.

    Delegates to ``is_vision_model``; any failure is reported as a 500.
    """
    try:
        logger.info(f"Checking if vision model: {model_name}")
        is_vision = is_vision_model(model_name)
        logger.info(f"Vision check result for {model_name}: is_vision={is_vision}")

        response = VisionCheckResponse(
            model_name = model_name,
            is_vision = is_vision,
        )
        return response

    except Exception as e:
        logger.error(f"Error checking vision model: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to check vision model: {str(e)}"
        )


@router.get("/check-embedding/{model_name:path}", response_model = EmbeddingCheckResponse)
async def check_embedding_model(
    model_name: str,
    hf_token: Optional[str] = Query(None),
    current_subject: str = Depends(get_current_subject),
):
    """
    Report whether ``model_name`` is an embedding model.

    Delegates to ``is_embedding_model`` (forwarding ``hf_token``); any failure
    is reported as a 500.
    """
    try:
        logger.info(f"Checking if embedding model: {model_name}")
        is_embedding = is_embedding_model(model_name, hf_token = hf_token)
        logger.info(
            f"Embedding check result for {model_name}: is_embedding={is_embedding}"
        )

        response = EmbeddingCheckResponse(
            model_name = model_name,
            is_embedding = is_embedding,
        )
        return response

    except Exception as e:
        logger.error(f"Error checking embedding model: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to check embedding model: {str(e)}"
        )


@router.get("/gguf-variants", response_model = GgufVariantsResponse)
async def get_gguf_variants(
    repo_id: str = Query(
        ..., description = "HuggingFace repo ID (e.g. 'unsloth/gemma-3-4b-it-GGUF')"
    ),
    hf_token: Optional[str] = Query(
        None, description = "HuggingFace token for private repos"
    ),
    current_subject: str = Depends(get_current_subject),
):
    """
    List available GGUF quantization variants for a HuggingFace repo.

    Returns every variant reported by ``list_gguf_variants`` with its file
    size, whether the repo supports vision, the recommended default variant,
    and a ``downloaded`` flag per variant.

    A variant counts as downloaded when a single cached snapshot holds at
    least 99% of its expected bytes (for split GGUFs that means all shards).
    The cache probe is best effort: if it fails, every variant is reported as
    not downloaded. Any other failure is reported as a 500.
    """
    try:
        variants, has_vision = list_gguf_variants(repo_id, hf_token = hf_token)

        # Determine default variant
        filenames = [v.filename for v in variants]
        best = _pick_best_gguf(filenames)
        default_variant = _extract_quant_label(best) if best else None

        # Bytes cached per quant label. The HF cache dir keeps the casing of
        # the repo_id used at download time, which may differ from the
        # canonical repo_id, so the directory is matched case-insensitively.
        cached_bytes_by_quant: dict[str, int] = {}
        try:
            from huggingface_hub import constants as hf_constants

            # Sanitize repo_id: must be "owner/name" with safe chars only
            if not _is_valid_repo_id(repo_id):
                raise ValueError(f"Invalid repo_id format: {repo_id}")

            cache_dir = Path(hf_constants.HF_HUB_CACHE)
            target = f"models--{repo_id.replace('/', '--')}".lower()
            for entry in cache_dir.iterdir():
                if entry.name.lower() != target:
                    continue
                snapshots = entry / "snapshots"
                if snapshots.is_dir():
                    for snap in snapshots.iterdir():
                        # Sum per snapshot, then keep the best snapshot per
                        # quant. Summing across snapshots would count a file
                        # that exists in several revisions more than once and
                        # could flag a partial download as complete.
                        snap_bytes: dict[str, int] = {}
                        for f in snap.rglob("*.gguf"):
                            q = _extract_quant_label(f.name)
                            snap_bytes[q] = snap_bytes.get(q, 0) + f.stat().st_size
                        for q, size in snap_bytes.items():
                            if size > cached_bytes_by_quant.get(q, 0):
                                cached_bytes_by_quant[q] = size
                break
        except Exception as cache_err:
            # Best effort only: fall back to "nothing downloaded".
            logger.debug(f"Could not inspect HF cache for '{repo_id}': {cache_err}")

        def _is_fully_downloaded(variant) -> bool:
            cached = cached_bytes_by_quant.get(variant.quant, 0)
            if cached == 0 or variant.size_bytes == 0:
                return False
            # Allow small rounding tolerance (symlinks vs real sizes)
            return cached >= variant.size_bytes * 0.99

        return GgufVariantsResponse(
            repo_id = repo_id,
            variants = [
                GgufVariantDetail(
                    filename = v.filename,
                    quant = v.quant,
                    size_bytes = v.size_bytes,
                    downloaded = _is_fully_downloaded(v),
                )
                for v in variants
            ],
            has_vision = has_vision,
            default_variant = default_variant,
        )

    except Exception as e:
        logger.error(f"Error listing GGUF variants for '{repo_id}': {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to list GGUF variants: {str(e)}",
        )


@router.get("/gguf-download-progress")
async def get_gguf_download_progress(
    repo_id: str = Query(..., description = "HuggingFace repo ID"),
    variant: str = Query("", description = "Quantization variant (e.g. UD-TQ1_0)"),
    expected_bytes: int = Query(0, description = "Expected total download size in bytes"),
    current_subject: str = Depends(get_current_subject),
):
    """Report GGUF download progress for one variant from the local HF cache.

    Counts ``*.gguf`` files under the repo's cache folder whose name contains
    the variant (compared case-insensitively, ignoring ``-`` and ``_``) plus
    every ``.incomplete`` file in the repo's ``blobs`` folder. ``progress`` is
    capped at 0.99 until the finished files alone reach ``expected_bytes``.
    Never raises: an invalid repo id or any error yields a zero-progress
    payload.
    """
    try:
        if not _is_valid_repo_id(repo_id):
            return {
                "downloaded_bytes": 0,
                "expected_bytes": expected_bytes,
                "progress": 0,
            }

        from huggingface_hub import constants as hf_constants

        def _squash(text: str) -> str:
            # Lowercase and drop separators so "UD-Q4_K_XL" matches "udq4kxl".
            return text.lower().replace("-", "").replace("_", "")

        hub_cache = Path(hf_constants.HF_HUB_CACHE)
        wanted_dir = f"models--{repo_id.replace('/', '--')}".lower()
        wanted_variant = _squash(variant)

        # First cache folder whose name matches case-insensitively, if any.
        repo_dir = next(
            (entry for entry in hub_cache.iterdir() if entry.name.lower() == wanted_dir),
            None,
        )

        downloaded_bytes = 0
        in_progress_bytes = 0
        if repo_dir is not None:
            # Finished .gguf files that belong to the requested variant.
            for gguf in repo_dir.rglob("*.gguf"):
                if not wanted_variant or wanted_variant in _squash(gguf.name):
                    downloaded_bytes += gguf.stat().st_size
            # Partially downloaded blobs.
            blobs_dir = repo_dir / "blobs"
            if blobs_dir.is_dir():
                for blob in blobs_dir.iterdir():
                    if blob.is_file() and blob.name.endswith(".incomplete"):
                        in_progress_bytes += blob.stat().st_size

        total_progress_bytes = downloaded_bytes + in_progress_bytes
        if expected_bytes > 0:
            if downloaded_bytes >= expected_bytes:
                # 1.0 only once finished files alone cover the expected size.
                progress = 1.0
            else:
                progress = min(total_progress_bytes / expected_bytes, 0.99)
        else:
            progress = 0
        return {
            "downloaded_bytes": total_progress_bytes,
            "expected_bytes": expected_bytes,
            "progress": round(progress, 3),
        }
    except Exception:
        return {"downloaded_bytes": 0, "expected_bytes": expected_bytes, "progress": 0}


@router.get("/download-progress")
async def get_download_progress(
    repo_id: str = Query(..., description = "HuggingFace repo ID"),
    current_subject: str = Depends(get_current_subject),
):
    """Report download progress for any HuggingFace model repo.

    Sums finished and ``.incomplete`` files in the repo's ``blobs`` folder of
    the local HF cache and compares the result with the repo size obtained
    from ``_get_repo_size_cached``. Never raises: an invalid repo id, an
    empty cache or any error yields an all-zero payload.
    """
    _empty = {"downloaded_bytes": 0, "expected_bytes": 0, "progress": 0}
    try:
        if not _is_valid_repo_id(repo_id):
            return _empty

        from huggingface_hub import constants as hf_constants

        hub_cache = Path(hf_constants.HF_HUB_CACHE)
        wanted_dir = f"models--{repo_id.replace('/', '--')}".lower()

        # First cache folder whose name matches case-insensitively, if any.
        repo_dir = next(
            (entry for entry in hub_cache.iterdir() if entry.name.lower() == wanted_dir),
            None,
        )

        completed_bytes = 0
        in_progress_bytes = 0
        blobs_dir = None if repo_dir is None else repo_dir / "blobs"
        if blobs_dir is not None and blobs_dir.is_dir():
            for blob in blobs_dir.iterdir():
                if not blob.is_file():
                    continue
                blob_size = blob.stat().st_size
                if blob.name.endswith(".incomplete"):
                    in_progress_bytes += blob_size
                else:
                    completed_bytes += blob_size

        downloaded_bytes = completed_bytes + in_progress_bytes
        if downloaded_bytes == 0:
            return _empty

        expected_bytes = _get_repo_size_cached(repo_id)
        if expected_bytes <= 0:
            # Total unknown: report raw bytes without a percentage.
            return {
                "downloaded_bytes": downloaded_bytes,
                "expected_bytes": 0,
                "progress": 0,
            }

        # Completion uses a 95% threshold on finished bytes because blob
        # deduplication can leave them slightly below the expected total.
        # The absence of .incomplete files is NOT a completion signal: files
        # are fetched sequentially, so none exist between two files.
        finished = completed_bytes >= expected_bytes * 0.95
        progress = 1.0 if finished else min(downloaded_bytes / expected_bytes, 0.99)
        return {
            "downloaded_bytes": downloaded_bytes,
            "expected_bytes": expected_bytes,
            "progress": round(progress, 3),
        }
    except Exception as e:
        logger.warning(f"Error checking download progress for {repo_id}: {e}")
        return _empty


# Process-wide memo of repo_id -> total repo size in bytes.
_repo_size_cache: dict[str, int] = {}


def _get_repo_size_cached(repo_id: str) -> int:
    """Return the total size in bytes of all files in a Hub model repo.

    The first successful lookup per ``repo_id`` is stored in
    ``_repo_size_cache`` and reused afterwards. A failed lookup is logged,
    returns 0 and is not cached, so the next call tries again.
    """
    known = _repo_size_cache.get(repo_id)
    if known is not None:
        return known

    try:
        from huggingface_hub import model_info as hf_model_info

        info = hf_model_info(repo_id, token = None, files_metadata = True)
        repo_bytes = sum(s.size for s in info.siblings if s.size)
    except Exception as e:
        logger.warning(f"Failed to get repo size for {repo_id}: {e}")
        return 0

    _repo_size_cache[repo_id] = repo_bytes
    return repo_bytes


@router.get("/cached-gguf")
async def list_cached_gguf(
    current_subject: str = Depends(get_current_subject),
):
    """List GGUF repos that are already present in the HF cache.

    Only model repos whose id ends in ``-GGUF`` (any casing) and that contain
    at least one ``.gguf`` file are reported. Repo ids differing only by
    casing are merged, keeping the entry with the most ``.gguf`` bytes. The
    result is sorted by repo id. Never raises: on error an empty list is
    returned.
    """
    try:
        from huggingface_hub import scan_cache_dir

        by_lower_id: dict[str, dict] = {}
        for repo in scan_cache_dir().repos:
            if repo.repo_type != "model":
                continue
            repo_id = repo.repo_id
            if not repo_id.upper().endswith("-GGUF"):
                continue

            # Sizes of the real .gguf files across all cached revisions.
            gguf_sizes = [
                cached_file.size_on_disk
                for revision in repo.revisions
                for cached_file in revision.files
                if cached_file.file_name.endswith(".gguf")
            ]
            if not gguf_sizes:
                continue
            total_size = sum(gguf_sizes)

            # Case-insensitive dedupe: the larger entry wins.
            key = repo_id.lower()
            previous = by_lower_id.get(key)
            if previous is not None and total_size <= previous["size_bytes"]:
                continue
            by_lower_id[key] = {
                "repo_id": repo_id,
                "size_bytes": total_size,
                "cache_path": str(repo.repo_path),
            }

        cached = sorted(by_lower_id.values(), key = lambda c: c["repo_id"])
        return {"cached": cached}
    except Exception as e:
        logger.error(f"Error listing cached GGUF repos: {e}", exc_info = True)
        return {"cached": []}


@router.get("/cached-models")
async def list_cached_models(
    current_subject: str = Depends(get_current_subject),
):
    """List non-GGUF model repos that are present in the HF cache.

    A repo is reported only when it occupies disk space and holds at least
    one weight file (``.safetensors`` or ``.bin``); repos with just
    config/metadata are skipped. Repo ids differing only by casing are
    merged, keeping the larger entry. The result is sorted by repo id. Never
    raises: on error an empty list is returned.
    """
    _WEIGHT_EXTENSIONS = (".safetensors", ".bin")

    try:
        from huggingface_hub import scan_cache_dir

        by_lower_id: dict[str, dict] = {}
        for repo in scan_cache_dir().repos:
            if repo.repo_type != "model":
                continue
            repo_id = repo.repo_id
            if repo_id.upper().endswith("-GGUF"):
                continue

            cached_files = [
                cached_file
                for revision in repo.revisions
                for cached_file in revision.files
            ]
            total_size = sum(cached_file.size_on_disk for cached_file in cached_files)
            if total_size == 0:
                continue
            # Require at least one real weight file.
            if not any(
                cached_file.file_name.endswith(_WEIGHT_EXTENSIONS)
                for cached_file in cached_files
            ):
                continue

            # Case-insensitive dedupe: the larger entry wins.
            key = repo_id.lower()
            previous = by_lower_id.get(key)
            if previous is not None and total_size <= previous["size_bytes"]:
                continue
            by_lower_id[key] = {
                "repo_id": repo_id,
                "size_bytes": total_size,
            }

        cached = sorted(by_lower_id.values(), key = lambda c: c["repo_id"])
        return {"cached": cached}
    except Exception as e:
        logger.error(f"Error listing cached models: {e}", exc_info = True)
        return {"cached": []}


@router.delete("/delete-cached")
async def delete_cached_model(
    repo_id: str = Body(...),
    variant: Optional[str] = Body(None),
    current_subject: str = Depends(get_current_subject),
):
    """Delete a cached model repo (or a specific GGUF variant) from the HF cache.

    When *variant* is provided, only the GGUF files matching that quant label
    are removed (e.g. ``UD-Q4_K_XL``).  Otherwise the entire repo is deleted.
    Refuses if the model is currently loaded for inference.

    Returns:
        ``{"status": "deleted", "repo_id": ...}`` and, for per-variant
        deletion, an additional ``"variant"`` key.

    Raises:
        HTTPException 400: *repo_id* is malformed, or the model is loaded.
        HTTPException 404: the repo (or the requested variant) is not cached.
        HTTPException 500: files were found but could not be removed, or any
            other unexpected failure.
    """
    if not _is_valid_repo_id(repo_id):
        raise HTTPException(status_code = 400, detail = "Invalid repo_id format")

    repo_key = repo_id.lower()

    # Check if model is currently loaded (llama.cpp backend). The check is
    # best-effort: if the backend cannot be queried we log and carry on.
    try:
        from routes.inference import get_llama_cpp_backend

        llama_backend = get_llama_cpp_backend()
        if llama_backend.is_loaded and llama_backend.model_identifier:
            loaded_id = llama_backend.model_identifier.lower()
            # Prefix match also covers the exact-match case.
            if loaded_id.startswith(repo_key):
                raise HTTPException(
                    status_code = 400,
                    detail = "Unload the model before deleting",
                )
    except HTTPException:
        raise
    except Exception as e:
        logger.debug(f"Could not check llama.cpp backend before delete: {e}")

    # Same best-effort check against the regular inference backend.
    try:
        inference_backend = get_inference_backend()
        if inference_backend.active_model_name:
            active = inference_backend.active_model_name.lower()
            if active.startswith(repo_key):
                raise HTTPException(
                    status_code = 400,
                    detail = "Unload the model before deleting",
                )
    except HTTPException:
        raise
    except Exception as e:
        logger.debug(f"Could not check inference backend before delete: {e}")

    try:
        from huggingface_hub import scan_cache_dir

        hf_cache = scan_cache_dir()
        target_repo = None
        for repo_info in hf_cache.repos:
            if repo_info.repo_type != "model":
                continue
            if repo_info.repo_id.lower() == repo_key:
                target_repo = repo_info
                break

        if target_repo is None:
            raise HTTPException(status_code = 404, detail = "Model not found in cache")

        # ── Per-variant GGUF deletion ────────────────────────────
        if variant:
            variant_key = variant.lower()
            matched_count = 0  # files whose quant label matches the variant
            deleted_bytes = 0
            deleted_count = 0  # matching files actually removed
            for rev in target_repo.revisions:
                for f in rev.files:
                    if not f.file_name.endswith(".gguf"):
                        continue
                    quant = _extract_quant_label(f.file_name)
                    if quant.lower() != variant_key:
                        continue
                    matched_count += 1
                    # Delete the blob (actual data) and the snapshot symlink
                    try:
                        blob = Path(f.blob_path)
                        snap = Path(f.file_path)
                        # Size must be read before the blob is unlinked.
                        size = blob.stat().st_size if blob.exists() else 0
                        # is_symlink() catches a dangling link whose blob was
                        # already removed via another revision.
                        if snap.exists() or snap.is_symlink():
                            snap.unlink()
                        if blob.exists():
                            blob.unlink()
                        deleted_bytes += size
                        deleted_count += 1
                    except Exception as e:
                        logger.warning(f"Failed to delete {f.file_name}: {e}")

            if matched_count == 0:
                raise HTTPException(
                    status_code = 404,
                    detail = f"Variant {variant} not found in cache for {repo_id}",
                )
            if deleted_count == 0:
                # The variant exists but nothing could be removed; reporting
                # "not found" here would be misleading.
                raise HTTPException(
                    status_code = 500,
                    detail = f"Failed to delete variant {variant} for {repo_id}",
                )

            freed_mb = deleted_bytes / (1024 * 1024)
            logger.info(
                f"Deleted {deleted_count} file(s) for {repo_id} variant {variant}: "
                f"{freed_mb:.1f} MB freed"
            )
            return {"status": "deleted", "repo_id": repo_id, "variant": variant}

        # ── Full repo deletion ───────────────────────────────────
        revision_hashes = [rev.commit_hash for rev in target_repo.revisions]
        if not revision_hashes:
            raise HTTPException(status_code = 404, detail = "No revisions found for model")

        delete_strategy = hf_cache.delete_revisions(*revision_hashes)
        logger.info(
            f"Deleting cached model {repo_id}: "
            f"{delete_strategy.expected_freed_size_str} will be freed"
        )
        delete_strategy.execute()

        return {"status": "deleted", "repo_id": repo_id}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting cached model {repo_id}: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to delete cached model: {str(e)}",
        )


@router.get("/checkpoints", response_model = CheckpointListResponse)
async def list_checkpoints(
    outputs_dir: str = Query(
        default = str(outputs_root()),
        description = "Directory to scan for checkpoints",
    ),
    current_subject: str = Depends(get_current_subject),
):
    """
    Return the training runs and checkpoints found under an outputs directory.

    The requested directory is first passed through ``resolve_output_dir``;
    the resolved path is both scanned and echoed back in the response.
    Any failure is logged and reported as HTTP 500.
    """
    try:
        scan_root = str(resolve_output_dir(outputs_dir))

        models = []
        # Each scanned run is a (name, checkpoints, metadata) triple; each
        # checkpoint is a (display_name, path, loss) triple.
        for run_name, run_checkpoints, run_meta in scan_checkpoints(
            outputs_dir = scan_root
        ):
            entries = [
                CheckpointInfo(display_name = label, path = ckpt_path, loss = ckpt_loss)
                for label, ckpt_path, ckpt_loss in run_checkpoints
            ]
            models.append(
                ModelCheckpoints(
                    name = run_name,
                    checkpoints = entries,
                    base_model = run_meta.get("base_model"),
                    peft_type = run_meta.get("peft_type"),
                    lora_rank = run_meta.get("lora_rank"),
                )
            )

        return CheckpointListResponse(outputs_dir = scan_root, models = models)
    except Exception as e:
        logger.error(f"Error listing checkpoints: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to list checkpoints: {str(e)}",
        )


================================================
FILE: studio/backend/routes/training.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Training API routes
"""

import sys
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
from typing import Dict, Optional, Any
import structlog
from loggers import get_logger
import asyncio
from datetime import datetime

# Add backend directory to path
# The backend code should be in the same directory structure
# backend_path is the directory three levels above this file. It is prepended
# to sys.path (once) so the absolute imports below can be resolved.
# NOTE(review): for studio/backend/routes/training.py three ``.parent`` hops
# land on ``studio/``, i.e. one level ABOVE ``backend/`` — confirm this is the
# intended directory.
backend_path = Path(__file__).parent.parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

# Import backend functions
try:
    from core.training import get_training_backend
    from utils.models.model_config import load_model_defaults
    from utils.paths import resolve_dataset_path
except ImportError:
    # Fallback: try to import from parent directory
    # i.e. a sibling "backend" folder next to backend_path; it is added to
    # sys.path and the same three imports are retried. A second ImportError
    # is not caught and will propagate at module import time.
    parent_backend = backend_path.parent / "backend"
    if str(parent_backend) not in sys.path:
        sys.path.insert(0, str(parent_backend))
    from core.training import get_training_backend
    from utils.models.model_config import load_model_defaults
    from utils.paths import resolve_dataset_path

# Auth
from auth.authentication import get_current_subject

from models import (
    TrainingStartRequest,
    TrainingJobResponse,
    TrainingStatus,
    TrainingProgress,
)
from models.responses import TrainingStopResponse, TrainingMetricsResponse
from pydantic import BaseModel as PydanticBaseModel


# Request body accepted by the POST /stop endpoint (see stop_training).
class TrainingStopRequest(PydanticBaseModel):
    # Forwarded as ``save`` to the training backend's stop_training();
    # defaults to True when the client omits it.
    save: bool = True


# Router that every training endpoint in this module registers on.
router = APIRouter()
# Module-level logger, named after this module.
logger = get_logger(__name__)


def _validate_local_dataset_paths(
    paths: list[str], label: str = "Local dataset"
) -> list[str]:
    """Resolve and validate a list of local dataset paths. Returns validated absolute paths."""
    validated = []
    missing = []
    for dataset_path in paths:
        dataset_file = resolve_dataset_path(dataset_path)
        if not dataset_file.exists():
            missing.append(f"{dataset_path} (resolved: {dataset_file})")
            continue
        logger.info(f"Found {label.lower()} file: {dataset_file}")
        validated.append(str(dataset_file))

    if missing:
        missing_detail = "; ".join(missing[:3])
        raise HTTPException(
            status_code = 400,
            detail = f"{label} not found: {missing_detail}",
        )
    return validated


@router.get("/hardware")
async def get_hardware_utilization(
    current_subject: str = Depends(get_current_subject),
):
    """
    Return a point-in-time snapshot of GPU hardware utilization.

    Intended for the frontend to poll while training is running. The payload
    is whatever ``utils.hardware.get_gpu_utilization`` returns (GPU
    utilization %, temperature, VRAM usage and power draw, gathered via
    nvidia-smi).
    """
    # Imported at call time rather than at module import.
    from utils.hardware import get_gpu_utilization

    snapshot = get_gpu_utilization()
    return snapshot


@router.post("/start")
async def start_training(
    request: TrainingStartRequest,
    current_subject: str = Depends(get_current_subject),
):
    """
    Start a training job.

    This endpoint initiates training in the background and returns immediately.
    Use the /status endpoint to check training progress.

    Returns:
        TrainingJobResponse with ``status`` set to ``"queued"`` on success, or
        ``"error"`` when training is already running or the subprocess could
        not be started.

    Raises:
        HTTPException 400: a local (eval) dataset path does not exist.
        HTTPException 500: any other unexpected failure.
    """
    try:
        logger.info(f"Starting training job with model: {request.model_name}")

        # NOTE: No in-process ensure_transformers_version() call here.
        # The subprocess (worker.py) activates the correct version in a
        # fresh Python interpreter before importing any ML libraries.

        backend = get_training_backend()

        # Generate the id for this request. It is only attached to the backend
        # once we know no other job is running, so an active job keeps its id.
        job_id = f"job_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Check if training is already active
        if backend.is_training_active():
            existing_job_id: Optional[str] = getattr(backend, "current_job_id", "")
            return TrainingJobResponse(
                job_id = existing_job_id or job_id,
                status = "error",
                message = (
                    "Training is already in progress. "
                    "Stop current training before starting a new one."
                ),
                error = "Training already active",
            )

        # Validate dataset paths if provided
        if request.local_datasets:
            request.local_datasets = _validate_local_dataset_paths(
                request.local_datasets, "Local dataset"
            )
        # Eval datasets are only checked when evaluation is actually enabled.
        if request.local_eval_datasets and request.eval_steps > 0:
            request.local_eval_datasets = _validate_local_dataset_paths(
                request.local_eval_datasets, "Local eval dataset"
            )

        # Convert request to kwargs for backend
        training_kwargs = {
            "model_name": request.model_name,
            "training_type": request.training_type,
            "hf_token": request.hf_token or "",
            "load_in_4bit": request.load_in_4bit,
            "max_seq_length": request.max_seq_length,
            "hf_dataset": request.hf_dataset or "",
            "local_datasets": request.local_datasets,
            "local_eval_datasets": request.local_eval_datasets,
            "format_type": request.format_type,
            "subset": request.subset,
            "train_split": request.train_split,
            "eval_split": request.eval_split,
            "eval_steps": request.eval_steps,
            "dataset_slice_start": request.dataset_slice_start,
            "dataset_slice_end": request.dataset_slice_end,
            "custom_format_mapping": request.custom_format_mapping,
            "num_epochs": request.num_epochs,
            "learning_rate": request.learning_rate,
            "batch_size": request.batch_size,
            "gradient_accumulation_steps": request.gradient_accumulation_steps,
            "warmup_steps": request.warmup_steps,
            "warmup_ratio": request.warmup_ratio,
            "max_steps": request.max_steps,
            "save_steps": request.save_steps,
            "weight_decay": request.weight_decay,
            "random_seed": request.random_seed,
            "packing": request.packing,
            "optim": request.optim,
            "lr_scheduler_type": request.lr_scheduler_type,
            "use_lora": request.use_lora,
            "lora_r": request.lora_r,
            "lora_alpha": request.lora_alpha,
            "lora_dropout": request.lora_dropout,
            # Empty/missing target modules are normalised to None.
            "target_modules": request.target_modules
            if request.target_modules
            else None,
            # Blank or missing value falls back to "unsloth".
            "gradient_checkpointing": request.gradient_checkpointing.strip()
            if request.gradient_checkpointing and request.gradient_checkpointing.strip()
            else "unsloth",
            "use_rslora": request.use_rslora,
            "use_loftq": request.use_loftq,
            "train_on_completions": request.train_on_completions,
            "finetune_vision_layers": request.finetune_vision_layers,
            "finetune_language_layers": request.finetune_language_layers,
            "finetune_attention_modules": request.finetune_attention_modules,
            "finetune_mlp_modules": request.finetune_mlp_modules,
            "is_dataset_image": request.is_dataset_image,
            "is_dataset_audio": request.is_dataset_audio,
            "is_embedding": request.is_embedding,
            "enable_wandb": request.enable_wandb,
            "wandb_token": request.wandb_token or "",
            "wandb_project": request.wandb_project or "",
            "enable_tensorboard": request.enable_tensorboard,
            "tensorboard_dir": request.tensorboard_dir or "",
            "trust_remote_code": request.trust_remote_code,
        }

        # Training page has no trust_remote_code toggle — the value comes from
        # YAML model defaults applied when the user selects a model.  As a safety
        # net, consult the YAML directly so models that need it always get it.
        if not training_kwargs["trust_remote_code"]:
            model_defaults = load_model_defaults(request.model_name)
            yaml_trust = model_defaults.get("training", {}).get(
                "trust_remote_code", False
            )
            if yaml_trust:
                logger.info(
                    f"YAML config sets trust_remote_code=True for {request.model_name}"
                )
                training_kwargs["trust_remote_code"] = True

        # Free GPU memory: shut down any running inference/export subprocesses
        # before training starts (they'd compete for VRAM otherwise).
        # Both shutdowns are best-effort; a failure is logged, not fatal.
        try:
            from core.inference import get_inference_backend

            inf_backend = get_inference_backend()
            if inf_backend.active_model_name:
                logger.info(
                    "Unloading inference model '%s' to free GPU memory for training",
                    inf_backend.active_model_name,
                )
                inf_backend._shutdown_subprocess()
                inf_backend.active_model_name = None
                inf_backend.models.clear()
        except Exception as e:
            logger.warning("Could not unload inference model: %s", e)

        try:
            from core.export import get_export_backend

            exp_backend = get_export_backend()
            if exp_backend.current_checkpoint:
                logger.info(
                    "Shutting down export subprocess to free GPU memory for training"
                )
                exp_backend._shutdown_subprocess()
                exp_backend.current_checkpoint = None
                exp_backend.is_vision = False
                exp_backend.is_peft = False
        except Exception as e:
            logger.warning("Could not shut down export subprocess: %s", e)

        # Attach the job id to the backend for later status/progress calls.
        backend.current_job_id = job_id

        # start_training now spawns a subprocess (non-blocking)
        success = backend.start_training(**training_kwargs)

        if not success:
            progress_error = backend.trainer.training_progress.error
            return TrainingJobResponse(
                job_id = job_id,
                status = "error",
                message = progress_error or "Failed to start training subprocess",
                error = progress_error or "subprocess_start_failed",
            )

        return TrainingJobResponse(
            job_id = job_id,
            status = "queued",
            message = "Training job queued and starting in subprocess",
            error = None,
        )

    except HTTPException:
        # Preserve deliberate HTTP errors (e.g. the 400 raised for a missing
        # dataset) instead of re-wrapping them as a 500 below.
        raise
    except Exception as e:
        logger.error(f"Error starting training: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to start training: {str(e)}",
        )


@router.post("/stop", response_model = TrainingStopResponse)
async def stop_training(
    body: TrainingStopRequest = TrainingStopRequest(),
    current_subject: str = Depends(get_current_subject),
):
    """
    Ask the backend to stop the training job that is currently running.

    Body:
        save (bool): If True (default), save the model at the current checkpoint.

    Responds with status ``"idle"`` when nothing is running, otherwise
    ``"stopped"`` once the stop request has been handed to the backend.
    Unexpected failures are logged and reported as HTTP 500.
    """
    try:
        backend = get_training_backend()
        running = backend.is_training_active()
        logger.info("Stop requested: save=%s is_active=%s", body.save, running)

        if running:
            # Hand the request to the backend; it decides when to halt.
            backend.stop_training(save = body.save)
            return TrainingStopResponse(
                status = "stopped",
                message = "Stop requested. Training will stop at the next safe step.",
            )

        return TrainingStopResponse(
            status = "idle", message = "No training job is currently running"
        )

    except Exception as e:
        logger.error(f"Error stopping training: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to stop training: {str(e)}"
        )


@router.post("/reset")
async def reset_training(
    current_subject: str = Depends(get_current_subject),
):
    """
    Reset training state so the user can return to configuration.

    If training is still active the reset is only allowed when a cancel was
    already requested; in that case the subprocess is force-terminated first.
    Progress fields and all recorded metric histories (training and eval)
    are then cleared.

    Returns:
        ``{"status": "ok"}`` on success.

    Raises:
        HTTPException 409: training is running and no cancel was requested.
        HTTPException 500: any other unexpected failure.
    """
    try:
        backend = get_training_backend()
        is_active = backend.is_training_active()

        if is_active:
            if backend._cancel_requested:
                # Cancel (save=False) was requested — force-terminate so we can reset immediately
                logger.info(
                    "Force-terminating subprocess for immediate reset (cancel path)"
                )
                backend.force_terminate()
            else:
                logger.warning(
                    "Rejected reset while training active: is_active=%s", is_active
                )
                raise HTTPException(
                    status_code = 409,
                    detail = "Training is still running. Stop training and wait for it to finish before resetting.",
                )

        logger.info("Reset training state: clearing runtime + metric history")
        backend._should_stop = False  # Clear stop flag so status returns to idle
        backend.trainer._update_progress(
            is_training = False,
            is_completed = False,
            error = None,
            status_message = "Ready to train",
            step = 0,
            loss = 0.0,
            epoch = 0,
            total_steps = 0,
        )
        backend.loss_history = []
        backend.lr_history = []
        backend.step_history = []
        backend.grad_norm_history = []
        backend.grad_norm_step_history = []
        # The eval series are part of the metric history exposed by /status,
        # so they are cleared too; otherwise stale eval points would outlive
        # the reset.
        backend.eval_loss_history = []
        backend.eval_step_history = []
        return {"status": "ok"}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error resetting training: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500,
            detail = f"Failed to reset training: {str(e)}",
        )


@router.get("/status")
async def get_training_status(
    current_subject: str = Depends(get_current_subject),
):
    """
    Report the state of the training backend.

    The response carries the current job id, a coarse ``phase`` derived from
    the backend flags and the trainer's status message, the latest progress
    numbers, and — when any steps were recorded — the full metric history so
    a client can rebuild its charts after reconnecting.
    Unexpected failures are logged and reported as HTTP 500.
    """
    try:
        backend = get_training_backend()
        job_id: str = getattr(backend, "current_job_id", "") or ""

        is_active = backend.is_training_active()

        # Progress is optional: if the trainer cannot provide it, fall back
        # to defaults instead of failing the whole request.
        try:
            progress = backend.trainer.get_training_progress()
        except Exception:
            progress = None

        if progress:
            status_message = (
                getattr(progress, "status_message", None) or "Ready to train"
            )
            error_message = getattr(progress, "error", None)
        else:
            status_message = "Ready to train"
            error_message = None

        # Set when the user asked for training to stop.
        trainer_stopped = getattr(backend, "_should_stop", False)

        # Phase precedence: error > active (sub-phase from the status text)
        # > stopped > completed > idle.
        if error_message:
            phase = "error"
        elif is_active:
            lowered = status_message.lower()
            configuring_markers = ("preparing", "initializing", "configuring")
            if "loading" in lowered or "importing" in lowered:
                phase = "loading_model"
            elif any(marker in lowered for marker in configuring_markers):
                phase = "configuring"
            else:
                phase = "training"
        elif trainer_stopped:
            phase = "stopped"
        elif progress and getattr(progress, "is_completed", False):
            phase = "completed"
        else:
            phase = "idle"

        details = (
            {
                "epoch": getattr(progress, "epoch", 0),
                "step": getattr(progress, "step", 0),
                "total_steps": getattr(progress, "total_steps", 0),
                "loss": getattr(progress, "loss", 0.0),
                "learning_rate": getattr(progress, "learning_rate", 0.0),
            }
            if progress
            else None
        )

        # Metric history lets the client redraw charts after an SSE reconnect;
        # it is only included once at least one step has been recorded.
        if backend.step_history:
            metric_history = {
                "steps": list(backend.step_history),
                "loss": list(backend.loss_history),
                "lr": list(backend.lr_history),
                "grad_norm": list(getattr(backend, "grad_norm_history", [])),
                "grad_norm_steps": list(getattr(backend, "grad_norm_step_history", [])),
                "eval_loss": list(backend.eval_loss_history),
                "eval_steps": list(backend.eval_step_history),
            }
        else:
            metric_history = None

        return TrainingStatus(
            job_id = job_id,
            phase = phase,
            is_training_running = is_active,
            eval_enabled = backend.eval_enabled,
            message = status_message,
            error = error_message,
            details = details,
            metric_history = metric_history,
        )

    except Exception as e:
        logger.error(f"Error getting training status: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to get training status: {str(e)}"
        )


@router.get("/metrics", response_model = TrainingMetricsResponse)
async def get_training_metrics(
    current_subject: str = Depends(get_current_subject),
):
    """
    Return the recorded training metric series and their most recent values.

    Covers loss, learning rate, step and gradient-norm histories. Each
    ``current_*`` field is the last element of its series, or ``None`` when
    that series is empty. Unexpected failures are reported as HTTP 500.
    """
    try:
        backend = get_training_backend()

        losses = backend.loss_history
        learning_rates = backend.lr_history
        steps = backend.step_history
        # The grad-norm series are read defensively and default to empty.
        grad_norms = getattr(backend, "grad_norm_history", [])
        grad_norm_steps = getattr(backend, "grad_norm_step_history", [])

        def _latest(series):
            """Last element of *series*, or None when it is empty."""
            return series[-1] if series else None

        return TrainingMetricsResponse(
            loss_history = losses,
            lr_history = learning_rates,
            step_history = steps,
            grad_norm_history = grad_norms,
            grad_norm_step_history = grad_norm_steps,
            current_loss = _latest(losses),
            current_lr = _latest(learning_rates),
            current_step = _latest(steps),
        )

    except Exception as e:
        logger.error(f"Error getting training metrics: {e}", exc_info = True)
        raise HTTPException(
            status_code = 500, detail = f"Failed to get training metrics: {str(e)}"
        )


@router.get("/progress")
async def stream_training_progress(
    request: Request,
    current_subject: str = Depends(get_current_subject),
):
    """
    Stream training progress updates using Server-Sent Events (SSE).

    This endpoint provides real-time updates on training progress.
    Supports reconnection via the SSE spec:
      - Sends `id:` with each event so the browser tracks position.
      - Sends `retry:` to control reconnection interval.
      - Sends named `event:` types (progress, heartbeat, complete, error).
      - Reads `Last-Event-ID` header on reconnect to replay missed steps.

    Stream layout, in order:
      1. A `retry:` directive.
      2. On reconnect (valid `Last-Event-ID`): one `progress` event per
         recorded step greater than the last seen step.
      3. On a fresh connection: an initial `progress` event with step 0; if
         training is not active a `complete` event follows and the stream ends.
      4. A once-per-second polling loop that runs while training is active.
      5. A final `complete` event.

    The event `id` is the training step number (0 before any step is recorded).

    Returns:
        StreamingResponse with media type `text/event-stream`.
    """
    # Read Last-Event-ID header for reconnection resume
    last_event_id = request.headers.get("last-event-id")
    resume_from_step: Optional[int] = None
    if last_event_id is not None:
        try:
            resume_from_step = int(last_event_id)
            logger.info(f"SSE reconnect: resuming from step {resume_from_step}")
        except ValueError:
            # A malformed header is treated as a fresh connection.
            logger.warning(f"Invalid Last-Event-ID: {last_event_id}")

    async def event_generator():
        backend = get_training_backend()
        # Job id is captured once; every event of this stream carries it.
        job_id: str = getattr(backend, "current_job_id", "") or ""

        # ── Helpers ──────────────────────────────────────────────
        def build_progress(
            step: int,
            loss: float,
            learning_rate: float,
            total_steps: int,
            epoch: Optional[float] = None,
            progress: Optional[Any] = None,
            grad_norm_override: Optional[float] = None,
            eval_loss_override: Optional[float] = None,
        ) -> TrainingProgress:
            """Assemble a TrainingProgress payload.

            `progress` is an optional object from which elapsed_seconds,
            eta_seconds, grad_norm, num_tokens and eval_loss are read when
            present. The `*_override` values, when not None, take precedence
            over the corresponding attribute on `progress`.
            """
            # Negative totals are clamped to 0; a negative step or zero total
            # reports 0% rather than dividing.
            total = max(total_steps, 0)
            if step < 0 or total == 0:
                progress_percent = 0.0
            else:
                # total > 0 is already guaranteed here; the inner guard is redundant.
                progress_percent = (
                    float(step) / float(total) * 100.0 if total > 0 else 0.0
                )

            # Get actual values from progress object if available
            elapsed_seconds = (
                getattr(progress, "elapsed_seconds", None) if progress else None
            )
            eta_seconds = getattr(progress, "eta_seconds", None) if progress else None
            grad_norm = grad_norm_override
            if grad_norm is None and progress:
                grad_norm = getattr(progress, "grad_norm", None)
            num_tokens = getattr(progress, "num_tokens", None) if progress else None
            eval_loss = eval_loss_override
            if eval_loss is None and progress:
                eval_loss = getattr(progress, "eval_loss", None)

            return TrainingProgress(
                job_id = job_id,
                step = step,
                total_steps = total,
                loss = loss,
                learning_rate = learning_rate,
                progress_percent = progress_percent,
                epoch = epoch,
                elapsed_seconds = elapsed_seconds,
                eta_seconds = eta_seconds,
                grad_norm = grad_norm,
                num_tokens = num_tokens,
                eval_loss = eval_loss,
            )

        def format_sse(
            data: str,
            event: str = "progress",
            event_id: Optional[int] = None,
        ) -> str:
            """Format a single SSE message with id/event/data fields."""
            lines = []
            if event_id is not None:
                lines.append(f"id: {event_id}")
            lines.append(f"event: {event}")
            lines.append(f"data: {data}")
            lines.append("")  # trailing blank line
            lines.append("")  # double newline terminates the event
            return "\n".join(lines)

        # ── Retry directive ──────────────────────────────────────
        # Tell the browser to reconnect after 3 seconds if the connection drops
        yield "retry: 3000\n\n"

        # ── Replay missed steps on reconnect ─────────────────────
        if resume_from_step is not None and backend.step_history:
            replayed = 0
            # Gradient norms are recorded with their own step list, so build a
            # step -> grad_norm lookup to attach them to replayed events.
            grad_norm_by_step = {
                step_val: grad_val
                for step_val, grad_val in zip(
                    getattr(backend, "grad_norm_step_history", []),
                    getattr(backend, "grad_norm_history", []),
                )
            }
            for i, step_val in enumerate(backend.step_history):
                if step_val > resume_from_step:
                    # Histories are indexed in parallel; fall back to 0.0 if
                    # the loss/lr list is shorter than the step list.
                    loss_val = (
                        backend.loss_history[i]
                        if i < len(backend.loss_history)
                        else 0.0
                    )
                    lr_val = (
                        backend.lr_history[i] if i < len(backend.lr_history) else 0.0
                    )
                    # NOTE: this is the trainer's *current* progress object, so
                    # epoch/elapsed/eta in replayed events reflect the present
                    # state rather than the state at the replayed step.
                    tp_replay = getattr(
                        getattr(backend, "trainer", None), "training_progress", None
                    )
                    total_replay = (
                        getattr(tp_replay, "total_steps", step_val)
                        if tp_replay
                        else step_val
                    )
                    epoch_replay = (
                        getattr(tp_replay, "epoch", None) if tp_replay else None
                    )
                    payload = build_progress(
                        step_val,
                        loss_val,
                        lr_val,
                        total_replay,
                        epoch_replay,
                        progress = tp_replay,
                        grad_norm_override = grad_norm_by_step.get(step_val),
                    )
                    yield format_sse(
                        payload.model_dump_json(), event = "progress", event_id = step_val
                    )
                    replayed += 1
            if replayed:
                logger.info(f"SSE reconnect: replayed {replayed} missed steps")

        # ── Initial status (only on fresh connections) ───────────
        if resume_from_step is None:
            is_active = backend.is_training_active()
            tp = getattr(getattr(backend, "trainer", None), "training_progress", None)
            initial_total_steps = getattr(tp, "total_steps", 0) if tp else 0
            initial_epoch = getattr(tp, "epoch", None) if tp else None

            # Step-0 placeholder event sent before any real data.
            initial_progress = build_progress(
                step = 0,
                loss = 0.0,
                learning_rate = 0.0,
                total_steps = initial_total_steps,
                epoch = initial_epoch,
                progress = tp,
            )
            yield format_sse(
                initial_progress.model_dump_json(), event = "progress", event_id = 0
            )

            # If not active, send final state and exit
            if not is_active:
                if backend.step_history:
                    final_step = backend.step_history[-1]
                    final_loss = (
                        backend.loss_history[-1] if backend.loss_history else 0.0
                    )
                    final_lr = backend.lr_history[-1] if backend.lr_history else 0.0
                    final_total_steps = (
                        getattr(tp, "total_steps", final_step) if tp else final_step
                    )
                    final_epoch = getattr(tp, "epoch", None) if tp else None
                    payload = build_progress(
                        final_step,
                        final_loss,
                        final_lr,
                        final_total_steps,
                        final_epoch,
                        progress = tp,
                    )
                    yield format_sse(
                        payload.model_dump_json(), event = "complete", event_id = final_step
                    )
                else:
                    # No steps were ever recorded: step -1 with zero totals.
                    yield format_sse(
                        build_progress(-1, 0.0, 0.0, 0, progress = tp).model_dump_json(),
                        event = "complete",
                        event_id = 0,
                    )
                return

        # ── Live polling loop ────────────────────────────────────
        last_step = resume_from_step if resume_from_step is not None else -1
        # Counts consecutive polls without a new step; reset only when the
        # step changes. With a 1-second sleep per poll, 1800 polls ≈ 30 minutes.
        no_update_count = 0
        max_no_updates = (
            1800  # Timeout after 30 minutes (large models need time for compilation)
        )

        while backend.is_training_active():
            try:
                if backend.step_history:
                    # Only the most recent step is reported on each poll.
                    current_step = backend.step_history[-1]
                    current_loss = (
                        backend.loss_history[-1] if backend.loss_history else 0.0
                    )
                    current_lr = backend.lr_history[-1] if backend.lr_history else 0.0
                    tp_inner = getattr(
                        getattr(backend, "trainer", None), "training_progress", None
                    )
                    current_total_steps = (
                        getattr(tp_inner, "total_steps", current_step)
                        if tp_inner
                        else current_step
                    )
                    current_epoch = (
                        getattr(tp_inner, "epoch", None) if tp_inner else None
                    )

                    # Only send if step changed
                    if current_step != last_step:
                        progress_payload = build_progress(
                            current_step,
                            current_loss,
                            current_lr,
                            current_total_steps,
                            current_epoch,
                            progress = tp_inner,
                        )
                        yield format_sse(
                            progress_payload.model_dump_json(),
                            event = "progress",
                            event_id = current_step,
                        )
                        last_step = current_step
                        no_update_count = 0
                    else:
                        no_update_count += 1
                        # Send heartbeat every 10 seconds
                        if no_update_count % 10 == 0:
                            heartbeat_payload = build_progress(
                                current_step,
                                current_loss,
                                current_lr,
                                current_total_steps,
                                current_epoch,
                                progress = tp_inner,
                            )
                            yield format_sse(
                                heartbeat_payload.model_dump_json(),
                                event = "heartbeat",
                                event_id = current_step,
                            )
                else:
                    # No steps yet, but training is active (model loading, etc.)
                    no_update_count += 1
                    # Heartbeat every 5th idle poll during preparation.
                    if no_update_count % 5 == 0:
                        # Pull total_steps and status from trainer so
                        # the frontend can show "Tokenizing…" etc.
                        tp_prep = getattr(
                            getattr(backend, "trainer", None),
                            "training_progress",
                            None,
                        )
                        prep_total = (
                            getattr(tp_prep, "total_steps", 0) if tp_prep else 0
                        )
                        preparing_payload = build_progress(
                            0,
                            0.0,
                            0.0,
                            prep_total,
                            progress = tp_prep,
                        )
                        yield format_sse(
                            preparing_payload.model_dump_json(),
                            event = "heartbeat",
                            event_id = 0,
                        )

                # Timeout check
                if no_update_count > max_no_updates:
                    logger.warning("Progress stream timeout - no updates received")
                    tp_timeout = getattr(
                        getattr(backend, "trainer", None), "training_progress", None
                    )
                    timeout_payload = build_progress(
                        last_step, 0.0, 0.0, 0, progress = tp_timeout
                    )
                    yield format_sse(
                        timeout_payload.model_dump_json(),
                        event = "error",
                        event_id = last_step if last_step >= 0 else 0,
                    )
                    # NOTE(review): `break` falls through to the final
                    # "complete" event below, so the client receives "error"
                    # followed by "complete" — confirm this is intended.
                    break

                await asyncio.sleep(1)  # Poll every second

            except Exception as e:
                logger.error(f"Error in progress stream: {e}", exc_info = True)
                tp_error = getattr(
                    getattr(backend, "trainer", None), "training_progress", None
                )
                error_payload = build_progress(0, 0.0, 0.0, 0, progress = tp_error)
                yield format_sse(
                    error_payload.model_dump_json(),
                    event = "error",
                    event_id = last_step if last_step >= 0 else 0,
                )
                # As with the timeout path, the "complete" event below is
                # still emitted after this "error" event.
                break

        # ── Final "complete" event ───────────────────────────────
        # Reached when training stops being active, and also after a `break`
        # from the timeout/error branches above.
        final_step = backend.step_history[-1] if backend.step_history else last_step
        final_loss = backend.loss_history[-1] if backend.loss_history else 0.0
        final_lr = backend.lr_history[-1] if backend.lr_history else 0.0
        final_tp = getattr(getattr(backend, "trainer", None), "training_progress", None)
        final_total_steps = (
            getattr(final_tp, "total_steps", final_step) if final_tp else final_step
        )
        final_epoch = getattr(final_tp, "epoch", None) if final_tp else None
        final_payload = build_progress(
            final_step,
            final_loss,
            final_lr,
            final_total_steps,
            final_epoch,
            progress = final_tp,
        )
        yield format_sse(
            final_payload.model_dump_json(),
            event = "complete",
            # last_step may still be -1 when no step was ever seen; ids are kept >= 0.
            event_id = final_step if final_step >= 0 else 0,
        )

    return StreamingResponse(
        event_generator(),
        media_type = "text/event-stream",
        headers = {
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Presumably disables proxy response buffering (nginx convention) — confirm.
            "X-Accel-Buffering": "no",
        },
    )


================================================
FILE: studio/backend/run.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Run script for Unsloth UI Backend.
Works independently and can be moved to any directory.
"""

import os
import sys

# Suppress annoying C-level dependency warnings globally (e.g. SwigPyPacked).
# Set before the remaining imports below so it is already in the environment
# when they (and any child processes) run.
os.environ["PYTHONWARNINGS"] = "ignore"

from pathlib import Path

# Add the backend directory to Python path so the flat imports used below
# (`loggers`, `main`, `utils`, ...) resolve regardless of the working directory.
backend_dir = Path(__file__).parent
if str(backend_dir) not in sys.path:
    sys.path.insert(0, str(backend_dir))

from loggers import get_logger

# Module-level logger used by the shutdown helpers in this file.
logger = get_logger(__name__)


def _resolve_external_ip() -> str:
    """
    Resolve the machine's external IP address.

    Tries (in order):
    1. GCE metadata server (instant, works on Google Cloud VMs)
    2. ifconfig.me (works anywhere with internet)
    3. LAN IP via UDP socket trick (fallback)

    Every step is best-effort: any failure moves on to the next one.

    Returns:
        The first non-empty address obtained, or "0.0.0.0" if all three
        strategies fail.
    """
    import urllib.request
    import socket

    # 1. Try GCE metadata server (responds in <10ms on GCE, times out fast elsewhere)
    try:
        req = urllib.request.Request(
            "http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0/access-configs/0/external-ip",
            headers = {"Metadata-Flavor": "Google"},
        )
        with urllib.request.urlopen(req, timeout = 1) as resp:
            ip = resp.read().decode().strip()
            if ip:
                return ip
    except Exception:
        pass

    # 2. Try public IP service
    try:
        with urllib.request.urlopen("https://ifconfig.me", timeout = 3) as resp:
            ip = resp.read().decode().strip()
            if ip:
                return ip
    except Exception:
        pass

    # 3. Fallback: LAN IP via UDP socket trick.
    # connect() on a datagram socket only selects the outgoing interface;
    # getsockname() then reports that interface's local address.
    # The `with` block guarantees the socket is closed even when connect()
    # or getsockname() raises (previously the descriptor leaked on failure).
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]
    except Exception:
        return "0.0.0.0"


def _is_port_free(host: str, port: int) -> bool:
    """Return True when a TCP socket can be bound to ``(host, port)`` right now.

    The probe socket is closed again before returning; an ``OSError`` at any
    point of the probe is reported as "not free".
    """
    import socket

    try:
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        with probe:
            probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            probe.bind((host, port))
    except OSError:
        return False
    return True


def _find_free_port(host: str, start: int, max_attempts: int = 20) -> int:
    """Return the first bindable port among ``start``, ``start + 1``, ...

    At most ``max_attempts`` consecutive ports are probed, in ascending order.

    Raises:
        RuntimeError: if none of the probed ports is free.
    """
    candidates = (start + offset for offset in range(max_attempts))
    chosen = next(
        (candidate for candidate in candidates if _is_port_free(host, candidate)),
        None,
    )
    if chosen is None:
        raise RuntimeError(
            f"Could not find a free port in range {start}-{start + max_attempts - 1}"
        )
    return chosen


def _graceful_shutdown(server = None):
    """Stop the uvicorn server and tear down every subprocess backend.

    Intended to be invoked from signal handlers so child processes are
    cleaned up before the parent exits. This is critical on Windows where
    atexit handlers are unreliable after Ctrl+C.

    Each cleanup step is isolated: a failure is logged as a warning and the
    remaining steps still run.
    """
    logger.info("Graceful shutdown initiated — cleaning up subprocesses...")

    # Ask uvicorn to exit first (releases the listening socket)
    if server is not None:
        server.should_exit = True

    # The imports live inside the step functions so that an import failure is
    # handled exactly like any other error of that step.
    def _stop_inference():
        from core.inference.orchestrator import _inference_backend

        if _inference_backend is not None:
            _inference_backend._shutdown_subprocess(timeout = 5.0)

    def _stop_export():
        from core.export.orchestrator import _export_backend

        if _export_backend is not None:
            _export_backend._shutdown_subprocess(timeout = 5.0)

    def _stop_training():
        from core.training.training import _training_backend

        if _training_backend is not None:
            _training_backend.force_terminate()

    def _stop_llama_server():
        from routes.inference import _llama_cpp_backend

        if _llama_cpp_backend is not None:
            _llama_cpp_backend._kill_process()

    # (step, warning template) pairs, executed in this order:
    # inference, export, training, llama-server.
    cleanup_steps = (
        (_stop_inference, "Error shutting down inference subprocess: %s"),
        (_stop_export, "Error shutting down export subprocess: %s"),
        (_stop_training, "Error shutting down training subprocess: %s"),
        (_stop_llama_server, "Error shutting down llama-server: %s"),
    )
    for step, failure_template in cleanup_steps:
        try:
            step()
        except Exception as exc:
            logger.warning(failure_template, exc)

    logger.info("All subprocesses cleaned up")


# The uvicorn server instance — set by run_server(), used by callers
# that need to tell the server to exit (e.g. signal handlers).
# Stays None until run_server() has been called.
_server = None

# Shutdown event — used to wake the main loop on signal.
# Created by run_server() as a threading.Event; None before that.
_shutdown_event = None


def run_server(
    host: str = "0.0.0.0",
    port: int = 8888,
    frontend_path: Path = Path(__file__).resolve().parent.parent / "frontend" / "dist",
    silent: bool = False,
):
    """
    Launch the FastAPI app under uvicorn in a background daemon thread.

    Args:
        host: Interface to bind to.
        port: Preferred port; the next free port is used if it is taken.
        frontend_path: Frontend build directory to serve (skipped if falsy).
        silent: When True, nothing is printed to stdout.

    Returns:
        The FastAPI ``app`` object.

    Side effects:
        Sets the module globals ``_server`` (the uvicorn server) and
        ``_shutdown_event`` (a ``threading.Event``).

    Note:
        No signal handlers are installed here, so embedders (e.g. Colab
        notebooks) keep their own interrupt semantics. Standalone callers
        are expected to register handlers after this function returns.
    """
    global _server, _shutdown_event

    import nest_asyncio

    nest_asyncio.apply()

    import asyncio
    from threading import Thread, Event
    import time
    import uvicorn

    from main import app, setup_frontend
    from utils.paths import ensure_studio_directories

    verbose = not silent

    # Make sure the standard studio directories exist
    ensure_studio_directories()

    # Fall back to the next available port when the requested one is taken
    if not _is_port_free(host, port):
        original_port = port
        port = _find_free_port(host, original_port)
        if verbose:
            print(f"Port {original_port} is in use, using port {port} instead")

    # Mount the frontend build, reporting the outcome unless silenced
    if frontend_path:
        frontend_ready = setup_frontend(app, frontend_path)
        if verbose:
            if frontend_ready:
                print(f"✅ Frontend loaded from {frontend_path}")
            else:
                print(f"⚠️ Frontend not found at {frontend_path}")

    # Build the uvicorn server and publish it for signal handlers
    _server = uvicorn.Server(
        uvicorn.Config(
            app, host = host, port = port, log_level = "info", access_log = False
        )
    )
    _shutdown_event = Event()

    def _serve():
        asyncio.run(_server.serve())

    # Serve from a daemon thread, then give the server a moment to come up
    Thread(target = _serve, daemon = True).start()
    time.sleep(3)

    if verbose:
        display_host = _resolve_external_ip() if host == "0.0.0.0" else host
        divider = "=" * 50

        banner = [
            "",
            divider,
            f"🦥 Open your web browser, and enter http://localhost:{port}",
            divider,
            "",
            divider,
            f"🦥 Unsloth Studio is running on port {port}",
            f"   Local Access:          http://localhost:{port}",
            f"   Worldwide Web Address: http://{display_host}:{port}",
            f"   API:                   http://{display_host}:{port}/api",
            f"   Health:                http://{display_host}:{port}/api/health",
            divider,
        ]
        for banner_line in banner:
            print(banner_line)

    return app


# Script entry point (also reached when the CLI launches this file via
# os.execvp / subprocess)
if __name__ == "__main__":
    import argparse
    import signal

    cli = argparse.ArgumentParser(description = "Run Unsloth UI Backend server")
    cli.add_argument("--host", default = "0.0.0.0", help = "Host to bind to")
    cli.add_argument("--port", type = int, default = 8888, help = "Port to bind to")
    cli.add_argument(
        "--frontend",
        type = str,
        default = Path(__file__).resolve().parent.parent / "frontend" / "dist",
        help = "Path to frontend build",
    )
    cli.add_argument("--silent", action = "store_true", help = "Suppress output")

    options = cli.parse_args()

    server_kwargs = {
        "host": options.host,
        "port": options.port,
        "silent": options.silent,
    }
    if options.frontend is not None:
        server_kwargs["frontend_path"] = Path(options.frontend)
    run_server(**server_kwargs)

    # ── Signal handling — clean up subprocesses on Ctrl+C / termination ──
    def _on_signal(signum, frame):
        _graceful_shutdown(_server)
        _shutdown_event.set()

    handled_signals = [signal.SIGINT, signal.SIGTERM]
    # Some Windows terminals deliver SIGBREAK for Ctrl+C / Ctrl+Break
    if hasattr(signal, "SIGBREAK"):
        handled_signals.append(signal.SIGBREAK)
    for handled in handled_signals:
        signal.signal(handled, _on_signal)

    # Block until a shutdown signal arrives.
    # NOTE: an untimed Event.wait() blocks at the C level on Linux and keeps
    # Python from delivering SIGINT (Ctrl+C). Waiting in 1-second slices lets
    # the interpreter process pending signals between waits.
    while not _shutdown_event.wait(timeout = 1):
        pass


================================================
FILE: studio/backend/state/.gitkeep
================================================


================================================
FILE: studio/backend/state/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/tests/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/tests/conftest.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Shared pytest configuration for the backend test suite.
Ensures that the backend root is on sys.path so that
`import utils.utils` (and similar flat imports) resolve correctly.
"""

import sys
from pathlib import Path

# Add backend root to sys.path (mirrors how the app itself is launched).
# This file lives in <backend>/tests/, so two `.parent` hops reach <backend>.
_backend_root = Path(__file__).resolve().parent.parent
# Guard against inserting the same entry twice.
if str(_backend_root) not in sys.path:
    sys.path.insert(0, str(_backend_root))


================================================
FILE: studio/backend/tests/test_data_recipe_seed.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from pathlib import Path


def test_seed_inspect_load_kwargs_disables_remote_code_execution():
    """The seed route source must pin ``trust_remote_code`` to False.

    This is a source-text check: the route file is read from disk and
    searched for the literal setting, without importing the module.
    """
    backend_root = Path(__file__).resolve().parent.parent
    seed_route_file = backend_root / "routes" / "data_recipe" / "seed.py"
    seed_route = seed_route_file.read_text()

    assert '"trust_remote_code": False' in seed_route


================================================
FILE: studio/backend/tests/test_utils.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Tests for utils/hardware and utils/utils — device detection, GPU memory, error formatting.

These tests are designed to pass on ANY platform:
  • NVIDIA GPU  (CUDA backend, requires torch)
  • Apple Silicon (MLX backend, requires mlx)
  • CPU-only     (no GPU at all)

No ML framework is imported at the top level.
Tests that need torch/mlx internals for mocking are skipped when unavailable.

Run with:
    cd studio/backend
    python -m pytest tests/test_utils.py -v
"""

import platform
from unittest.mock import patch, MagicMock

import pytest

# --- Conditional framework imports ---
# Each framework is optional; the HAS_* flags record whether its import worked.
try:
    import torch

    HAS_TORCH = True
except ImportError:
    HAS_TORCH = False

try:
    import mlx.core as mx

    HAS_MLX = True
except ImportError:
    HAS_MLX = False

# Skip markers for tests that patch torch/mlx internals and therefore need
# the corresponding package to be importable.
needs_torch = pytest.mark.skipif(not HAS_TORCH, reason = "PyTorch not installed")
needs_mlx = pytest.mark.skipif(not HAS_MLX, reason = "MLX not installed")

from utils.hardware import (
    get_device,
    detect_hardware,
    is_apple_silicon,
    clear_gpu_cache,
    get_gpu_memory_info,
    log_gpu_memory,
    DeviceType,
)
import utils.hardware.hardware as _hw_module
from utils.utils import format_error_message


# ========== Helpers ==========


def _actual_device() -> str:
    """Name the device this machine really has: "cuda", "mlx" or "cpu".

    CUDA (via torch) is checked first, then MLX on Apple Silicon; anything
    else is reported as "cpu".
    """
    cuda_ready = HAS_TORCH and torch.cuda.is_available()
    if cuda_ready:
        return "cuda"
    mlx_ready = is_apple_silicon() and HAS_MLX
    return "mlx" if mlx_ready else "cpu"


def _reset_and_detect():
    """Clear the hardware module's cached DEVICE and run detection again.

    Returns whatever ``detect_hardware()`` reports after the reset.
    """
    setattr(_hw_module, "DEVICE", None)
    detected = detect_hardware()
    return detected


# ========== get_device() ==========


class TestGetDevice:
    """get_device() / detect_hardware() must agree with the real hardware.

    The cached ``DEVICE`` global is saved before and restored after every
    test so the mocked detection runs do not leak into other tests.
    """

    def setup_method(self):
        self._saved_device = _hw_module.DEVICE

    def teardown_method(self):
        _hw_module.DEVICE = self._saved_device

    def test_returns_valid_device_type(self):
        detected = get_device()
        known_types = (DeviceType.CUDA, DeviceType.MLX, DeviceType.CPU)
        assert detected in known_types

    def test_matches_actual_hardware(self):
        reported = get_device().value
        assert reported == _actual_device()

    # --- Mocked paths ---

    @needs_torch
    def test_returns_cuda_when_cuda_available(self):
        torch_present = patch("utils.hardware.hardware._has_torch", return_value = True)
        cuda_present = patch("torch.cuda.is_available", return_value = True)
        with torch_present, cuda_present:
            assert _reset_and_detect() == DeviceType.CUDA

    @needs_mlx
    def test_returns_mlx_when_on_apple_silicon_with_mlx(self):
        torch_absent = patch("utils.hardware.hardware._has_torch", return_value = False)
        on_apple = patch("utils.hardware.hardware.is_apple_silicon", return_value = True)
        mlx_present = patch("utils.hardware.hardware._has_mlx", return_value = True)
        with torch_absent, on_apple, mlx_present:
            assert _reset_and_detect() == DeviceType.MLX

    def test_returns_cpu_when_nothing_available(self):
        torch_absent = patch("utils.hardware.hardware._has_torch", return_value = False)
        not_apple = patch("utils.hardware.hardware.is_apple_silicon", return_value = False)
        mlx_absent = patch("utils.hardware.hardware._has_mlx", return_value = False)
        with torch_absent, not_apple, mlx_absent:
            assert _reset_and_detect() == DeviceType.CPU


# ========== is_apple_silicon() ==========


class TestIsAppleSilicon:
    """is_apple_silicon() is True only for the Darwin + arm64 combination."""

    @staticmethod
    def _detect_on(system_name, machine_name):
        # Run is_apple_silicon() with the hardware module's `platform`
        # replaced by a mock reporting the given system/machine pair.
        with patch("utils.hardware.hardware.platform") as fake_platform:
            fake_platform.system.return_value = system_name
            fake_platform.machine.return_value = machine_name
            return is_apple_silicon()

    def test_returns_bool(self):
        outcome = is_apple_silicon()
        assert isinstance(outcome, bool)

    def test_true_on_darwin_arm64(self):
        assert self._detect_on("Darwin", "arm64") is True

    def test_false_on_linux_x86(self):
        assert self._detect_on("Linux", "x86_64") is False

    def test_false_on_darwin_x86(self):
        """Intel Mac should return False."""
        assert self._detect_on("Darwin", "x86_64") is False


# ========== clear_gpu_cache() ==========


class TestClearGpuCache:
    """clear_gpu_cache() has to complete without raising on every platform."""

    def test_does_not_raise(self):
        # Real hardware, no mocking: the call itself is the assertion.
        clear_gpu_cache()

    @needs_torch
    def test_calls_cuda_cache_when_cuda(self):
        as_cuda = patch("utils.hardware.hardware.get_device", return_value = DeviceType.CUDA)
        empty_cache = patch("torch.cuda.empty_cache")
        ipc_collect = patch("torch.cuda.ipc_collect")
        with as_cuda, empty_cache as mock_empty, ipc_collect as mock_ipc:
            clear_gpu_cache()
            mock_empty.assert_called_once()
            mock_ipc.assert_called_once()

    @needs_mlx
    def test_mlx_does_not_raise(self):
        """MLX cache clear is a no-op — should just succeed."""
        as_mlx = patch("utils.hardware.hardware.get_device", return_value = DeviceType.MLX)
        with as_mlx:
            clear_gpu_cache()

    def test_noop_on_cpu(self):
        as_cpu = patch("utils.hardware.hardware.get_device", return_value = DeviceType.CPU)
        with as_cpu:
            clear_gpu_cache()


# ========== get_gpu_memory_info() ==========


class TestGetGpuMemoryInfo:
    """Shape and per-backend content of the dict returned by get_gpu_memory_info()."""

    def test_returns_dict(self):
        assert isinstance(get_gpu_memory_info(), dict)

    def test_has_available_key(self):
        info = get_gpu_memory_info()
        assert "available" in info

    def test_has_backend_key(self):
        info = get_gpu_memory_info()
        assert "backend" in info

    def test_backend_matches_device(self):
        info = get_gpu_memory_info()
        assert info["backend"] == get_device().value

    # --- Real hardware: only runs when the host is not CPU-only ---

    @pytest.mark.skipif(
        _actual_device() == "cpu", reason = "No GPU available on this machine"
    )
    def test_gpu_available_fields(self):
        info = get_gpu_memory_info()
        assert info["available"] is True
        assert info["total_gb"] > 0
        assert info["allocated_gb"] >= 0
        assert info["free_gb"] >= 0
        assert 0 <= info["utilization_pct"] <= 100
        assert "device_name" in info

    # --- CUDA path, fully mocked ---

    @needs_torch
    def test_cuda_path_returns_correct_fields(self):
        gib = 1024**3
        fake_props = MagicMock()
        fake_props.total_memory = 16 * gib
        fake_props.name = "NVIDIA Test GPU"

        with (
            patch("utils.hardware.hardware.get_device", return_value = DeviceType.CUDA),
            patch("torch.cuda.current_device", return_value = 0),
            patch("torch.cuda.get_device_properties", return_value = fake_props),
            patch("torch.cuda.memory_allocated", return_value = 4 * gib),
            patch("torch.cuda.memory_reserved", return_value = 6 * gib),
        ):
            info = get_gpu_memory_info()

        assert info["available"] is True
        assert info["backend"] == "cuda"
        assert info["device_name"] == "NVIDIA Test GPU"
        # 16 GiB total with 4 GiB allocated: 12 GiB free, 25% utilisation.
        assert abs(info["total_gb"] - 16.0) < 0.01
        assert abs(info["allocated_gb"] - 4.0) < 0.01
        assert abs(info["free_gb"] - 12.0) < 0.01
        assert abs(info["utilization_pct"] - 25.0) < 0.1

    # --- MLX path, fully mocked ---

    @needs_mlx
    def test_mlx_path_returns_correct_fields(self):
        fake_vm = MagicMock()
        fake_vm.total = 32 * (1024**3)  # 32 GB unified

        fake_psutil = MagicMock()
        fake_psutil.virtual_memory.return_value = fake_vm

        with (
            patch("utils.hardware.hardware.get_device", return_value = DeviceType.MLX),
            patch.dict("sys.modules", {"psutil": fake_psutil}),
        ):
            info = get_gpu_memory_info()

        assert info["available"] is True
        assert info["backend"] == "mlx"
        assert "Apple Silicon" in info["device_name"]
        assert abs(info["total_gb"] - 32.0) < 0.01

    # --- CPU-only path ---

    def test_cpu_path_returns_unavailable(self):
        forced_cpu = patch(
            "utils.hardware.hardware.get_device", return_value = DeviceType.CPU
        )
        with forced_cpu:
            info = get_gpu_memory_info()
        assert info["available"] is False
        assert info["backend"] == "cpu"

    # --- Error resilience: a failing CUDA call is reported, not raised ---

    @needs_torch
    def test_cuda_error_returns_unavailable(self):
        cuda_failure = RuntimeError("CUDA init failed")
        with (
            patch("utils.hardware.hardware.get_device", return_value = DeviceType.CUDA),
            patch("torch.cuda.current_device", side_effect = cuda_failure),
        ):
            info = get_gpu_memory_info()
        assert info["available"] is False
        assert "error" in info


# ========== log_gpu_memory() ==========


class TestLogGpuMemory:
    """log_gpu_memory() output, observed through pytest's caplog fixture."""

    def test_does_not_raise(self):
        log_gpu_memory("test")

    def test_logs_gpu_info_when_available(self, caplog):
        """The context label, upper-cased backend and device name all reach the log."""
        stub_info = {
            "available": True,
            "backend": "cuda",
            "device_name": "FakeGPU",
            "allocated_gb": 2.0,
            "total_gb": 16.0,
            "utilization_pct": 12.5,
            "free_gb": 14.0,
        }
        # NOTE(review): neither imported name is used below — confirm whether
        # these imports are only needed for their side effects.
        import structlog
        from loggers import get_logger

        stubbed_info = patch(
            "utils.hardware.hardware.get_gpu_memory_info", return_value = stub_info
        )
        with stubbed_info:
            with caplog.at_level(logging.INFO, logger = "utils.hardware.hardware"):
                log_gpu_memory("unit-test")

        captured = caplog.text
        assert "unit-test" in captured
        assert "CUDA" in captured
        assert "FakeGPU" in captured

    def test_logs_cpu_fallback_when_no_gpu(self, caplog):
        """An unavailable GPU produces the 'No GPU available' message."""
        stub_info = {"available": False, "backend": "cpu"}
        # NOTE(review): unused in this test as well — see the note in the
        # GPU-available test above this one.
        import structlog
        from loggers import get_logger

        stubbed_info = patch(
            "utils.hardware.hardware.get_gpu_memory_info", return_value = stub_info
        )
        with stubbed_info:
            with caplog.at_level(logging.INFO, logger = "utils.hardware.hardware"):
                log_gpu_memory("cpu-test")

        captured = caplog.text
        assert "No GPU available" in captured


# ========== format_error_message() ==========


class TestFormatErrorMessage:
    """format_error_message() output for known failure categories and the generic fallback."""

    def test_not_found(self):
        failure = Exception("Repository not found for unsloth/test")
        text = format_error_message(failure, "unsloth/test")
        assert "not found" in text.lower()
        assert "test" in text

    def test_unauthorized(self):
        failure = Exception("401 Unauthorized")
        lowered = format_error_message(failure, "some/model").lower()
        assert "authentication" in lowered or "unauthorized" in lowered

    def test_gated_model(self):
        failure = Exception("Access to model requires authentication")
        text = format_error_message(failure, "meta/llama")
        assert "authentication" in text.lower()

    def test_invalid_token(self):
        failure = Exception("Invalid user token")
        text = format_error_message(failure, "any/model")
        assert "invalid" in text.lower()

    # --- Out-of-memory messages depend on the (patched) device type ---

    @needs_torch
    def test_cuda_oom(self):
        failure = Exception("CUDA out of memory")
        forced_cuda = patch("utils.hardware.get_device", return_value = DeviceType.CUDA)
        with forced_cuda:
            text = format_error_message(failure, "big/model")
        assert "GPU" in text
        # The full repo id must not appear, only its last component.
        assert "big/model" not in text
        assert "model" in text

    @needs_mlx
    def test_mlx_oom(self):
        failure = Exception("MLX backend out of memory")
        forced_mlx = patch("utils.hardware.get_device", return_value = DeviceType.MLX)
        with forced_mlx:
            text = format_error_message(failure, "unsloth/huge-model")
        assert "Apple Silicon" in text

    def test_cpu_oom(self):
        failure = Exception("not enough memory to allocate")
        forced_cpu = patch("utils.hardware.get_device", return_value = DeviceType.CPU)
        with forced_cpu:
            text = format_error_message(failure, "any/model")
        assert "system" in text.lower()

    # --- Generic fallback: unrecognised errors pass through unchanged ---

    def test_generic_error(self):
        failure = Exception("Something completely unexpected")
        text = format_error_message(failure, "any/model")
        assert text == "Something completely unexpected"


================================================
FILE: studio/backend/utils/.gitkeep
================================================


================================================
FILE: studio/backend/utils/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: studio/backend/utils/cache_cleanup.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Utility for cleaning up the Unsloth compiled cache directory.

The unsloth_compiled_cache is created by unsloth_zoo/compiler.py during
FastModel.from_pretrained() and contains model-type-specific compiled Python
files. It should be cleared between model loads to avoid stale artefacts.
"""

import shutil
import structlog
from loggers import get_logger
from pathlib import Path

logger = get_logger(__name__)

# Possible locations where unsloth_compiled_cache may appear.
# Both anchors are derived from this file's resolved path.
_BACKEND_DIR = Path(__file__).resolve().parent.parent  # studio/backend
_PROJECT_ROOT = _BACKEND_DIR.parent.parent  # repo root

# Every directory in this list is removed by clear_unsloth_compiled_cache().
_CACHE_DIRS = [
    _BACKEND_DIR / "unsloth_compiled_cache",
    _PROJECT_ROOT / "unsloth_compiled_cache",
    _PROJECT_ROOT / "studio" / "tmp" / "unsloth_compiled_cache",
]


def clear_unsloth_compiled_cache() -> None:
    """Delete each directory listed in ``_CACHE_DIRS`` that currently exists.

    Paths that do not exist are skipped, and removal errors are ignored
    (``ignore_errors = True``), so the call can be repeated safely.
    """
    for candidate in _CACHE_DIRS:
        if not candidate.exists():
            continue
        logger.info(f"Removing unsloth compiled cache: {candidate}")
        shutil.rmtree(candidate, ignore_errors = True)


================================================
FILE: studio/backend/utils/datasets/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Dataset utilities package.

This package provides utilities for dataset format detection, conversion,
and processing for LLM and VLM fine-tuning workflows.

Modules:
- format_detection: Detect dataset formats (Alpaca, ShareGPT, ChatML)
- format_conversion: Convert between dataset formats
- chat_templates: Apply chat templates to datasets
- vlm_processing: Vision-Language Model processing utilities
- data_collators: Custom data collators for training
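- model_mappings: Model-to-template mapping constants
- dataset_utils: Legacy entry points (check_dataset_format, format_and_template_dataset, format_dataset)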
"""

# Format detection
from .format_detection import (
    detect_dataset_format,
    detect_custom_format_heuristic,
    detect_multimodal_dataset,
    detect_vlm_dataset_structure,
)

# Format conversion
from .format_conversion import (
    standardize_chat_format,
    convert_chatml_to_alpaca,
    convert_alpaca_to_chatml,
    convert_to_vlm_format,
    convert_llava_to_vlm_format,
    convert_sharegpt_with_images_to_vlm_format,
)

# Chat templates
from .chat_templates import (
    apply_chat_template_to_dataset,
    get_dataset_info_summary,
    get_tokenizer_chat_template,
    DEFAULT_ALPACA_TEMPLATE,
)

# VLM processing
from .vlm_processing import (
    generate_smart_vlm_instruction,
)

# Data collators
from .data_collators import (
    DataCollatorSpeechSeq2SeqWithPadding,
    DeepSeekOCRDataCollator,
    VLMDataCollator,
)

# Model mappings (constants)
from .model_mappings import (
    TEMPLATE_TO_MODEL_MAPPER,
    MODEL_TO_TEMPLATE_MAPPER,
    TEMPLATE_TO_RESPONSES_MAPPER,
)

# Legacy imports from the original dataset_utils.py for backward compatibility
# These functions have not yet been refactored into separate modules
from .dataset_utils import (
    check_dataset_format,
    format_and_template_dataset,
    format_dataset,
)

# Public API: the names exported by a star-import of this package.
# Entries are grouped in the same order as the import sections above.
__all__ = [
    # Detection
    "detect_dataset_format",
    "detect_custom_format_heuristic",
    "detect_multimodal_dataset",
    "detect_vlm_dataset_structure",
    # Conversion
    "standardize_chat_format",
    "convert_chatml_to_alpaca",
    "convert_alpaca_to_chatml",
    "convert_to_vlm_format",
    "convert_llava_to_vlm_format",
    "convert_sharegpt_with_images_to_vlm_format",
    # Templates
    "apply_chat_template_to_dataset",
    "get_dataset_info_summary",
    "get_tokenizer_chat_template",
    "DEFAULT_ALPACA_TEMPLATE",
    # VLM
    "generate_smart_vlm_instruction",
    # Collators
    "DataCollatorSpeechSeq2SeqWithPadding",
    "DeepSeekOCRDataCollator",
    "VLMDataCollator",
    # Mappings
    "TEMPLATE_TO_MODEL_MAPPER",
    "MODEL_TO_TEMPLATE_MAPPER",
    "TEMPLATE_TO_RESPONSES_MAPPER",
    # Main entry points
    "check_dataset_format",
    "format_and_template_dataset",
    "format_dataset",
]


================================================
FILE: studio/backend/utils/datasets/chat_templates.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Chat template application utilities for dataset processing.

This module contains functions for applying chat templates to datasets
and generating dataset info summaries.
"""

from torch.utils.data import IterableDataset

from .format_detection import detect_dataset_format, detect_multimodal_dataset, detect_custom_format_heuristic
from .model_mappings import MODEL_TO_TEMPLATE_MAPPER
from loggers import get_logger
logger = get_logger(__name__)


# Alpaca prompt layout with three positional ``{}`` slots. They are filled, in
# order, with the instruction, input and output fields by the ``.format`` call
# in apply_chat_template_to_dataset().
DEFAULT_ALPACA_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def get_tokenizer_chat_template(tokenizer, model_name):
    """
    Choose and install a chat template on ``tokenizer`` for the given model.

    Resolution order:
      1. If ``unsloth.chat_templates`` cannot be imported, the tokenizer is
         returned untouched.
      2. If the lowercased ``model_name`` is a key of MODEL_TO_TEMPLATE_MAPPER,
         the mapped Unsloth template is applied.
      3. Otherwise a tokenizer whose ``chat_template`` is already set is kept.
      4. Otherwise the default "chatml" template is applied.

    A failure while applying a template is logged and the tokenizer is
    returned as it was passed in.

    Args:
        tokenizer: HuggingFace tokenizer
        model_name: Model class name (e.g., "Gemma3ForCausalLM")

    Returns:
        tokenizer: Tokenizer with appropriate chat template applied
    """
    try:
        from unsloth.chat_templates import get_chat_template
    except ImportError:
        # Without Unsloth there is nothing to apply.
        return tokenizer

    # Mapper keys are matched against the lowercased name.
    lookup_key = model_name.lower()

    if lookup_key in MODEL_TO_TEMPLATE_MAPPER:
        template_name = MODEL_TO_TEMPLATE_MAPPER[lookup_key]
        logger.info(f"📝 Applying Unsloth chat template: {template_name}")
        try:
            return get_chat_template(tokenizer, chat_template = template_name)
        except Exception as exc:
            logger.info(f"⚠️ Failed to apply Unsloth template '{template_name}': {exc}")
            logger.info("   Falling back to tokenizer's default chat template")
            return tokenizer

    # No mapper entry: keep whatever template the tokenizer already carries.
    if getattr(tokenizer, "chat_template", None) is not None:
        logger.info("📝 Using tokenizer's own chat template (no Unsloth template match)")
        return tokenizer

    # Base model with no chat template at all: fall back to ChatML.
    logger.info("📝 No chat template found — applying default ChatML template (base model)")
    try:
        return get_chat_template(tokenizer, chat_template = "chatml")
    except Exception as exc:
        logger.info(f"⚠️ Failed to apply default ChatML template: {exc}")
        logger.info("   Falling back to tokenizer as-is")
    return tokenizer


def get_dataset_info_summary(dataset_info):
    """
    Condense format-detection metadata into a flat dict for UI display.

    Args:
        dataset_info: Mapping with the keys "detected_format", "final_format",
            "chat_column" and "is_standardized", plus an optional "warnings".

    Returns:
        dict holding both format names, a human-readable description of each
        ("Unknown" when the name has no description), the chat column, the
        standardization flag, the warnings (``[]`` when absent) and
        "ready_for_training", which is truthy only for a standardized dataset
        whose final format is not "unknown".
    """
    descriptions = {
        "alpaca": "Alpaca format (instruction/input/output)",
        "sharegpt": "ShareGPT format (needs standardization)",
        "chatml_messages": "ChatML format (messages column) - OpenAI compatible",
        "chatml_conversations": "ChatML format (conversations column) - HuggingFace standard",
        "unknown": "Unknown format",
    }

    def _describe(format_name):
        return descriptions.get(format_name, "Unknown")

    detected = dataset_info["detected_format"]
    final = dataset_info["final_format"]

    summary = {}
    summary["detected_format"] = detected
    summary["final_format"] = final
    summary["detected_description"] = _describe(detected)
    summary["final_description"] = _describe(final)
    summary["chat_column"] = dataset_info["chat_column"]
    summary["is_standardized"] = dataset_info["is_standardized"]
    summary["warnings"] = dataset_info.get("warnings", [])
    summary["ready_for_training"] = dataset_info["is_standardized"] and final != "unknown"
    return summary


def apply_chat_template_to_dataset(
    dataset_info,
    tokenizer,
    model_name = None,
    custom_prompt_template = None,
    add_eos_token = False,
    remove_bos_prefix = False,
    custom_format_mapping = None,
    auto_detect_mapping = True,
    batch_size = 1000,
    num_proc = None,
    progress_callback = None,
):
    """
    Applies chat template to dataset based on its format.

    The dataset is rendered into a single "text" column. Alpaca datasets are
    formatted with DEFAULT_ALPACA_TEMPLATE; ChatML datasets go through
    ``tokenizer.apply_chat_template``. A dataset whose format is "unknown" is
    first converted to ChatML conversations via a column-to-role mapping.

    Args:
        dataset_info: Output from format_dataset() with metadata. Must contain
            "dataset", "final_format", "chat_column" and "is_standardized";
            "warnings", "auto_detection_attempted" and "custom_format_mapping"
            are read when present.
        tokenizer: Tokenizer with chat template
        model_name: Optional model name; when given, the tokenizer is passed
            through get_tokenizer_chat_template() before ChatML formatting.
        custom_prompt_template: Currently unused; the Alpaca branch always
            formats with DEFAULT_ALPACA_TEMPLATE.
        add_eos_token: If True, appends tokenizer.eos_token to each text
        remove_bos_prefix: If True, removes '<bos>' prefix (for Gemma, etc.)
        custom_format_mapping: Dict mapping custom columns to standard format
            (column name -> "system" / "user" / "assistant").
        auto_detect_mapping: If True and no mapping was supplied for an
            "unknown" format, try detect_custom_format_heuristic() once.
        batch_size: Batch size for processing
        num_proc: Number of processes; passed through safe_num_proc(). Any
            non-int value selects safe_num_proc()'s default.
        progress_callback: Optional callable. During ChatML formatting of a
            non-iterable dataset it is called from a background thread with a
            ``status_message`` keyword describing tqdm progress.

    Returns:
        dict with dataset, success status, warnings, and errors
    """
    dataset = dataset_info["dataset"]
    final_format = dataset_info["final_format"]
    chat_column = dataset_info["chat_column"]
    is_standardized = dataset_info["is_standardized"]

    warnings = list(dataset_info.get("warnings", []))
    errors = []

    def _result(result_dataset, success):
        # Single place that shapes the return value of every exit path.
        return {
            "dataset": result_dataset,
            "success": success,
            "warnings": warnings,
            "errors": errors
        }

    def _build_map_kwargs(desc):
        # num_proc/desc are only supplied for non-iterable datasets.
        map_kwargs = {
            'batched': True,
            'batch_size': batch_size,
        }
        if not isinstance(dataset, IterableDataset):
            from utils.hardware import safe_num_proc
            if num_proc is None or type(num_proc) is not int:
                workers = safe_num_proc()
            else:
                workers = safe_num_proc(num_proc)
            map_kwargs['num_proc'] = workers
            map_kwargs['desc'] = desc
        return map_kwargs

    # Get EOS token if needed
    eos_token = ""
    if add_eos_token:
        if hasattr(tokenizer, 'eos_token') and tokenizer.eos_token:
            eos_token = tokenizer.eos_token
        else:
            warnings.append("add_eos_token=True but tokenizer has no eos_token")

    # CUSTOM FORMAT MAPPING (for non-standard datasets)
    if final_format == "unknown":
        # Try auto-detection if no custom mapping provided
        if custom_format_mapping is None and auto_detect_mapping:
            # Check if format_dataset already tried and failed
            if not dataset_info.get("auto_detection_attempted", False):
                custom_format_mapping = detect_custom_format_heuristic(dataset)
                if custom_format_mapping:
                    warnings.append(f"Auto-detected column mapping: {custom_format_mapping}")
                else:
                    errors.append("Could not auto-detect format mapping")
                    return _result(dataset, False)
            else:
                # Already failed once in format_dataset, don't retry
                errors.append(
                    "Format remains unknown after detection attempts. "
                    "Please provide custom_format_mapping to specify column roles manually."
                )
                return _result(dataset, False)

        if custom_format_mapping:
            warnings.append(f"Applying custom format mapping: {custom_format_mapping}")
            # "User provided" is decided by dataset_info, not by the argument.
            is_user_provided = dataset_info.get("custom_format_mapping") is not None
            role_order = ['system', 'user', 'assistant']

            def _apply_custom_mapping(examples):
                conversations = []
                num_examples = len(examples[list(examples.keys())[0]])

                # Only preserve unmapped columns if auto-detected
                preserved_columns = {}
                if not is_user_provided:
                    all_columns = set(examples.keys())
                    mapped_columns = set(custom_format_mapping.keys())
                    non_mapped_columns = all_columns - mapped_columns

                    for col in non_mapped_columns:
                        preserved_columns[col] = examples[col]

                for i in range(num_examples):
                    convo = []

                    # Emit turns in system -> user -> assistant order,
                    # regardless of the mapping's own ordering.
                    for target_role in role_order:
                        for col_name, role in custom_format_mapping.items():
                            if role == target_role and col_name in examples:
                                content = examples[col_name][i]

                                if is_user_provided:
                                    # User explicitly mapped - include even if empty
                                    convo.append({"role": role, "content": str(content) if content else ""})
                                else:
                                    # Auto-detected - skip empty
                                    if content and str(content).strip():
                                        convo.append({"role": role, "content": str(content)})

                    conversations.append(convo)

                result = {"conversations": conversations}
                if not is_user_provided:
                    result.update(preserved_columns)
                return result

            try:
                dataset = dataset.map(_apply_custom_mapping, batched = True, batch_size = batch_size)
                # Update to use conversations format
                final_format = "chatml_conversations"
                chat_column = "conversations"
                is_standardized = True
                warnings.append("Successfully converted to ChatML format via custom mapping")
            except Exception as e:
                errors.append(f"Custom format mapping failed: {e}")
                return _result(dataset, False)

    # ALPACA FORMAT
    if final_format == "alpaca":

        # Set alpaca chat template on tokenizer for saving (if not already set)
        # This ensures the template is saved with the model for inference
        if not (hasattr(tokenizer, 'chat_template') and tokenizer.chat_template):
            try:
                from unsloth.chat_templates import get_chat_template
                tokenizer = get_chat_template(tokenizer, chat_template = "alpaca")
                logger.info(f"📝 Set alpaca chat template on tokenizer for model saving")
            except Exception as e:
                logger.info(f"⚠️ Could not set alpaca template on tokenizer: {e}")

        def _format_alpaca_custom(examples):
            texts = []
            for i in range(len(examples["instruction"])):
                fields = {
                    "instruction": examples["instruction"][i],
                    # A missing "input" column is treated as empty strings.
                    "input": examples.get("input", [""] * len(examples["instruction"]))[i],
                    "output": examples["output"][i]
                }

                try:
                    text = DEFAULT_ALPACA_TEMPLATE.format(fields["instruction"], fields["input"], fields["output"])
                    text += eos_token
                    texts.append(text)
                except KeyError as e:
                    errors.append(f"Custom template missing field: {e}")
                    texts.append("")

            return {"text": texts}

        formatted_fn = _format_alpaca_custom

        try:
            dataset_map_kwargs = _build_map_kwargs("Applying template to Alpaca format")
            formatted_dataset = dataset.map(formatted_fn, **dataset_map_kwargs)
            return _result(formatted_dataset, True)
        except Exception as e:
            errors.append(f"Failed to format Alpaca dataset: {e}")
            return _result(dataset, False)

    # CHATML FORMATS
    elif final_format in ["chatml_messages", "chatml_conversations"]:

        if not is_standardized:
            warnings.append("Dataset may not be fully standardized")

        # Apply Unsloth chat template if model matches
        if model_name:
            tokenizer = get_tokenizer_chat_template(tokenizer, model_name)

        def _format_chatml(examples):
            convos = examples[chat_column]
            texts = []

            for convo in convos:
                try:
                    text = tokenizer.apply_chat_template(
                        convo,
                        tokenize = False,
                        add_generation_prompt = False
                    )

                    if remove_bos_prefix:
                        text = text.removeprefix('<bos>')
                    text += eos_token

                    texts.append(text)
                except Exception as e:
                    # Only a failure on the first row of a batch is recorded;
                    # every failing row still yields an empty string.
                    if len(texts) == 0:
                        warnings.append(f"Chat template failed: {e}")
                    texts.append("")

            return {"text": texts}

        try:
            _desc = f"Applying chat template to {final_format}"
            dataset_map_kwargs = _build_map_kwargs(_desc)

            # Monitor tqdm progress from dataset.map() and relay to callback
            _tqdm_monitor_stop = None
            if progress_callback and not isinstance(dataset, IterableDataset):
                import threading
                from tqdm.auto import tqdm as _tqdm_cls

                _tqdm_monitor_stop = threading.Event()
                _total = len(dataset) if hasattr(dataset, "__len__") else 0

                def _poll_tqdm():
                    while not _tqdm_monitor_stop.is_set():
                        for bar in list(getattr(_tqdm_cls, "_instances", set())):
                            try:
                                n = bar.n or 0
                                total = bar.total or _total
                                if total > 0 and n > 0:
                                    pct = min(int(n * 100 / total), 100)
                                    progress_callback(
                                        status_message = f"{_desc}... {pct}% ({n:,}/{total:,})"
                                    )
                            except (AttributeError, ReferenceError):
                                pass
                        # Sleep up to 3s, waking immediately once stop is set.
                        _tqdm_monitor_stop.wait(3)

                threading.Thread(target = _poll_tqdm, daemon = True).start()

            try:
                formatted_dataset = dataset.map(_format_chatml, **dataset_map_kwargs)
            finally:
                # Always stop the poller, even when map() raises; otherwise it
                # would keep invoking progress_callback after this call fails.
                if _tqdm_monitor_stop is not None:
                    _tqdm_monitor_stop.set()

            return _result(formatted_dataset, True)
        except Exception as e:
            errors.append(f"Failed to format ChatML dataset: {e}")
            return _result(dataset, False)

    # UNKNOWN FORMAT
    else:
        errors.append(
            f"Cannot apply chat template to format: {final_format}. "
            f"This should not happen after custom mapping."
        )
        return _result(dataset, False)


================================================
FILE: studio/backend/utils/datasets/data_collators.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Data collators for dataset processing.

This module contains custom data collators for training,
particularly for VLM/OCR processing.
"""

import torch
from dataclasses import dataclass
from typing import Any, List, Optional, Union
from loggers import get_logger

logger = get_logger(__name__)


@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """
    Batch collator for Whisper speech-to-text training.

    Audio input features and text label sequences are padded independently.
    Padded label positions are replaced with -100, and a leading BOS column
    is dropped when every row starts with it.
    Mirrors the collator from the Whisper.ipynb notebook.
    """

    processor: Any

    def __call__(self, features: List[dict]) -> dict:
        """Pad ``features`` (dicts with "input_features" and "labels") into one batch."""
        # Audio side: padded by the processor's feature extractor.
        audio_inputs = [
            {"input_features": example["input_features"]} for example in features
        ]
        batch = self.processor.feature_extractor.pad(
            audio_inputs, return_tensors = "pt"
        )

        # Text side: padded by the tokenizer, independently of the audio.
        text_targets = [{"input_ids": example["labels"]} for example in features]
        padded_text = self.processor.tokenizer.pad(text_targets, return_tensors = "pt")

        token_ids = padded_text["input_ids"]
        is_padding = padded_text.attention_mask.ne(1)
        labels = token_ids.masked_fill(is_padding, -100)

        # Strip the first column only when every sequence begins with BOS.
        bos_id = self.processor.tokenizer.bos_token_id
        starts_with_bos = (labels[:, 0] == bos_id).all().cpu().item()
        if starts_with_bos:
            labels = labels[:, 1:]

        batch["labels"] = labels
        return batch


@dataclass
class DeepSeekOCRDataCollator:
    """
    Data collator for DeepSeek OCR VLM training.

    Handles:
    - Collecting images from the samples' message content
    - Rendering each conversation with the processor's chat template
    - Tokenization / image processing via the processor
    - Label creation with padding positions masked out
    """

    processor: Any  # Qwen2VLProcessor or similar
    max_length: int = 2048
    ignore_index: int = -100

    def __call__(self, batch: List[dict]) -> dict:
        """
        Collate a batch of samples.

        Args:
            batch: List of dicts, each with 'messages' containing
                   [{'role': 'user', 'content': [...]}, {'role': 'assistant', 'content': [...]}]

        Returns:
            The processor's output with an added "labels" entry: a copy of
            "input_ids" in which pad tokens are replaced by ``ignore_index``.

        Raises:
            Exception: any error raised while templating or processing is
                logged and re-raised unchanged.
        """
        # Extract messages and images
        all_messages = []
        all_images = []

        for sample in batch:
            messages = sample["messages"]
            all_messages.append(messages)

            # Images are gathered into one flat list across the whole batch.
            for msg in messages:
                content = msg.get("content", [])
                if not isinstance(content, list):
                    continue
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "image":
                        img = item.get("image")
                        # Duck-typed image check: anything exposing ``size``.
                        if img is not None and hasattr(img, "size"):
                            all_images.append(img)

        # Process with the VL processor
        try:
            # Qwen2VL style processing
            texts = [
                self.processor.apply_chat_template(
                    msgs, tokenize = False, add_generation_prompt = False
                )
                for msgs in all_messages
            ]

            # Process with images (None when the batch carries no image)
            inputs = self.processor(
                text = texts,
                images = all_images if all_images else None,
                return_tensors = "pt",
                padding = True,
                truncation = True,
                max_length = self.max_length,
            )

            # Labels start as a copy of the inputs; only padding is masked.
            labels = inputs["input_ids"].clone()

            pad_token_id = self.processor.tokenizer.pad_token_id
            # A tokenizer without a pad token has nothing to mask.
            if pad_token_id is not None:
                labels[labels == pad_token_id] = self.ignore_index

            inputs["labels"] = labels

            return inputs

        except Exception as e:
            logger.info(f"⚠️ DeepSeekOCRDataCollator error: {e}")
            raise


@dataclass
class VLMDataCollator:
    """
    Generic VLM data collator that works with various processors.

    Supports:
    - Qwen2VL
    - LLaVA
    - Other VL models with compatible processors

    Each sample's ``messages`` are rendered through the processor's chat
    template, the processor is run over the rendered texts plus every image
    found inside the messages, and ``labels`` (a copy of ``input_ids`` with
    padding positions replaced by ``ignore_index``) are attached to the result.
    """

    processor: Any
    max_length: int = 2048
    ignore_index: int = -100
    # Whether to mask user tokens in labels.
    # NOTE: __call__ below does not read this flag; only padding is masked.
    mask_input_tokens: bool = True

    def __call__(self, batch: List[dict]) -> dict:
        """
        Collate a batch of VLM samples.

        Args:
            batch: List of samples. Each sample's ``"messages"`` entry (an
                   empty list when absent) is a list of message dicts whose
                   ``"content"`` may be a list of items; any dict item with a
                   non-None ``"image"`` value contributes an image.

        Returns:
            The processor output with an added ``"labels"`` entry.
        """
        all_messages = [sample.get("messages", []) for sample in batch]

        # Gather images in batch order so they line up with the rendered texts
        all_images = []
        for messages in all_messages:
            for msg in messages:
                content = msg.get("content", [])
                if not isinstance(content, list):
                    continue
                for item in content:
                    if isinstance(item, dict) and item.get("image") is not None:
                        all_images.append(item["image"])

        # Apply chat template
        texts = [
            self.processor.apply_chat_template(
                msgs, tokenize = False, add_generation_prompt = False
            )
            for msgs in all_messages
        ]

        # Process inputs (images is None rather than an empty list when absent)
        inputs = self.processor(
            text = texts,
            images = all_images if all_images else None,
            return_tensors = "pt",
            padding = True,
            truncation = True,
            max_length = self.max_length,
        )

        # Create labels
        labels = inputs["input_ids"].clone()

        # Mask padding. The pad id comes from the wrapped tokenizer when the
        # processor has one, otherwise from the processor itself. A processor
        # that exposes no pad id at all is treated like pad_token_id=None
        # (no masking) instead of raising AttributeError.
        pad_source = (
            self.processor.tokenizer
            if hasattr(self.processor, "tokenizer")
            else self.processor
        )
        pad_token_id = getattr(pad_source, "pad_token_id", None)

        if pad_token_id is not None:
            labels[labels == pad_token_id] = self.ignore_index

        inputs["labels"] = labels

        return inputs


================================================
FILE: studio/backend/utils/datasets/dataset_utils.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Dataset utilities for format detection, conversion, and template application.

This module provides the main entry points for dataset processing:
- check_dataset_format: Lightweight check if manual mapping is needed (for frontend)
- format_dataset: Detects and normalizes dataset formats
- format_and_template_dataset: End-to-end processing with chat template application

All internal utilities have been moved to separate modules:
- format_detection: detect_dataset_format, detect_multimodal_dataset, etc.
- format_conversion: standardize_chat_format, convert_chatml_to_alpaca, etc.
- chat_templates: apply_chat_template_to_dataset, get_tokenizer_chat_template, etc.
- vlm_processing: generate_smart_vlm_instruction
- data_collators: DeepSeekOCRDataCollator, VLMDataCollator
- model_mappings: TEMPLATE_TO_MODEL_MAPPER
"""

import json

# Import from modular files
from .format_detection import (
    detect_dataset_format,
    detect_multimodal_dataset,
    detect_vlm_dataset_structure,
    detect_custom_format_heuristic,
)
from .format_conversion import (
    standardize_chat_format,
    convert_chatml_to_alpaca,
    convert_alpaca_to_chatml,
    convert_to_vlm_format,
    convert_llava_to_vlm_format,
    convert_sharegpt_with_images_to_vlm_format,
)
from .chat_templates import (
    apply_chat_template_to_dataset,
    get_dataset_info_summary,
    get_tokenizer_chat_template,
    DEFAULT_ALPACA_TEMPLATE,
)
from .vlm_processing import generate_smart_vlm_instruction
from .data_collators import DeepSeekOCRDataCollator, VLMDataCollator
from .model_mappings import TEMPLATE_TO_MODEL_MAPPER
from loggers import get_logger

logger = get_logger(__name__)


def check_dataset_format(dataset, is_vlm: bool = False) -> dict:
    """
    Inspect a dataset's format without transforming it (frontend validation).

    Call this before format_and_template_dataset() to find out whether the
    user has to assign column roles by hand.

    Args:
        dataset: HuggingFace dataset
        is_vlm: Whether this is a Vision-Language Model dataset

    Returns:
        dict: {
            "requires_manual_mapping": bool - True if user must map columns,
            "detected_format": str - The detected format,
            "columns": list - Available column names for mapping UI,
            "suggested_mapping": dict or None - Auto-detected mapping if available,
            "detected_image_column": str or None - For VLM only,
            "detected_text_column": str or None - For VLM and audio,
            "is_image": bool - Image flag from multimodal detection (False for audio),
            "multimodal_columns": Columns reported by multimodal detection,
            "warning": str or None - Only present for VLM results and for
                       text datasets whose roles could not be detected,
            "is_audio", "detected_audio_column", "detected_speaker_column":
                       Audio detection results, present on every result,
        }
    """
    if hasattr(dataset, "column_names"):
        columns = list(dataset.column_names)
    else:
        # No column_names attribute: use the keys of the first row instead
        columns = list(next(iter(dataset)).keys())

    # Multimodal detection runs no matter what is_vlm says
    multimodal_info = detect_multimodal_dataset(dataset)
    is_audio = multimodal_info.get("is_audio", False)

    # Appended to every report
    audio_fields = {
        "is_audio": is_audio,
        "detected_audio_column": multimodal_info.get("detected_audio_column"),
        "detected_speaker_column": multimodal_info.get("detected_speaker_column"),
    }

    _unset = object()

    def _report(
        needs_mapping,
        fmt,
        *,
        suggested = None,
        image_column = None,
        text_column = None,
        is_image = _unset,
        multimodal_columns = _unset,
        warning = _unset,
    ):
        # One place that fixes the key order shared by all return paths
        report = {
            "requires_manual_mapping": needs_mapping,
            "detected_format": fmt,
            "columns": columns,
            "suggested_mapping": suggested,
            "detected_image_column": image_column,
            "detected_text_column": text_column,
            "is_image": multimodal_info["is_image"] if is_image is _unset else is_image,
            "multimodal_columns": (
                multimodal_info.get("multimodal_columns")
                if multimodal_columns is _unset
                else multimodal_columns
            ),
        }
        if warning is not _unset:
            report["warning"] = warning
        report.update(audio_fields)
        return report

    # ── VLM flow ──
    if is_vlm:
        vlm_structure = detect_vlm_dataset_structure(dataset)
        vlm_format = vlm_structure["format"]
        requires_mapping = vlm_format == "unknown"
        image_column = vlm_structure.get("image_column")
        text_column = vlm_structure.get("text_column")

        warning = None
        if requires_mapping:
            missing = [
                label
                for label, column in (("image", image_column), ("text", text_column))
                if not column
            ]
            if missing:
                warning = (
                    f"Could not auto-detect {' or '.join(missing)} column. "
                    "Please assign image and text columns manually."
                )

        return _report(
            requires_mapping,
            vlm_format,
            image_column = image_column,
            text_column = text_column,
            warning = warning,
        )

    # ── Audio flow: manual mapping only if a column could not be detected ──
    if is_audio:
        text_column = multimodal_info.get("detected_text_column")
        audio_column = multimodal_info.get("detected_audio_column")
        return _report(
            not (audio_column and text_column),
            "audio",
            text_column = text_column,
            is_image = False,
            multimodal_columns = multimodal_info.get("audio_columns"),
        )

    # ── Text / LLM flow ──
    detected = detect_dataset_format(dataset)
    if detected["format"] != "unknown":
        # Known format detected
        return _report(False, detected["format"])

    # Unknown format: fall back to the column-name heuristic
    heuristic_mapping = detect_custom_format_heuristic(dataset)
    if heuristic_mapping:
        return _report(False, "custom_heuristic", suggested = heuristic_mapping)

    # Heuristic failed — user must map manually (or use AI Assist)
    return _report(
        True,
        "unknown",
        warning = (
            f"Could not auto-detect column roles for columns: {columns}. "
            "Please assign roles manually, or use AI Assist."
        ),
    )


# Role aliases from every supported naming scheme (chatml, sharegpt, alpaca),
# grouped under the canonical chatml role each one normalises to.
_TO_CHATML = {
    alias: canonical
    for canonical, aliases in (
        ("user", ("user", "human", "instruction")),
        ("assistant", ("assistant", "gpt", "output")),
        ("system", ("system", "input")),
    )
    for alias in aliases
}
# Order in which roles are emitted when a conversation is assembled
_CHATML_ROLE_ORDER = ("system", "user", "assistant")
# Alpaca field that each canonical chatml role is written to
_CHATML_TO_ALPACA = dict(user = "instruction", system = "input", assistant = "output")


def _apply_user_mapping(dataset, mapping: dict, batch_size: int = 1000):
    """
    Apply user-provided column mapping to convert dataset to conversations format.

    Accepts chatml (user/assistant/system), sharegpt (human/gpt/system), and
    alpaca (instruction/input/output) role names — all normalised to chatml output.

    If the mapping contains ``__``-prefixed metadata keys (from the conversion
    advisor), routes to template-based conversion instead of simple role mapping.

    Args:
        dataset: Dataset exposing ``map`` and ``column_names``.
        mapping: ``{column_name: role}``, optionally with ``__``-prefixed
                 metadata entries.
        batch_size: Batch size forwarded to ``dataset.map``.

    Returns:
        Dataset with single 'conversations' column
    """
    # Split metadata from column roles
    meta = {k: v for k, v in mapping.items() if k.startswith("__")}
    column_roles = {k: v for k, v in mapping.items() if not k.startswith("__")}

    if meta:
        return _apply_template_mapping(dataset, column_roles, meta, batch_size)

    # ── Simple mode ──
    # Pre-compute: group columns by canonical chatml role; columns whose role
    # is not a known alias are left out of the conversation.
    role_groups: dict[str, list[str]] = {r: [] for r in _CHATML_ROLE_ORDER}
    for col_name, role in column_roles.items():
        canonical = _TO_CHATML.get(role)
        if canonical:
            role_groups[canonical].append(col_name)

    def _as_text(value) -> str:
        # None and zero-length values (empty string/list/dict) become "".
        # Everything else is kept as text — a plain truthiness test would
        # erase valid falsy cells such as a label of 0 or False.
        if value is None:
            return ""
        if hasattr(value, "__len__") and len(value) == 0:
            return ""
        return str(value)

    def _convert(examples):
        num = len(next(iter(examples.values())))
        conversations = []
        for i in range(num):
            # One message per mapped column, emitted in system/user/assistant order
            convo = [
                {"role": chatml_role, "content": _as_text(examples[col][i])}
                for chatml_role in _CHATML_ROLE_ORDER
                for col in role_groups[chatml_role]
                if col in examples
            ]
            conversations.append(convo)
        return {"conversations": conversations}

    return dataset.map(
        _convert,
        batched = True,
        batch_size = batch_size,
        remove_columns = dataset.column_names,
    )


def _extract_column_value(val, col: str, label_mapping: dict) -> str:
    """
    Extract a string value from a column, handling complex types and label mapping.

    Args:
        val: Raw cell value (scalar, list, dict or None).
        col: Name of the column the value came from; used to look up a
             per-column label mapping.
        label_mapping: ``{col: {raw_value_as_str: label}}``. May be empty or None.

    Returns:
        The text for the cell. The extracted value is always converted to a
        string before the label lookup, so string-keyed label mappings match
        integer cells too.
    """

    def _text(item) -> str:
        return "" if item is None else str(item)

    # Handle complex types (dicts, lists) — extract useful text instead of raw repr
    if isinstance(val, dict):
        # Common pattern: {"text": [...]} in QA datasets
        if "text" in val:
            inner = val["text"]
            if isinstance(inner, list):
                # First entry only; an empty list means "no text", not "[]"
                str_val = _text(inner[0]) if inner else ""
            else:
                str_val = _text(inner)
        else:
            str_val = json.dumps(val, ensure_ascii = False)
    elif isinstance(val, list):
        # A single-item list collapses to that item, stringified so the
        # declared return type holds and callers can safely str.join results.
        if len(val) == 1:
            str_val = _text(val[0])
        else:
            str_val = ", ".join(str(v) for v in val)
    else:
        str_val = _text(val)

    # Apply label mapping if this column has one
    if label_mapping and col in label_mapping and isinstance(label_mapping[col], dict):
        str_val = label_mapping[col].get(str_val, str_val)

    return str_val


def _apply_template_mapping(
    dataset, column_roles: dict, meta: dict, batch_size: int = 1000
):
    """
    Convert a non-conversational dataset using an advisor-produced mapping.

    Columns are bucketed by role (user / assistant); within a row the values
    of each bucket are joined with newlines into one message. A non-empty
    ``__system_prompt`` from ``meta`` is prepended to every conversation, and
    ``__label_mapping`` is forwarded to the per-cell value extraction.

    Returns:
        Dataset with single 'conversations' column
    """
    system_prompt = meta.get("__system_prompt", "")
    label_mapping = meta.get("__label_mapping", {})  # {col: {int_str: label_str}}

    # Bucket columns by canonical chatml role; roles that do not resolve to
    # user or assistant are not used.
    role_groups: dict[str, list[str]] = {"user": [], "assistant": []}
    for col, role in column_roles.items():
        bucket = role_groups.get(_TO_CHATML.get(role, role))
        if bucket is not None:
            bucket.append(col)

    import logging as _log

    _log.getLogger(__name__).info(
        f"Applying role mapping: sys={bool(system_prompt)}, "
        f"user_cols={role_groups['user']}, asst_cols={role_groups['assistant']}, "
        f"label_map={list(label_mapping.keys())}"
    )

    def _convert(examples):
        row_count = len(next(iter(examples.values())))
        conversations = []
        for row in range(row_count):
            # The system prompt is static, so every row starts with the same message
            convo = (
                [{"role": "system", "content": system_prompt}] if system_prompt else []
            )

            # One merged message per role, user first, then assistant
            for role in ("user", "assistant"):
                parts = [
                    _extract_column_value(examples[col][row], col, label_mapping)
                    for col in role_groups[role]
                    if col in examples
                ]
                if parts:
                    convo.append({"role": role, "content": "\n".join(parts)})

            conversations.append(convo)
        return {"conversations": conversations}

    return dataset.map(
        _convert,
        batched = True,
        batch_size = batch_size,
        remove_columns = dataset.column_names,
    )


def _apply_user_mapping_alpaca(dataset, mapping: dict, batch_size: int = 1000):
    """
    Apply user-provided column mapping to convert dataset to Alpaca format.

    Accepts any format's role names — normalises via _TO_CHATML, then maps
    user → instruction, system → input, assistant → output.

    ``__``-prefixed metadata entries (from the conversion advisor) are not
    column roles and are skipped; their values may be dicts, which cannot be
    looked up in the role table.

    Args:
        dataset: Dataset exposing ``map`` and ``column_names``.
        mapping: ``{column_name: role}``. When several columns resolve to the
                 same Alpaca field, the last one in iteration order is used.
        batch_size: Batch size forwarded to ``dataset.map``.

    Returns:
        Dataset with instruction/input/output columns
    """
    col_for: dict[str, str | None] = {
        "instruction": None,
        "input": None,
        "output": None,
    }
    for col_name, role in mapping.items():
        if isinstance(col_name, str) and col_name.startswith("__"):
            continue
        canonical = _TO_CHATML.get(role)
        alpaca_field = _CHATML_TO_ALPACA.get(canonical) if canonical else None
        if alpaca_field:
            col_for[alpaca_field] = col_name

    def _as_text(value) -> str:
        # None and zero-length values (empty string/list/dict) become "".
        # Everything else is kept as text — a plain truthiness test would
        # erase valid falsy cells such as a label of 0 or False.
        if value is None:
            return ""
        if hasattr(value, "__len__") and len(value) == 0:
            return ""
        return str(value)

    def _convert(examples):
        num = len(next(iter(examples.values())))
        converted = {"instruction": [], "input": [], "output": []}
        for field, values in converted.items():
            col = col_for[field]
            if col and col in examples:
                column = examples[col]
                values.extend(_as_text(column[i]) for i in range(num))
            else:
                # Field has no mapped column in this batch: fill with blanks
                values.extend("" for _ in range(num))
        return converted

    return dataset.map(
        _convert,
        batched = True,
        batch_size = batch_size,
        remove_columns = dataset.column_names,
    )


def _make_auto_mapper(custom_mapping):
    """Build the batched map function that applies a heuristic column→role mapping."""

    def _apply_auto_mapping(examples):
        conversations = []
        num_examples = len(examples[list(examples.keys())[0]])

        # Preserve non-mapped columns (in the batch's own column order)
        preserved_columns = {
            col: examples[col] for col in examples.keys() if col not in custom_mapping
        }

        for i in range(num_examples):
            convo = []
            for target_role in ("system", "user", "assistant"):
                for col_name, role in custom_mapping.items():
                    if role == target_role and col_name in examples:
                        content = examples[col_name][i]
                        # Blank cells produce no message at all
                        if content and str(content).strip():
                            convo.append({"role": role, "content": str(content)})
            conversations.append(convo)

        return {"conversations": conversations, **preserved_columns}

    return _apply_auto_mapping


def format_dataset(
    dataset,
    format_type = "auto",
    tokenizer = None,
    aliases_for_system = [
        "system",
    ],
    aliases_for_user = [
        "user",
        "human",
        "input",
    ],
    aliases_for_assistant = [
        "gpt",
        "assistant",
        "output",
    ],
    batch_size = 1000,
    num_proc = None,
    auto_detect_custom = True,
    custom_format_mapping = None,
):
    """
    Formats dataset and returns metadata.

    Args:
        dataset: Dataset to format.
        format_type: "auto" (keep the detected format, standardizing where
            needed), "alpaca", or one of "chatml" / "conversational" /
            "sharegpt" (all produce chatml-style conversations).
        tokenizer: Forwarded to standardize_chat_format.
        aliases_for_system / aliases_for_user / aliases_for_assistant:
            Role aliases forwarded to standardize_chat_format. The list
            defaults are only passed through, never mutated here.
        batch_size: Batch size for map-based conversions.
        num_proc: Worker count forwarded to the conversion helpers.
        auto_detect_custom: In auto mode, try the column-name heuristic when
            the format is unknown.
        custom_format_mapping: Explicit ``{column: role}`` mapping. When
            given, detection is skipped and the mapping is applied directly.

    Returns:
        dict: {
            "dataset": processed dataset,
            "detected_format": original format detected,
            "final_format": final format after processing,
            "chat_column": column name with chat data,
            "is_standardized": whether role names are standardized,
            "requires_manual_mapping": True if format detection failed and user must map columns,
            "is_image": image flag from multimodal detection,
            "multimodal_info": full multimodal detection result,
            "warnings": list of warning messages
        }

    Raises:
        ValueError: If ``format_type`` is not a supported value (only checked
            when no ``custom_format_mapping`` is given).
    """

    # Detect multimodal first (needed for all flows)
    multimodal_info = detect_multimodal_dataset(dataset)

    def _result(data, detected_format, final_format, chat_column, standardized, messages):
        # Every outcome shares this shape; manual mapping is required exactly
        # when the dataset could not be standardized.
        return {
            "dataset": data,
            "detected_format": detected_format,
            "final_format": final_format,
            "chat_column": chat_column,
            "is_standardized": standardized,
            "requires_manual_mapping": not standardized,
            "is_image": multimodal_info["is_image"],
            "multimodal_info": multimodal_info,
            "warnings": messages,
        }

    def _standardize(data):
        return standardize_chat_format(
            data,
            tokenizer,
            aliases_for_system,
            aliases_for_user,
            aliases_for_assistant,
            batch_size,
            num_proc,
        )

    # If user provided explicit mapping, skip detection and apply in the requested format
    if custom_format_mapping:
        try:
            if format_type == "alpaca":
                mapped_dataset = _apply_user_mapping_alpaca(
                    dataset, custom_format_mapping, batch_size
                )
                final_format, chat_column = "alpaca", None
            else:
                # auto / chatml / sharegpt / conversational — all produce chatml conversations
                # (sharegpt is always standardized to role/content internally)
                mapped_dataset = _apply_user_mapping(
                    dataset, custom_format_mapping, batch_size
                )
                final_format, chat_column = "chatml_conversations", "conversations"

            return _result(
                mapped_dataset,
                "user_mapped",
                final_format,
                chat_column,
                True,
                [
                    f"Applied user-provided column mapping ({format_type}): {custom_format_mapping}"
                ],
            )
        except Exception as e:
            # Best effort: hand the untouched dataset back and ask for manual mapping
            return _result(
                dataset,
                "user_mapped",
                "unknown",
                None,
                False,
                [f"Failed to apply user mapping: {e}"],
            )

    # Detect current format
    detected = detect_dataset_format(dataset)
    detected_format = detected["format"]
    warnings = []

    # Add multimodal warning if detected
    if multimodal_info["is_image"]:
        warnings.append(
            f"Multimodal dataset detected. Found columns: {multimodal_info['multimodal_columns']}"
        )

    # AUTO MODE: Keep format but standardize if needed
    if format_type == "auto":
        # Alpaca - keep as is
        if detected_format == "alpaca":
            return _result(dataset, "alpaca", "alpaca", None, True, [])

        # ShareGPT - needs standardization
        if detected_format == "sharegpt":
            try:
                standardized = _standardize(dataset)
                return _result(
                    standardized,
                    "sharegpt",
                    f"chatml_{detected['chat_column']}",
                    detected["chat_column"],
                    True,
                    [],
                )
            except Exception as e:
                warnings.append(f"Failed to standardize ShareGPT format: {e}")
                return _result(
                    dataset,
                    "sharegpt",
                    "sharegpt",
                    detected["chat_column"],
                    False,
                    warnings,
                )

        # ChatML in one of the recognised chat columns - keep as is
        if detected_format == "chatml" and detected["chat_column"] in [
            "conversations",
            "messages",
            "texts",
        ]:
            chatml_format = f"chatml_{detected['chat_column']}"
            return _result(
                dataset,
                chatml_format,
                chatml_format,
                detected["chat_column"],
                True,
                warnings,
            )

        # Unknown - try heuristic mapping, then standardization, else pass as is
        warnings.append(
            f"Unknown format detected. Keys found: {detected['sample_keys']}"
        )

        if auto_detect_custom:
            custom_mapping = detect_custom_format_heuristic(dataset)
            if custom_mapping:
                warnings.append(f"Auto-detected column mapping: {custom_mapping}")
                try:
                    dataset = dataset.map(
                        _make_auto_mapper(custom_mapping),
                        batched = True,
                        batch_size = batch_size,
                    )
                    return _result(
                        dataset,
                        "unknown",
                        "chatml_conversations",
                        "conversations",
                        True,
                        warnings,
                    )
                except Exception as e:
                    warnings.append(f"Auto-detection failed: {e}")

        # Try standardization as a last resort
        if detected["chat_column"]:
            try:
                standardized = _standardize(dataset)
                warnings.append("Successfully standardized unknown format")
                return _result(
                    standardized,
                    "unknown",
                    f"chatml_{detected['chat_column']}",
                    detected["chat_column"],
                    True,
                    warnings,
                )
            except Exception as e:
                warnings.append(f"Could not standardize: {e}. Passing dataset as-is.")

        # Return as-is with warnings
        return _result(
            dataset, "unknown", "unknown", detected["chat_column"], False, warnings
        )

    # ALPACA MODE: Convert to Alpaca
    if format_type == "alpaca":
        if detected_format == "alpaca":
            return _result(dataset, "alpaca", "alpaca", None, True, [])

        if detected_format in ["sharegpt", "chatml"]:
            # First standardize if ShareGPT (failures propagate to the caller)
            if detected_format == "sharegpt":
                dataset = _standardize(dataset)

            # Then convert to Alpaca
            converted = convert_chatml_to_alpaca(dataset, batch_size, num_proc)
            return _result(converted, detected_format, "alpaca", None, True, [])

        warnings.append("Cannot convert unknown format to Alpaca")
        return _result(
            dataset, "unknown", "unknown", detected["chat_column"], False, warnings
        )

    # CHATML MODE: Convert to ChatML
    if format_type in ["chatml", "conversational", "sharegpt"]:
        if detected_format == "alpaca":
            converted = convert_alpaca_to_chatml(dataset, batch_size, num_proc)
            return _result(
                converted,
                "alpaca",
                "chatml_conversations",
                "conversations",
                True,
                [],
            )

        if detected_format == "sharegpt":
            standardized = _standardize(dataset)
            return _result(
                standardized,
                "sharegpt",
                f"chatml_{detected['chat_column']}",
                detected["chat_column"],
                True,
                [],
            )

        if detected_format == "chatml":
            chatml_format = f"chatml_{detected['chat_column']}"
            return _result(
                dataset,
                chatml_format,
                chatml_format,
                detected["chat_column"],
                True,
                [],
            )

        warnings.append("Unknown format, attempting standardization")
        if detected["chat_column"]:
            try:
                standardized = _standardize(dataset)
                return _result(
                    standardized,
                    "unknown",
                    f"chatml_{detected['chat_column']}",
                    detected["chat_column"],
                    True,
                    warnings,
                )
            except Exception as e:
                warnings.append(f"Standardization failed: {e}")

        return _result(
            dataset, "unknown", "unknown", detected["chat_column"], False, warnings
        )

    raise ValueError(f"Unknown format_type: {format_type}")


def format_and_template_dataset(
    dataset,
    model_name,
    tokenizer,
    is_vlm = False,
    format_type = "auto",
    # VLM-specific parameters
    vlm_instruction = None,  # Now optional - will auto-generate
    vlm_text_column = None,
    vlm_image_column = None,
    dataset_name = None,
    custom_prompt_template = None,
    add_eos_token = False,
    remove_bos_prefix = False,
    custom_format_mapping = None,
    auto_detect_custom = True,
    auto_detect_mapping = True,
    aliases_for_system = [
        "system",
    ],
    aliases_for_user = [
        "user",
        "human",
        "input",
    ],
    aliases_for_assistant = [
        "gpt",
        "assistant",
        "output",
    ],
    batch_size = 1000,
    num_proc = None,
    progress_callback = None,
):
    """
    Convenience function that combines format_dataset and apply_chat_template_to_dataset.
    Perfect for UI workflows - one function does everything!

    Two flows are implemented:

    * VLM flow (``is_vlm`` truthy): the dataset is converted to the VLM
      'messages' layout, either through an explicit user column mapping or by
      auto-detecting its structure. This flow returns before any chat template
      is applied and its result dicts carry no "summary" key.
    * LLM flow (otherwise): ``format_dataset`` is called first, then
      ``apply_chat_template_to_dataset``, and the two results are merged.

    Args:
        dataset: Dataset to process.
        model_name: Model name. In the LLM flow a name containing "gemma"
            (case-insensitive) forces ``remove_bos_prefix`` on for non-image,
            non-alpaca data; it is also forwarded to
            ``apply_chat_template_to_dataset``.
        tokenizer: Forwarded to ``format_dataset`` and
            ``apply_chat_template_to_dataset`` (LLM flow only).
        is_vlm: Selects the VLM flow when truthy.
        format_type: Forwarded to ``format_dataset`` (LLM flow only).
        vlm_instruction: Optional instruction forwarded to
            ``convert_to_vlm_format``; when falsy a warning records that the
            instruction was auto-generated.
        vlm_text_column: Text column override for the simple VLM format;
            defaults to the auto-detected column.
        vlm_image_column: Image column override for the simple VLM format;
            defaults to the auto-detected column.
        dataset_name: Forwarded to the VLM converters and the warning helper.
        custom_prompt_template: Forwarded to ``apply_chat_template_to_dataset``.
        add_eos_token: Forwarded to ``apply_chat_template_to_dataset``.
        remove_bos_prefix: Forwarded to ``apply_chat_template_to_dataset``
            (may be forced to True, see ``model_name``).
        custom_format_mapping: Optional ``{column: role}`` dict. In the VLM
            flow, role "image" selects the image column and any of "text",
            "user", "caption", "assistant" selects the text column. In the LLM
            flow it is forwarded to both helpers.
        auto_detect_custom: Forwarded to ``format_dataset``.
        auto_detect_mapping: Forwarded to ``apply_chat_template_to_dataset``.
        aliases_for_system: Forwarded to ``format_dataset``.
        aliases_for_user: Forwarded to ``format_dataset``.
        aliases_for_assistant: Forwarded to ``format_dataset``.
        batch_size: Forwarded to both LLM-flow helpers.
        num_proc: Forwarded to both LLM-flow helpers.
        progress_callback: Optional callable invoked with a ``status_message``
            keyword argument; also forwarded to the converters.

    Returns:
        dict: {
            "dataset": Final dataset with 'text' column,
            "detected_format": Original format,
            "final_format": Format after processing,
            "success": Whether template application succeeded,
            "requires_manual_mapping": True if format detection failed and user must map columns,
            "warnings": List of warnings,
            "errors": List of errors,
            "summary": Human-readable summary
        }
    """

    # VLM FLOW
    if is_vlm:
        warnings = []
        errors = []

        multimodal_info = detect_multimodal_dataset(dataset)

        # NEW: If user provided explicit mapping for VLM, use it directly
        if custom_format_mapping:
            # Expect mapping like: {"image_col": "image", "caption_col": "text"}
            user_vlm_image_column = None
            user_vlm_text_column = None

            # If several columns share a role, the last one iterated wins.
            for col, role in custom_format_mapping.items():
                if role == "image":
                    user_vlm_image_column = col
                elif role in ["text", "user", "caption", "assistant"]:
                    user_vlm_text_column = col

            if user_vlm_image_column and user_vlm_text_column:
                try:
                    dataset = convert_to_vlm_format(
                        dataset,
                        instruction = vlm_instruction,
                        text_column = user_vlm_text_column,
                        image_column = user_vlm_image_column,
                        dataset_name = dataset_name,
                        progress_callback = progress_callback,
                    )
                    warnings.append(
                        f"Applied user VLM mapping: image='{user_vlm_image_column}', text='{user_vlm_text_column}'"
                    )

                    return {
                        "dataset": dataset,
                        "detected_format": "user_mapped",
                        "final_format": "vlm_messages",
                        "chat_column": "messages",
                        "is_vlm": True,
                        "is_image": True,
                        "multimodal_info": multimodal_info,
                        "success": True,
                        "requires_manual_mapping": False,
                        "warnings": warnings,
                        "errors": [],
                    }
                except Exception as e:
                    # User mapping failed — fall back to auto-detection instead
                    # of giving up (handles stale cached mappings gracefully)
                    warnings.append(
                        f"User VLM mapping (image='{user_vlm_image_column}', "
                        f"text='{user_vlm_text_column}') failed: {e} — "
                        f"falling back to auto-detection"
                    )
                    logger.info(
                        f"⚠️ User VLM mapping failed, falling back to auto-detection..."
                    )
                    custom_format_mapping = None  # clear so auto-detection runs below
            else:
                # The mapping lacks an image role, a text role, or both: report
                # it back so the caller can ask the user to fix the mapping.
                errors.append(
                    f"Invalid VLM mapping: need 'image' and 'text' roles. Got: {custom_format_mapping}"
                )
                return {
                    "dataset": dataset,
                    "detected_format": "user_mapped",
                    "final_format": "vlm_unknown",
                    "is_vlm": True,
                    "success": False,
                    "requires_manual_mapping": True,
                    "warnings": warnings,
                    "errors": errors,
                }

        # Auto-detect VLM structure
        vlm_structure = detect_vlm_dataset_structure(dataset)

        # Handle Llava format
        if vlm_structure["format"] == "vlm_messages_llava":
            try:
                dataset = convert_llava_to_vlm_format(dataset)
                warnings.append(
                    "Converted from Llava format (image indices) to standard VLM format"
                )
            except Exception as e:
                errors.append(f"Failed to convert Llava format: {e}")
                import traceback

                traceback.print_exc()

                return {
                    "dataset": dataset,
                    "detected_format": "vlm_messages_llava",
                    "final_format": "vlm_conversion_failed",
                    "is_vlm": True,
                    "success": False,
                    "requires_manual_mapping": True,
                    "warnings": warnings,
                    "errors": errors,
                }

        # Handle ShareGPT/ChatML + image column (e.g. ShareGPT4V, LLaVA-style)
        elif vlm_structure["format"] == "sharegpt_with_images":
            try:
                dataset = convert_sharegpt_with_images_to_vlm_format(
                    dataset,
                    image_column = vlm_structure["image_column"],
                    messages_column = vlm_structure["messages_column"],
                    dataset_name = dataset_name,
                    progress_callback = progress_callback,
                )
                warnings.append(
                    "Converted from ShareGPT+image format to standard VLM format"
                )
            except Exception as e:
                errors.append(f"Failed to convert ShareGPT+image format: {e}")
                import traceback

                traceback.print_exc()

                return {
                    "dataset": dataset,
                    "detected_format": "sharegpt_with_images",
                    "final_format": "vlm_conversion_failed",
                    "is_vlm": True,
                    "success": False,
                    "requires_manual_mapping": True,
                    "warnings": warnings,
                    "errors": errors,
                }

        # Handle simple format
        elif vlm_structure["needs_conversion"]:
            # Explicit caller-supplied columns take precedence over detection.
            if vlm_text_column is None:
                vlm_text_column = vlm_structure["text_column"]
            if vlm_image_column is None:
                vlm_image_column = vlm_structure["image_column"]

            # Neither the caller nor auto-detection produced both columns:
            # build an error message and ask for a manual mapping.
            if vlm_text_column is None or vlm_image_column is None:
                columns = list(next(iter(dataset)).keys()) if dataset else []
                issues = [
                    f"Could not auto-detect image and text columns from: {columns}",
                    f"VLM structure detected: {vlm_structure.get('format', 'unknown')}",
                ]
                friendly = None
                try:
                    from .llm_assist import llm_generate_dataset_warning

                    friendly = llm_generate_dataset_warning(
                        issues,
                        dataset_name = dataset_name,
                        modality = "vision",
                        column_names = columns,
                    )
                except Exception:
                    # Best-effort only: any failure here falls back to the
                    # generic message appended below.
                    pass
                errors.append(
                    friendly
                    or f"Could not auto-detect image/text columns. Found: {vlm_structure}. "
                )
                return {
                    "dataset": dataset,
                    "detected_format": "vlm_unknown",
                    "final_format": "vlm_unknown",
                    "is_vlm": True,
                    "success": False,
                    "requires_manual_mapping": True,
                    "warnings": warnings,
                    "errors": errors,
                }

            try:
                dataset = convert_to_vlm_format(
                    dataset,
                    instruction = vlm_instruction,
                    text_column = vlm_text_column,
                    image_column = vlm_image_column,
                    dataset_name = dataset_name,
                    progress_callback = progress_callback,
                )

                if vlm_instruction:
                    warnings.append(
                        f"Using user-provided instruction: '{vlm_instruction}'"
                    )
                else:
                    warnings.append(
                        "Auto-generated instruction based on dataset analysis"
                    )

            except Exception as e:
                errors.append(f"Failed to convert to VLM format: {e}")
                import traceback

                traceback.print_exc()

                return {
                    "dataset": dataset,
                    "detected_format": vlm_structure["format"],
                    "final_format": "vlm_conversion_failed",
                    "is_vlm": True,
                    "success": False,
                    "requires_manual_mapping": True,
                    "warnings": warnings,
                    "errors": errors,
                }

        # Already in standard VLM format
        elif vlm_structure["format"] == "vlm_messages":
            # Materialize the rows into a plain Python list.
            dataset = [sample for sample in dataset]
            warnings.append("Dataset already in standard VLM messages format")

        # NOTE(review): a structure matching none of the branches above also
        # reaches this success return with the dataset untouched — confirm
        # that is intended.
        # Return as list
        return {
            "dataset": dataset,
            "detected_format": vlm_structure["format"],
            "final_format": "vlm_messages",
            "chat_column": "messages",
            "is_vlm": True,
            "is_image": multimodal_info["is_image"],
            "multimodal_info": multimodal_info,
            "vlm_structure": vlm_structure,
            "success": True,
            "requires_manual_mapping": False,
            "warnings": warnings,
            "errors": errors,
        }

    # LLM FLOW (Existing code)
    else:
        # Step 1: Format the dataset
        # Row count is only used for progress messages; objects without
        # __len__ simply skip those messages.
        n_rows = len(dataset) if hasattr(dataset, "__len__") else None
        if progress_callback and n_rows:
            progress_callback(status_message = f"Formatting dataset ({n_rows:,} rows)...")
        dataset_info = format_dataset(
            dataset,
            format_type = format_type,
            tokenizer = tokenizer,
            auto_detect_custom = auto_detect_custom,
            custom_format_mapping = custom_format_mapping,
            aliases_for_system = aliases_for_system,
            aliases_for_user = aliases_for_user,
            aliases_for_assistant = aliases_for_assistant,
            batch_size = batch_size,
            num_proc = num_proc,
        )

        # Step 2: Apply chat template
        detected = dataset_info.get("detected_format", "unknown")
        if progress_callback and n_rows:
            progress_callback(
                status_message = f"Applying chat template to {detected} ({n_rows:,} rows)..."
            )
        # Gemma emits a leading <bos> that must be stripped for text-only chatml/sharegpt.
        is_alpaca = format_type == "alpaca" or (
            format_type == "auto" and dataset_info["detected_format"] == "alpaca"
        )
        is_gemma = "gemma" in model_name.lower()
        if is_gemma and not dataset_info["is_image"] and not is_alpaca:
            remove_bos_prefix = True
        template_result = apply_chat_template_to_dataset(
            dataset_info = dataset_info,
            tokenizer = tokenizer,
            model_name = model_name,
            custom_prompt_template = custom_prompt_template,
            add_eos_token = add_eos_token,
            remove_bos_prefix = remove_bos_prefix,
            custom_format_mapping = custom_format_mapping,
            auto_detect_mapping = auto_detect_mapping,
            batch_size = batch_size,
            num_proc = num_proc,
            progress_callback = progress_callback,
        )

        # Step 3: Generate summary
        summary = get_dataset_info_summary(dataset_info)

        # Combine results
        # Warnings come from both steps; errors only from the template step.
        all_warnings = dataset_info.get("warnings", []) + template_result.get(
            "warnings", []
        )
        all_errors = template_result.get("errors", [])

        # If format_dataset returned "unknown" but apply_chat_template rescued
        # it via heuristic detection, update final_format to reflect reality.
        final_format = dataset_info["final_format"]
        requires_manual = dataset_info.get("requires_manual_mapping", False)
        if final_format == "unknown" and template_result["success"]:
            out_ds = template_result["dataset"]
            # A 'text' column on the output is taken as proof the rescue worked.
            if hasattr(out_ds, "column_names") and "text" in out_ds.column_names:
                final_format = "chatml_conversations"
                requires_manual = False

        return {
            "dataset": template_result["dataset"],
            "detected_format": dataset_info["detected_format"],
            "final_format": final_format,
            "chat_column": dataset_info.get("chat_column"),
            "is_vlm": False,  # This is LLM flow
            "success": template_result["success"],
            "requires_manual_mapping": requires_manual,
            "warnings": all_warnings,
            "errors": all_errors,
            "summary": summary,
        }


================================================
FILE: studio/backend/utils/datasets/format_conversion.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Format conversion utilities for dataset processing.

This module contains functions for converting between dataset formats
(Alpaca, ShareGPT, ChatML) and standardizing chat formats.
"""

import os

from datasets import IterableDataset
from loggers import get_logger

logger = get_logger(__name__)


def standardize_chat_format(
    dataset,
    tokenizer = None,
    aliases_for_system = [
        "system",
    ],
    aliases_for_user = [
        "user",
        "human",
        "input",
    ],
    aliases_for_assistant = [
        "gpt",
        "assistant",
        "output",
    ],
    batch_size = 1000,
    num_proc = None,
):
    """
    Our own standardization function that handles BOTH messages and conversations.
    Converts non-standard role names and keys to standard format.

    The chat column is looked up as 'conversations', then 'messages', then
    'texts'. Up to the first 10 rows are inspected to decide which of the two
    string-valued message keys is the role (the one with fewer distinct
    values) and which is the content. Every message is then rewritten as
    ``{"role": ..., "content": ...}`` with role aliases mapped to
    'system' / 'user' / 'assistant'.

    Args:
        dataset: Iterable of row dicts exposing ``.map`` (e.g. a Hugging Face
            ``Dataset`` or ``IterableDataset``).
        tokenizer: Optional tokenizer/processor. If it has an
            ``image_processor`` or ``tokenizer`` attribute, each content value
            is wrapped as ``[{"type": "text", "text": content}]``.
        aliases_for_system: Role names mapped to "system".
        aliases_for_user: Role names mapped to "user".
        aliases_for_assistant: Role names mapped to "assistant".
        batch_size: Batch size passed to ``dataset.map``.
        num_proc: Worker count hint; passed through ``safe_num_proc`` and only
            used for non-iterable datasets.

    Returns:
        The mapped dataset, or ``dataset`` itself, unchanged, when it is empty,
        has no chat column, or its messages do not expose exactly two
        string-valued keys.
    """
    import collections
    import itertools

    # Check if vision tokenizer is used
    is_vlm = tokenizer is not None and (
        hasattr(tokenizer, "image_processor") or hasattr(tokenizer, "tokenizer")
    )

    # An empty dataset has no row to inspect; hand it back untouched instead
    # of letting next() raise StopIteration.
    first_example = next(iter(dataset), None)
    if first_example is None:
        return dataset

    column_names = set(first_example.keys())

    #   Check for both 'conversations' and 'messages'
    chat_column = None
    for candidate in ("conversations", "messages", "texts"):
        if candidate in column_names:
            chat_column = candidate
            break
    if chat_column is None:
        return dataset  # No chat column found

    # Inspect structure: collect the string values seen under each message key
    uniques = collections.defaultdict(list)
    for example in itertools.islice(dataset, 10):
        for message in example[chat_column]:
            for key, value in message.items():
                if not isinstance(value, str):
                    continue  # Skip non-string values
                uniques[key].append(value)

    if len(uniques) != 2:
        return dataset  # Unexpected structure

    keys = list(uniques)
    length_first = len(set(uniques[keys[0]]))
    length_second = len(set(uniques[keys[1]]))

    # Determine which is role and which is content: roles repeat, so the key
    # with fewer distinct values is taken to be the role.
    if length_first < length_second:
        role_key, content_key = keys[0], keys[1]
    else:
        role_key, content_key = keys[1], keys[0]

    # Mapping for aliases (the default lists are only read, never mutated).
    # Later groups override earlier ones if an alias appears in several.
    aliases_mapping = {}
    aliases_mapping.update(dict.fromkeys(aliases_for_system, "system"))
    aliases_mapping.update(dict.fromkeys(aliases_for_user, "user"))
    aliases_mapping.update(dict.fromkeys(aliases_for_assistant, "assistant"))

    def _standardize_dataset(examples):
        """Rewrite one batch of conversations into role/content messages."""
        all_convos = []
        for convo in examples[chat_column]:
            new_convo = []
            for message in convo:
                # Get original role and content
                original_role = message.get(role_key, "")
                original_content = message.get(content_key, "")

                # Map to standard role name; unknown roles pass through as-is
                standard_role = aliases_mapping.get(original_role, original_role)

                # Handle VLM format
                if is_vlm:
                    original_content = [{"type": "text", "text": original_content}]

                # Create dict with EXPLICIT ORDER
                new_convo.append({"role": standard_role, "content": original_content})

            all_convos.append(new_convo)

        return {chat_column: all_convos}

    dataset_map_kwargs = {
        "batched": True,
        "batch_size": batch_size,
    }

    # Imported only here so the early-return paths above do not depend on it.
    from datasets import IterableDataset

    # num_proc / desc are only passed for non-iterable datasets.
    if not isinstance(dataset, IterableDataset):
        from utils.hardware import safe_num_proc

        if num_proc is None or type(num_proc) is not int:
            num_proc = safe_num_proc()
        else:
            num_proc = safe_num_proc(num_proc)

        dataset_map_kwargs["num_proc"] = num_proc
        dataset_map_kwargs["desc"] = "Standardizing chat format"

    return dataset.map(_standardize_dataset, **dataset_map_kwargs)


def convert_chatml_to_alpaca(dataset, batch_size = 1000, num_proc = None):
    """
    Converts ChatML format (messages OR conversations) to Alpaca format.
    Handles both standardized and ShareGPT formats.

    Supports:
    - "messages", "conversations" or "texts" column
    - "role"/"content" (standard) or "from"/"value" (ShareGPT)

    For each conversation the first non-empty user message becomes
    ``instruction`` and the first assistant message becomes ``output``;
    scanning stops at that assistant message. ``input`` is always "".
    Missing or empty content is emitted as "" (never ``None``).

    Args:
        dataset: Object exposing ``.map``; batches must support ``.get``.
        batch_size: Batch size passed to ``dataset.map``.
        num_proc: Worker count hint; passed through ``safe_num_proc`` and only
            used for non-iterable datasets.

    Returns:
        The result of ``dataset.map`` with 'instruction', 'input' and 'output'
        columns produced per batch.

    Raises:
        ValueError: (from the batch function) if none of the supported chat
            columns is present.
    """
    # NOTE(review): this checks against torch's IterableDataset, whereas
    # standardize_chat_format in this module checks datasets.IterableDataset —
    # confirm both recognise streaming datasets as intended.
    from torch.utils.data import IterableDataset

    user_roles = ("user", "human", "input")
    assistant_roles = ("assistant", "gpt", "output")

    def _convert(examples):
        """Turn one batch of conversations into Alpaca columns."""
        # Auto-detect which column name is used. Test for presence rather than
        # truthiness so an empty batch is not mistaken for a missing column.
        chatml_data = None
        for column in ("messages", "conversations", "texts"):
            chatml_data = examples.get(column)
            if chatml_data is not None:
                break

        if chatml_data is None:
            raise ValueError(
                "No 'messages' or 'conversations' or 'texts' column found."
            )

        instructions = []
        outputs = []

        for convo in chatml_data:
            instruction = ""
            output = ""

            for msg in convo:
                # Handle both standard and ShareGPT formats
                role = msg.get("role") or msg.get("from")
                # Fall back to "" so empty/missing content never leaks None
                # into the output columns.
                content = msg.get("content") or msg.get("value") or ""

                # Get first user message as instruction
                if role in user_roles and not instruction:
                    instruction = content
                # Get first assistant message as output
                elif role in assistant_roles and not output:
                    output = content
                    break  # Stop after first assistant response

            instructions.append(instruction)
            outputs.append(output)

        return {
            "instruction": instructions,
            "input": [""] * len(instructions),  # Alpaca typically has empty input
            "output": outputs,
        }

    dataset_map_kwargs = {
        "batched": True,
        "batch_size": batch_size,
    }

    # num_proc / desc are only passed for non-iterable datasets.
    if not isinstance(dataset, IterableDataset):
        from utils.hardware import safe_num_proc

        if num_proc is None or type(num_proc) is not int:
            num_proc = safe_num_proc()
        else:
            num_proc = safe_num_proc(num_proc)

        dataset_map_kwargs["num_proc"] = num_proc
        dataset_map_kwargs["desc"] = "Converting ChatML to Alpaca format"

    return dataset.map(_convert, **dataset_map_kwargs)


def convert_alpaca_to_chatml(dataset, batch_size = 1000, num_proc = None):
    """
    Converts Alpaca format to ChatML format.

    Output format: Uses 'conversations' column with standard 'role'/'content' structure.

    Each row becomes a two-message conversation: a user message holding the
    instruction (followed by a blank line and the input when the input is
    non-blank) and an assistant message holding the output.

    Args:
        dataset: Object exposing ``.map`` whose batches provide 'instruction'
            and 'output' columns and optionally 'input'.
        batch_size: Batch size passed to ``dataset.map``.
        num_proc: Worker count hint; passed through ``safe_num_proc`` and only
            used for non-iterable datasets.

    Returns:
        The result of ``dataset.map`` with a 'conversations' column produced
        per batch.
    """
    # NOTE(review): this checks against torch's IterableDataset, whereas
    # standardize_chat_format in this module checks datasets.IterableDataset —
    # confirm both recognise streaming datasets as intended.
    from torch.utils.data import IterableDataset

    def _convert(examples):
        """Turn one batch of Alpaca rows into ChatML conversations."""
        instructions = examples["instruction"]
        outputs = examples["output"]

        # Build the "no input column" fallback once per batch rather than once
        # per row.
        inputs = examples.get("input")
        if inputs is None:
            inputs = [""] * len(instructions)

        conversations = []
        for instruction, input_text, output in zip(instructions, inputs, outputs):
            # Combine instruction and input (if exists) for user message
            if input_text and input_text.strip():
                user_content = f"{instruction}\n\n{input_text}".strip()
            else:
                user_content = instruction

            # Build conversation in standard ChatML format
            conversations.append(
                [
                    {"role": "user", "content": user_content},
                    {"role": "assistant", "content": output},
                ]
            )

        return {"conversations": conversations}

    dataset_map_kwargs = {
        "batched": True,
        "batch_size": batch_size,
    }

    # num_proc / desc are only passed for non-iterable datasets.
    if not isinstance(dataset, IterableDataset):
        from utils.hardware import safe_num_proc

        if num_proc is None or type(num_proc) is not int:
            num_proc = safe_num_proc()
        else:
            num_proc = safe_num_proc(num_proc)

        dataset_map_kwargs["num_proc"] = num_proc
        dataset_map_kwargs["desc"] = "Converting Alpaca to ChatML format"

    return dataset.map(_convert, **dataset_map_kwargs)


def _format_eta(seconds):
    """Format seconds into a human-readable ETA string."""
    if seconds < 60:
        return f"{seconds:.0f}s"
    elif seconds < 3600:
        m, s = divmod(int(seconds), 60)
        return f"{m}m {s}s"
    else:
        h, remainder = divmod(int(seconds), 3600)
        m, _ = divmod(remainder, 60)
        return f"{h}h {m}m"


def convert_to_vlm_format(
    dataset,
    instruction = None,
    text_column = "text",
    image_column = "image",
    dataset_name = None,
    progress_callback = None,
):
    """
    Converts simple {image, text} format to VLM messages format.

    Returns a LIST, not a HuggingFace Dataset (to preserve PIL Images).

    For URL-based image datasets, runs a 200-sample parallel probe first to
    estimate download speed and failure rate, then reports time estimate or
    warning through progress_callback before proceeding with the full conversion.

    Args:
        progress_callback: Optional callable(status_message=str) to report
                          progress to the training overlay.

    Returns:
        list: List of dicts with 'messages' field
    """
    from PIL import Image
    from .vlm_processing import generate_smart_vlm_instruction

    def _notify(msg):
        """Send status update to the training overlay if callback is available."""
        if progress_callback:
            progress_callback(status_message = msg)

    # Generate smart instruction if not provided
    if instruction is None:
        instruction_info = generate_smart_vlm_instruction(
            dataset,
            text_column = text_column,
            image_column = image_column,
            dataset_name = dataset_name,
        )

        instruction = instruction_info["instruction"]
        instruction_column = instruction_info.get("instruction_column")
        uses_dynamic = instruction_info["uses_dynamic_instruction"]

        logger.info(
            f"📝 Auto-detected instruction type: {instruction_info['instruction_type']}"
        )
        logger.info(f"📝 Confidence: {instruction_info['confidence']:.2f}")
        if not uses_dynamic:
            logger.info(f"📝 Using instruction: '{instruction}'")
        else:
            logger.info(
                f"📝 Using dynamic instructions from column: '{instruction_column}'"
            )
    else:
        instruction_column = None
        uses_dynamic = False

    def _convert_single_sample(sample):
        """Convert a single sample to VLM format."""
        # Get image (might be PIL Image, local path, URL, or bare filename)
        image_data = sample[image_column]

        if isinstance(image_data, str):
            if image_data.startswith(("http://", "https://")):
                import fsspec
                from io import BytesIO

                with fsspec.open(image_data, "rb", expand = True) as f:
                    image_data = Image.open(BytesIO(f.read())).convert("RGB")
            elif _image_lookup is not None and image_data in _image_lookup:
                # Bare filename → resolve via HF repo lookup
                from huggingface_hub import hf_hub_download

                local_path = hf_hub_download(
                    dataset_name,
                    _image_lookup[image_data],
                    repo_type = "dataset",
                )
                image_data = Image.open(local_path).convert("RGB")
            else:
                image_data = Image.open(image_data).convert("RGB")

        # Get text (if list of strings, pick a random one — e.g. multiple captions)
        text_data = sample[text_column]
        if isinstance(text_data, list) and len(text_data) > 0:
            import random

            text_data = random.choice(text_data)

        # Get instruction (static or dynamic)
        if uses_dynamic and instruction_column:
            current_instruction = sample[instruction_column]
        else:
            current_instruction = instruction

        # Build VLM messages - simple structure
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": current_instruction},
                    {"type": "image", "image": image_data},  # PIL object
                ],
            },
            {"role": "assistant", "content": [{"type": "text", "text": text_data}]},
        ]

        # Return dict with messages
        return {"messages": messages}

    total = len(dataset)
    first_image = next(iter(dataset))[image_column]
    has_urls = isinstance(first_image, str) and first_image.startswith(
        ("http://", "https://")
    )

    # ── Bare-filename detection: images stored as filenames (e.g. "img_001.png")
    #    that don't exist locally.  Build a basename→repo_path lookup so we can
    #    resolve them via hf_hub_download during conversion.
    _image_lookup = None
    _IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff")
    if (
        not has_urls
        and isinstance(first_image, str)
        and not os.path.exists(first_image)
        and dataset_name
    ):
        try:
            from huggingface_hub import HfApi

            _notify("Resolving image filenames from HF repo...")
            logger.info(
                f"🔍 Image column contains bare filenames (e.g. '{first_image}') — building repo lookup..."
            )
            repo_files = HfApi().list_repo_files(dataset_name, repo_type = "dataset")
            _image_lookup = {
                os.path.basename(f): f
                for f in repo_files
                if any(f.lower().endswith(ext) for ext in _IMAGE_EXTS)
            }
            if first_image in _image_lookup:
                logger.info(
                    f"✅ Matched {len(_image_lookup)} image files in repo (e.g. '{first_image}' → '{_image_lookup[first_image]}')"
                )
            else:
                logger.info(
                    f"⚠️ Built lookup with {len(_image_lookup)} images but '{first_image}' not found — falling back to local open"
                )
                _image_lookup = None
        except Exception as e:
            logger.info(f"⚠️ Failed to build HF repo image lookup: {e}")
            _image_lookup = None

    # ── URL probe: 200 samples with parallel workers to estimate speed + failure rate ──
    PROBE_SIZE = 200
    MAX_FAIL_RATE = 0.3

    if has_urls and total > PROBE_SIZE:
        import time
        from concurrent.futures import ThreadPoolExecutor, as_completed
        from utils.hardware import safe_num_proc

        num_workers = safe_num_proc()
        _notify(f"Probing {PROBE_SIZE} image URLs with {num_workers} workers...")
        logger.info(
            f"🔍 Probing {PROBE_SIZE}/{total} image URLs with {num_workers} workers..."
        )

        probe_samples = [dataset[i] for i in range(PROBE_SIZE)]
        probe_ok = 0
        probe_fail = 0
        probe_start = time.time()

        with ThreadPoolExecutor(max_workers = num_workers) as executor:
            futures = {
                executor.submit(_convert_single_sample, s): s for s in probe_samples
            }
            for future in as_completed(futures):
                try:
                    future.result()
                    probe_ok += 1
                except Exception:
                    probe_fail += 1

        probe_elapsed = time.time() - probe_start
        probe_total = probe_ok + probe_fail
        fail_rate = probe_fail / probe_total if probe_total > 0 else 0
        throughput = probe_total / probe_elapsed if probe_elapsed > 0 else 0

        if fail_rate >= MAX_FAIL_RATE:
            issues = [
                f"{fail_rate:.0%} of the first {PROBE_SIZE} image URLs failed to download ({probe_fail}/{probe_total})",
                "Images are external URLs, not embedded in the dataset",
            ]
            # Try LLM-friendly warning
            friendly = None
            try:
                from .llm_assist import llm_generate_dataset_warning

                friendly = llm_generate_dataset_warning(
                    issues,
                    dataset_name = dataset_name,
                    modality = "vision",
                    column_names = [image_column, text_column],
                )
            except Exception:
                pass
            msg = friendly or (
                f"⚠️ {fail_rate:.0%} of the first {PROBE_SIZE} images failed to download "
                f"({probe_fail}/{probe_total}). "
                "This dataset has too many broken or unreachable image URLs. "
                "Consider using a dataset with embedded images instead."
            )
            logger.info(msg)
            _notify(msg)
            raise ValueError(msg)

        # Estimate total time for remaining samples
        remaining = total - PROBE_SIZE
        estimated_seconds = remaining / throughput if throughput > 0 else 0
        eta_str = _format_eta(estimated_seconds)

        info_msg = (
            f"Downloading {total:,} images ({num_workers} workers, ~{throughput:.1f} img/s). "
            f"Estimated time: ~{eta_str}"
        )
        if probe_fail > 0:
            info_msg += f" | {fail_rate:.0%} broken URLs will be skipped"

        logger.info(
            f"✅ Probe passed: {probe_ok}/{probe_total} ok, {probe_fail} failed ({fail_rate:.0%}), {throughput:.1f} img/s"
        )
        logger.info(f"⏱️ Estimated time for {total:,} samples: ~{eta_str}")
        _notify(info_msg)

    # ── Full conversion with progress ──
    from tqdm import tqdm

    logger.info(f"🔄 Converting {total} samples to VLM format...")
    converted_list = []
    failed_count = 0

    if has_urls:
        # Parallel conversion for URL-based datasets
        import time
        from concurrent.futures import ThreadPoolExecutor, as_completed
        from utils.hardware import safe_num_proc

        num_workers = safe_num_proc()
        batch_size = 500
        start_time = time.time()

        for batch_start in range(0, total, batch_size):
            batch_end = min(batch_start + batch_size, total)
            batch_samples = [dataset[i] for i in range(batch_start, batch_end)]

            with ThreadPoolExecutor(max_workers = num_workers) as executor:
                futures = {
                    executor.submit(_convert_single_sample, s): i
                    for i, s in enumerate(batch_samples)
                }
                batch_results = [None] * len(batch_samples)
                for future in as_completed(futures):
                    idx = futures[future]
                    try:
                        batch_results[idx] = future.result()
                    except Exception as e:
                        failed_count += 1
                        if failed_count == 1:
                            print(
                                f"⚠️ First VLM conversion failure: {type(e).__name__}: {e}"
                            )
                        if failed_count == 1:
                            logger.info(
                                f"⚠️ First VLM conversion failure: {type(e).__name__}: {e}"
                            )

            converted_list.extend(r for r in batch_results if r is not None)

            # Progress update every batch
            elapsed = time.time() - start_time
            done = batch_end
            rate = done / elapsed if elapsed > 0 else 0
            remaining_time = (total - done) / rate if rate > 0 else 0
            eta_str = _format_eta(remaining_time)
            progress_msg = f"Downloading images: {done:,}/{total:,} ({done*100//total}%) | ~{eta_str} remaining | {failed_count} skipped"
            logger.info(
                f"  [{done}/{total}] {rate:.1f} img/s, {failed_count} failed, ETA {eta_str}"
            )
            _notify(progress_msg)
    else:
        # Sequential conversion for local/embedded images (fast, no I/O bottleneck)
        pbar = tqdm(dataset, total = total, desc = "Converting VLM samples", unit = "sample")
        for sample in pbar:
            try:
                converted_list.append(_convert_single_sample(sample))
            except Exception as e:
                failed_count += 1
                if failed_count == 1:
                    # Log the first failure to aid debugging
                    print(f"⚠️ First VLM conversion failure: {type(e).__name__}: {e}")
                if failed_count == 1:
                    # Log the first failure to aid debugging
                    logger.info(
                        f"⚠️ First VLM conversion failure: {type(e).__name__}: {e}"
                    )
            pbar.set_postfix(ok = len(converted_list), failed = failed_count, refresh = False)
        pbar.close()

    if failed_count > 0:
        fail_rate = failed_count / total
        logger.info(
            f"⚠️ Skipped {failed_count}/{total} ({fail_rate:.0%}) samples with broken/unreachable images"
        )
        # For datasets that skipped the probe (small URL datasets), check fail rate now
        if has_urls and fail_rate >= MAX_FAIL_RATE:
            issues = [
                f"{fail_rate:.0%} of images failed to download ({failed_count}/{total})",
                "Images are external URLs, not embedded in the dataset",
            ]
            friendly = None
            try:
                from .llm_assist import llm_generate_dataset_warning

                friendly = llm_generate_dataset_warning(
                    issues,
                    dataset_name = dataset_name,
                    modality = "vision",
                    column_names = [image_column, text_column],
                )
            except Exception:
                pass
            msg = friendly or (
                f"⚠️ {fail_rate:.0%} of images failed to download ({failed_count}/{total}). "
                "This dataset has too many broken or unreachable image URLs. "
                "Consider using a dataset with embedded images instead."
            )
            _notify(msg)
            raise ValueError(msg)

    if len(converted_list) == 0:
        issues = [
            f"All {total} samples failed during VLM conversion — no usable images found",
            f"Image column '{image_column}' may contain URLs that are no longer accessible, "
            "or local file paths that don't exist",
        ]
        friendly = None
        try:
            from .llm_assist import llm_generate_dataset_warning

            friendly = llm_generate_dataset_warning(
                issues,
                dataset_name = dataset_name,
                modality = "vision",
                column_names = [image_column, text_column],
            )
        except Exception:
            pass
        raise ValueError(
            friendly
            or (
                f"All {total} samples failed during VLM conversion — no usable images found. "
                "This dataset may contain only image URLs that are no longer accessible."
            )
        )

    logger.info(f"✅ Converted {len(converted_list)}/{total} samples")
    _notify(f"Converted {len(converted_list):,}/{total:,} images successfully")

    # Return list, NOT Dataset
    return converted_list


def convert_sharegpt_with_images_to_vlm_format(
    dataset,
    image_column = "image",
    messages_column = "conversations",
    dataset_name = None,
    progress_callback = None,
):
    """
    Converts ShareGPT/ChatML datasets that have a separate image column and
    ``<image>`` placeholders inside the conversation text.

    Example input::

        {
            "image": "sam/images/sa_545504.jpg",
            "conversations": [
                {"from": "human", "value": "<image>\\nWhat is this photo about?"},
                {"from": "gpt",   "value": "The image captures..."}
            ]
        }

    Args:
        dataset: Sized iterable of samples; each sample is indexed by
            ``image_column`` and ``messages_column``.
        image_column: Key holding the image (PIL image, URL, file path,
            bare repo filename, or a ``{"bytes"/"path"}`` dict).
        messages_column: Key holding the list of message dicts. Each message
            is read via ``from``/``role`` and ``value``/``content``.
        dataset_name: Hugging Face dataset repo id. Only consulted when the
            first image value is a string that is neither a URL nor an
            existing local path, to map filenames onto files in the repo.
        progress_callback: Optional callable invoked as
            ``progress_callback(status_message = msg)``.

    Returns:
        A list of ``{"messages": [...]}`` dicts in standard VLM messages
        format (PIL Images inline). Samples that fail to convert are skipped.

    Raises:
        ValueError: If the dataset is empty or every sample fails to convert.
    """
    # Fail fast (before the heavy imports) instead of letting
    # ``next(iter(dataset))`` leak a bare StopIteration on an empty dataset.
    total = len(dataset)
    if total == 0:
        raise ValueError(
            "Dataset is empty — nothing to convert from ShareGPT+image format."
        )

    from PIL import Image
    from tqdm import tqdm

    _IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff")
    _ROLE_MAP = {
        "human": "user",
        "user": "user",
        "gpt": "assistant",
        "assistant": "assistant",
        "system": "system",
    }

    def _notify(msg):
        if progress_callback:
            progress_callback(status_message = msg)

    # ── Resolve image loading strategy (same 3-tier as convert_to_vlm_format) ──
    first_image = next(iter(dataset))[image_column]

    _image_lookup = None
    if (
        isinstance(first_image, str)
        and not first_image.startswith(("http://", "https://"))
        and not os.path.exists(first_image)
        and dataset_name
    ):
        try:
            from huggingface_hub import HfApi

            _notify("Resolving image filenames from HF repo...")
            logger.info(
                f"🔍 Image column contains bare filenames (e.g. '{first_image}') — building repo lookup..."
            )
            repo_files = HfApi().list_repo_files(dataset_name, repo_type = "dataset")
            image_files = [f for f in repo_files if f.lower().endswith(_IMAGE_EXTS)]
            # Basename keys first, then full relative paths (for values like
            # "sam/images/sa_545504.jpg"); full paths win on a key collision.
            _image_lookup = {os.path.basename(f): f for f in image_files}
            _image_lookup.update((f, f) for f in image_files)
            if first_image in _image_lookup:
                logger.info(
                    f"✅ Matched {len(_image_lookup)} image files in repo (e.g. '{first_image}' → '{_image_lookup[first_image]}')"
                )
            else:
                logger.info(
                    f"⚠️ Built lookup with {len(_image_lookup)} images but '{first_image}' not found — falling back to local open"
                )
                _image_lookup = None
        except Exception as e:
            # Best effort: without a lookup, strings are opened as local paths.
            logger.info(f"⚠️ Failed to build HF repo image lookup: {e}")
            _image_lookup = None

    def _resolve_image(image_data):
        """Resolve image data to a PIL Image object."""
        if hasattr(image_data, "size") and hasattr(image_data, "mode"):
            return image_data  # Already PIL
        if isinstance(image_data, str):
            if image_data.startswith(("http://", "https://")):
                import fsspec
                from io import BytesIO

                with fsspec.open(image_data, "rb", expand = True) as f:
                    return Image.open(BytesIO(f.read())).convert("RGB")
            elif _image_lookup is not None and image_data in _image_lookup:
                from huggingface_hub import hf_hub_download

                local_path = hf_hub_download(
                    dataset_name,
                    _image_lookup[image_data],
                    repo_type = "dataset",
                )
                return Image.open(local_path).convert("RGB")
            else:
                return Image.open(image_data).convert("RGB")
        if isinstance(image_data, dict) and (
            "bytes" in image_data or "path" in image_data
        ):
            if image_data.get("bytes"):
                from io import BytesIO

                return Image.open(BytesIO(image_data["bytes"])).convert("RGB")
            if image_data.get("path"):
                return Image.open(image_data["path"]).convert("RGB")
        raise ValueError(f"Cannot resolve image: {type(image_data)}")

    def _convert_single_sample(sample):
        """Convert a single ShareGPT+image sample to standard VLM format."""
        pil_image = _resolve_image(sample[image_column])
        conversation = sample[messages_column]

        new_messages = []
        for msg in conversation:
            # A key that is present but null (e.g. {"from": None, "role": None})
            # must still fall back to "user" rather than crash on ``.lower()``.
            role_raw = msg.get("from") or msg.get("role") or "user"
            role_key = role_raw.lower()
            role = _ROLE_MAP.get(role_key, role_key)
            text = msg.get("value") or msg.get("content") or ""

            # Split on <image> to interleave text and image content blocks
            if "<image>" in text:
                parts = text.split("<image>")
                content = []
                for i, part in enumerate(parts):
                    part = part.strip()
                    if part:
                        content.append({"type": "text", "text": part})
                    if i < len(parts) - 1:
                        content.append({"type": "image", "image": pil_image})
                # If <image> was the entire text, content might just be the image
                if not content:
                    content.append({"type": "image", "image": pil_image})
            else:
                content = [{"type": "text", "text": text}]

            new_messages.append({"role": role, "content": content})

        return {"messages": new_messages}

    # ── Full conversion with progress ──
    logger.info(f"🔄 Converting {total} samples from ShareGPT+image format...")
    converted_list = []
    failed_count = 0

    pbar = tqdm(dataset, total = total, desc = "Converting ShareGPT+image", unit = "sample")
    for sample in pbar:
        try:
            converted_list.append(_convert_single_sample(sample))
        except Exception as e:
            # Per-sample failures are skipped; only the first is logged to
            # aid debugging without flooding the log.
            failed_count += 1
            if failed_count == 1:
                logger.info(f"⚠️ First conversion failure: {type(e).__name__}: {e}")
        pbar.set_postfix(ok = len(converted_list), failed = failed_count, refresh = False)
    pbar.close()

    if failed_count > 0:
        logger.info(
            f"⚠️ Skipped {failed_count}/{total} ({failed_count*100//total}%) samples"
        )

    if len(converted_list) == 0:
        raise ValueError(
            f"All {total} samples failed during ShareGPT+image conversion — "
            "no usable samples found."
        )

    logger.info(f"✅ Converted {len(converted_list)}/{total} samples")
    _notify(f"Converted {len(converted_list):,}/{total:,} samples successfully")
    return converted_list


def convert_llava_to_vlm_format(dataset):
    """
    Converts Llava format to standard VLM format.

    Llava format:
    - messages: [{'content': [{'type': 'image', 'index': 0}, {'type': 'text', 'text': '...'}]}]
    - images: [PIL_Image1, PIL_Image2, ...]

    Standard VLM format:
    - messages: [{'content': [{'type': 'image', 'image': PIL_Image}, {'type': 'text', 'text': '...'}]}]

    Image items whose index is past the end of ``images`` (or that appear in
    a sample with no images) are dropped; content items that are neither
    ``image`` nor ``text`` are dropped as well. A missing or null ``images``
    value is treated as an empty list.

    Returns:
        A list with one ``{"messages": [...]}`` dict per input sample.
    """
    from PIL import Image

    logger.info(
        f"🔄 Converting {len(dataset)} samples from Llava format to standard VLM format..."
    )

    def _as_pil(image):
        """Open string entries as RGB PIL images; pass anything else through."""
        if isinstance(image, str):
            return Image.open(image).convert("RGB")
        return image

    def _convert_single_sample(sample):
        """Convert a single llava sample to standard VLM format."""
        messages = sample["messages"]
        # ``.get("images", [])`` still returns None when the key exists with a
        # null value, which would make ``len(images)`` raise below.
        images = sample.get("images")
        if images is None:
            images = []

        new_messages = []
        for msg in messages:
            new_content = []

            for item in msg["content"]:
                item_type = item["type"]
                if item_type == "image":
                    # Replace the index with the actual image; with no index,
                    # fall back to the first image of the sample.
                    img_idx = item.get("index")
                    if img_idx is None:
                        img_idx = 0
                    if img_idx < len(images):
                        new_content.append(
                            {"type": "image", "image": _as_pil(images[img_idx])}
                        )
                elif item_type == "text":
                    # Keep text as-is (only type + text)
                    new_content.append({"type": "text", "text": item.get("text", "")})

            new_messages.append({"role": msg["role"], "content": new_content})

        return {"messages": new_messages}

    converted_list = [_convert_single_sample(sample) for sample in dataset]

    logger.info(f"✅ Converted {len(converted_list)} samples")
    return converted_list


================================================
FILE: studio/backend/utils/datasets/format_detection.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Format detection utilities for dataset processing.

This module contains functions for detecting dataset formats (Alpaca, ShareGPT, ChatML),
detecting multimodal/VLM dataset structures, and heuristic-based column mapping.
"""

import re


def _keyword_in_column(keyword: str, col_name: str) -> bool:
    """Word-boundary keyword match to avoid false positives like 'pic' in 'topic'."""
    return (
        re.search(r"\b" + re.escape(keyword) + r"\b", col_name, re.IGNORECASE)
        is not None
    )


def detect_dataset_format(dataset):
    """
    Detects dataset format by inspecting the structure of the first sample.

    The first row is fetched once. An empty dataset is reported as
    ``"unknown"`` (with an ``"error"`` entry) instead of raising
    ``StopIteration``.

    Returns:
        dict: {
            "format": "alpaca" | "sharegpt" | "chatml" | "unknown",
            "chat_column": "messages" | "conversations" | "texts" | None,
            "needs_standardization": bool (None when the format is unknown),
            "sample_keys": list of keys found in messages (for debugging),
            "error": str, only present when inspection failed
        }
    """

    def _result(fmt, chat_column, needs_standardization, sample_keys):
        return {
            "format": fmt,
            "chat_column": chat_column,
            "needs_standardization": needs_standardization,
            "sample_keys": sample_keys,
        }

    try:
        sample = next(iter(dataset))
    except StopIteration:
        empty = _result("unknown", None, None, [])
        empty["error"] = "dataset is empty"
        return empty

    column_names = set(sample.keys())

    # Check for Alpaca
    if {"instruction", "output"}.issubset(column_names):
        return _result("alpaca", None, False, [])

    # Check for chat-based formats, in priority order
    chat_column = next(
        (
            name
            for name in ("messages", "conversations", "texts")
            if name in column_names
        ),
        None,
    )

    if chat_column:
        # Inspect the first message to tell ShareGPT from ChatML
        try:
            chat_data = sample[chat_column]

            if chat_data and len(chat_data) > 0:
                msg_keys = set(chat_data[0].keys())

                # ShareGPT uses "from" and "value"
                if "from" in msg_keys or "value" in msg_keys:
                    return _result("sharegpt", chat_column, True, list(msg_keys))

                # ChatML uses "role" and "content"
                if "role" in msg_keys and "content" in msg_keys:
                    return _result("chatml", chat_column, False, list(msg_keys))

                # Unknown structure but has chat column
                return _result("unknown", chat_column, None, list(msg_keys))
        except Exception as e:
            # e.g. the chat column holds plain strings rather than dicts
            failed = _result("unknown", chat_column, None, [])
            failed["error"] = str(e)
            return failed

    # No recognized format (also reached when the chat column is empty)
    return _result("unknown", None, None, [])


def detect_custom_format_heuristic(dataset):
    """
    Smart detection with priority scoring.

    Only the first sample of ``dataset`` is inspected. Column names are
    matched against keyword lists by substring, and candidates are ranked by
    a score built from name patterns and the length of the sample value.

    Strategy for ambiguous keywords like 'task':
    1. Detect assistant first (unambiguous)
    2. Detect user using high-priority keywords first
    3. Check REMAINING columns for system keywords (including 'task')
    4. Only if no system match, use 'task' as fallback user

    Returns:
        dict | None: ``{column_name: "user" | "assistant" | "system"}`` when
        both a user and an assistant column were found, otherwise ``None``
        (including when the dataset is empty).
    """
    try:
        sample = next(iter(dataset))
    except StopIteration:
        # Empty dataset: there is no row to inspect, so nothing can be mapped.
        return None
    all_columns = list(sample.keys())

    mapping = {}

    # Keywords
    assistant_words = [
        "output",
        "answer",
        "response",
        "assistant",
        "completion",
        "expected",
        "recommendation",
        "reply",
        "result",
        "target",
        "solution",
        "explanation",
        "solve",
    ]

    # Split into high/low priority
    user_words_high_priority = [
        "input",
        "question",
        "query",
        "prompt",
        "instruction",
        "request",
        "snippet",
        "user",
        "text",
        "problem",
        "exercise",
    ]
    user_words_low_priority = ["task"]  # Ambiguous - can be user OR system
    user_words = user_words_high_priority + user_words_low_priority

    system_words = [
        "system",
        "context",
        "description",
        "persona",
        "role",
        "template",
        "task",  # Also in system
    ]

    # Metadata columns to ignore
    metadata_exact_match = {
        "id",
        "idx",
        "index",
        "key",
        "timestamp",
        "date",
        "metadata",
        "source",
        "kind",
        "type",
        "category",
        "score",
        "label",
        "tag",
        "inference_mode",
    }

    # Full column names that are metadata (compared by equality)
    metadata_prefix_patterns = [
        "problem_type",
        "problem_source",
        "generation_model",
        "pass_rate",
    ]

    priority_patterns = {
        "generated": 100,
        "gen_": 90,
        "model_": 80,
        "predicted": 70,
        "completion": 60,
    }

    def has_keyword(col_name, keywords):
        """Check if any keyword appears in column name."""
        col_lower = col_name.lower()
        col_normalized = col_lower.replace("_", "").replace("-", "").replace(" ", "")
        return any(
            keyword in col_lower or keyword in col_normalized for keyword in keywords
        )

    def is_metadata(col_name):
        """Check if column is likely metadata."""
        col_lower = col_name.lower()

        if col_lower in metadata_exact_match or col_lower in metadata_prefix_patterns:
            return True

        # Any other "generation_*" / "pass_*" column (e.g. "pass_rate_72b") is
        # metadata too; "problem_*" columns are only excluded by exact name.
        if col_lower.startswith(("generation_", "pass_")):
            return True

        # Very short names are treated as metadata, except Q/A style ones.
        return len(col_lower) <= 2 and col_lower not in ("qa", "q", "a")

    def get_priority_score(col_name):
        """Calculate priority score based on column name patterns."""
        col_lower = col_name.lower()
        return sum(
            pattern_score
            for pattern, pattern_score in priority_patterns.items()
            if pattern in col_lower
        )

    def get_content_length(col_name):
        """Length of the first sample's value for this column (0 if unusable)."""
        try:
            if col_name in sample and sample[col_name]:
                return len(str(sample[col_name]))
            return 0
        except Exception:
            # e.g. values whose truthiness is ambiguous; treat as no content
            return 0

    def score_column(col_name, keywords, role_type, num_candidates):
        """Score a column for how likely it is to be a particular role."""
        if not has_keyword(col_name, keywords):
            return 0

        score = 10

        # Penalize ambiguous keywords when scoring for user
        if role_type == "user":
            col_lower = col_name.lower()
            # If column is ONLY "task" (or task_xxx), give it lower priority for user role
            if "task" in col_lower and not any(
                kw in col_lower for kw in user_words_high_priority
            ):
                score -= 15  # Significant penalty so other user columns win

        score += get_priority_score(col_name)

        if role_type in ("assistant", "user"):
            length = get_content_length(col_name)
            if length > 1000:
                score += 50
            elif length > 200:
                score += 30
            elif length > 50:
                score += 10
            elif length < 50 and num_candidates > 1:
                # Short content is only penalized when there is competition;
                # a length of exactly 50 is neutral.
                score -= 20

        return score

    def pick_best(candidates, keywords, role_type, exclude = None):
        """Return the highest-scoring candidate with a positive score, or None.

        Ties keep the earliest column, matching a stable descending sort.
        """
        scored = []
        for col in candidates:
            if col == exclude:
                continue
            score = score_column(col, keywords, role_type, len(candidates))
            if score > 0:
                scored.append((col, score))
        if not scored:
            return None
        return max(scored, key = lambda item: item[1])[0]

    # Filter out metadata columns
    content_columns = [col for col in all_columns if not is_metadata(col)]

    # Count candidates first
    assistant_potential = [
        col for col in content_columns if has_keyword(col, assistant_words)
    ]
    user_potential = [col for col in content_columns if has_keyword(col, user_words)]

    # STEP 1: Find best ASSISTANT column
    assistant_col = pick_best(assistant_potential, assistant_words, "assistant")
    if assistant_col is not None:
        mapping[assistant_col] = "assistant"

    # STEP 2: Find best USER column (with penalty for ambiguous keywords)
    user_col = pick_best(user_potential, user_words, "user", exclude = assistant_col)
    if user_col is not None:
        mapping[user_col] = "user"

    # STEP 3: Check ALL remaining columns for SYSTEM matches (priority check)
    remaining_columns = [col for col in content_columns if col not in mapping]

    system_col = next(
        (col for col in remaining_columns if has_keyword(col, system_words)),
        None,
    )
    if system_col is not None:
        mapping[system_col] = "system"
        remaining_columns = [col for col in remaining_columns if col != system_col]

    # STEP 4: Handle the first additional remaining column
    if remaining_columns:
        remaining_col = remaining_columns[0]

        # It becomes the user column only when none was found yet AND its name
        # carries a user/assistant keyword; otherwise it is system context.
        if user_col is None and has_keyword(
            remaining_col, user_words + assistant_words
        ):
            mapping[remaining_col] = "user"
        else:
            mapping[remaining_col] = "system"

    # VALIDATION: Ensure we have at least user + assistant
    roles = set(mapping.values())
    has_user = "user" in roles
    has_assistant = "assistant" in roles

    if not has_user:
        # Last resort: promote the first still-unmapped column to user.
        for col in remaining_columns:
            if col not in mapping:
                mapping[col] = "user"
                has_user = True
                break

    if has_user and has_assistant:
        return mapping

    return None


def detect_multimodal_dataset(dataset):
    """
    Inspects the first sample of ``dataset`` for image and/or audio columns.

    Each modality is detected in two passes:
      1. Column-name heuristic (fast): word-boundary keyword match.
      2. Value-type inspection (reliable): looks at the sample's values for
         columns the name heuristic did not already pick up.

    Columns detected as audio are removed from the image column list.

    Returns:
        dict: {
            "is_image": bool,
            "multimodal_columns": list of column names containing image data,
            "modality_types": list of detected types (e.g., ["image", "audio"]),
            "is_audio": bool,
            "audio_columns": list of column names containing audio data,
            "detected_audio_column": str or None,
            "detected_text_column": str or None,
            "detected_speaker_column": str or None,
        }
    """
    first_row = next(iter(dataset))
    columns = list(first_row.keys())

    # Keywords that indicate image data
    image_keywords = [
        "image",
        "img",
        "pixel",
        "jpg",
        "jpeg",
        "png",
        "webp",
        "bmp",
        "gif",
        "tiff",
        "svg",
        "photo",
        "pic",
        "picture",
        "visual",
        "file_name",
        "filename",
    ]

    # Keywords that indicate audio data
    audio_keywords = ["audio", "speech", "wav", "waveform", "sound"]

    found_modalities = set()

    def _first_keyword_hit(col, keywords):
        """First keyword matching ``col`` on a word boundary, else None."""
        return next((kw for kw in keywords if _keyword_in_column(kw, col)), None)

    def _first_column_named(names):
        """First column whose lower-cased name is exactly one of ``names``."""
        return next((col for col in columns if col.lower() in names), None)

    # ── Image detection ─────────────────────────────────────
    # Pass 1: column names (word-boundary match avoids 'pic' in 'topic').
    # NOTE(review): this pass records the matched keyword itself (e.g. "img")
    # in the modality set, whereas pass 2 records "image" — confirm intended.
    image_cols = []
    for col in columns:
        hit = _first_keyword_hit(col, image_keywords)
        if hit is not None:
            image_cols.append(col)
            found_modalities.add(hit)

    # Pass 2: actual values, for columns the name pass missed
    matched_by_name = set(image_cols)
    for col in columns:
        if col not in matched_by_name and _is_image_value(first_row[col]):
            image_cols.append(col)
            found_modalities.add("image")

    # ── Audio detection ─────────────────────────────────────
    # Pass 1: column names (word-boundary match)
    audio_cols = []
    for col in columns:
        if _first_keyword_hit(col, audio_keywords) is not None:
            audio_cols.append(col)
            found_modalities.add("audio")

    # Pass 2: actual values (catches non-obvious column names)
    audio_by_name = set(audio_cols)
    for col in columns:
        if col not in audio_by_name and _is_audio_value(first_row[col]):
            audio_cols.append(col)
            found_modalities.add("audio")

    has_audio = len(audio_cols) > 0

    text_col = None
    speaker_col = None
    if has_audio:
        # A column flagged as audio must not also be reported as an image
        # column (e.g. a {"bytes", "path"} dict can look like either).
        audio_lookup = set(audio_cols)
        image_cols = [col for col in image_cols if col not in audio_lookup]

        # Transcript column for audio datasets
        text_col = _first_column_named(
            ["text", "sentence", "transcript", "transcription", "label"]
        )
        # speaker_id column for TTS datasets (CSM, Orpheus, Spark)
        speaker_col = _first_column_named(["source", "speaker", "speaker_id"])

    return {
        "is_image": len(image_cols) > 0,
        "multimodal_columns": image_cols,
        "modality_types": list(found_modalities),
        "is_audio": has_audio,
        "audio_columns": audio_cols,
        "detected_audio_column": audio_cols[0] if audio_cols else None,
        "detected_text_column": text_col,
        "detected_speaker_column": speaker_col,
    }


def _is_image_value(value) -> bool:
    """Check if a single sample value looks like image data."""
    if value is None:
        return False

    # PIL Image instance
    try:
        from PIL.Image import Image as PILImage

        if isinstance(value, PILImage):
            return True
    except ImportError:
        pass

    # HF datasets Image feature stores decoded images as PIL or dicts with
    # {"bytes": b"...", "path": "..."} when not yet decoded.
    # Exclude audio dicts (decoded audio has "array" + "sampling_rate").
    if isinstance(value, dict):
        if "array" in value and "sampling_rate" in value:
            return False  # This is audio, not image
        if "bytes" in value and "path" in value:
            # Check path extension to exclude audio files
            path = value.get("path") or ""
            if isinstance(path, str) and any(
                path.lower().endswith(ext) for ext in _AUDIO_EXTENSIONS
            ):
                return False
            return True

    # Raw bytes with a known image magic header
    if isinstance(value, (bytes, bytearray)):
        return _has_image_header(value)

    # String that looks like an image file path or URL
    _IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".webp", ".gif", ".bmp", ".tiff", ".svg")
    if isinstance(value, str) and len(value) < 1000:
        lower = value.strip().lower()
        # Image URL (http://... ending in image extension)
        if lower.startswith(("http://", "https://")) and any(
            lower.split("?")[0].endswith(ext) for ext in _IMAGE_EXTS
        ):
            return True
        # Image file path (relative or absolute path ending in image extension)
        if any(lower.endswith(ext) for ext in _IMAGE_EXTS):
            return True

    return False


# File suffixes that mark a media "path" as audio. The value checkers in this
# module compare these against ``path.lower()``, so entries must be lowercase
# and include the leading dot.
_AUDIO_EXTENSIONS = (
    ".wav",
    ".mp3",
    ".flac",
    ".ogg",
    ".opus",
    ".m4a",
    ".aac",
    ".wma",
    ".webm",
)


def _is_audio_value(value) -> bool:
    """Check if a single sample value looks like audio data."""
    if value is None:
        return False

    # HF datasets Audio feature: decoded → {"array": np.ndarray, "sampling_rate": int}
    if isinstance(value, dict):
        if "array" in value and "sampling_rate" in value:
            return True
        # Undecoded/streaming → {"bytes": b"...", "path": "some.wav"}
        if "bytes" in value or "path" in value:
            path = value.get("path") or ""
            if isinstance(path, str) and any(
                path.lower().endswith(ext) for ext in _AUDIO_EXTENSIONS
            ):
                return True

    return False


def _has_image_header(data: bytes) -> bool:
    """Quick magic-byte check for common image formats."""
    if len(data) < 4:
        return False
    # JPEG
    if data[:2] == b"\xff\xd8":
        return True
    # PNG
    if data[:4] == b"\x89PNG":
        return True
    # GIF
    if data[:3] == b"GIF":
        return True
    # WebP
    if data[:4] == b"RIFF" and len(data) >= 12 and data[8:12] == b"WEBP":
        return True
    # BMP
    if data[:2] == b"BM":
        return True
    return False


def detect_vlm_dataset_structure(dataset):
    """
    Detects if VLM dataset is:
    - Standard VLM messages format (image objects in content)
    - Llava format (image indices + separate images column)
    - ShareGPT/ChatML conversations with an ``<image>`` placeholder plus a
      companion image column
    - Simple format needing conversion (image + text columns)

    Only the first row of ``dataset`` is inspected.

    Args:
        dataset: Iterable of dict-like rows.

    Returns:
        Dict with keys ``format`` ("vlm_messages_llava", "vlm_messages",
        "sharegpt_with_images", "simple_image_text" or "unknown"),
        ``needs_conversion`` (bool, or None when unknown), ``image_column``,
        ``text_column`` and ``messages_column`` (column name or None).
    """
    try:
        sample = next(iter(dataset))
    except StopIteration:
        return {
            "format": "unknown",
            "needs_conversion": None,
            "image_column": None,
            "text_column": None,
            "messages_column": None,
        }

    # Keep the row's own column order (a list, not a set): several lookups
    # below pick the first match or break score ties by iteration order, and
    # set order over strings changes between interpreter runs.
    column_names = list(sample.keys())

    # Check if has messages column
    if "messages" in column_names:
        messages = sample["messages"]

        if messages and len(messages) > 0:
            first_msg = messages[0]
            # Non-dict turns (e.g. plain strings) cannot carry structured
            # content; indexing them with "content" would raise.
            if isinstance(first_msg, dict) and "content" in first_msg:
                content = first_msg["content"]

                if isinstance(content, list) and len(content) > 0:
                    if isinstance(content[0], dict) and "type" in content[0]:
                        # Check for llava format
                        has_index = any(
                            "index" in item
                            for item in content
                            if isinstance(item, dict)
                        )
                        has_images_column = "images" in column_names

                        if has_index and has_images_column:
                            return {
                                "format": "vlm_messages_llava",
                                "needs_conversion": True,
                                "messages_column": "messages",
                                "image_column": "images",
                                "text_column": None,
                            }

                        # Standard VLM format
                        has_image = any(
                            "image" in item
                            for item in content
                            if isinstance(item, dict)
                        )
                        if has_image:
                            return {
                                "format": "vlm_messages",
                                "needs_conversion": False,
                                "messages_column": "messages",
                                "image_column": None,
                                "text_column": None,
                            }

    # Check for ShareGPT/ChatML conversations with <image> placeholder + companion image column
    # (e.g. Lin-Chen/ShareGPT4V, LLaVA-style datasets)
    for chat_col in ("conversations", "messages"):
        if chat_col not in column_names:
            continue
        chat_data = sample[chat_col]
        if not isinstance(chat_data, list) or len(chat_data) == 0:
            continue
        first_msg = chat_data[0]
        if not isinstance(first_msg, dict):
            continue
        # Detect ShareGPT (from/value) or ChatML (role/content) keys
        msg_text = first_msg.get("value") or first_msg.get("content")
        if not isinstance(msg_text, str):
            continue
        # Check for <image> placeholder anywhere in the conversation
        has_image_placeholder = any(
            "<image>" in str(m.get("value", "") or m.get("content", ""))
            for m in chat_data
            if isinstance(m, dict)
        )
        if not has_image_placeholder:
            continue
        # Find companion image column (first match in dataset column order)
        image_col = None
        for col in column_names:
            if col == chat_col:
                continue
            if _keyword_in_column("image", col) or _keyword_in_column("img", col):
                image_col = col
                break
        if image_col:
            return {
                "format": "sharegpt_with_images",
                "needs_conversion": True,
                "image_column": image_col,
                "text_column": None,
                "messages_column": chat_col,
            }

    # Find image and text columns using metadata filtering

    # Define metadata patterns to EXCLUDE
    metadata_patterns = {
        "suffixes": [
            "_id",
            "_url",
            "_name",
            "_filename",
            "_uri",
            "_link",
            "_key",
            "_index",
        ],
        "prefixes": [
            "id_",
            "url_",
            "name_",
            "filename_",
            "uri_",
            "link_",
            "key_",
            "index_",
        ],
    }

    # Image-related keywords
    image_keywords = [
        "image",
        "img",
        "photo",
        "picture",
        "pic",
        "visual",
        "scan",
        "file_name",
        "filename",
    ]

    # Text-related keywords
    text_keywords = [
        "text",
        "caption",
        "captions",
        "description",
        "answer",
        "output",
        "response",
        "label",
    ]

    def is_metadata_column(col_name):
        """Check if column name looks like metadata."""
        col_lower = col_name.lower()

        # Check suffixes
        if any(col_lower.endswith(suffix) for suffix in metadata_patterns["suffixes"]):
            return True

        # Check prefixes
        if any(
            col_lower.startswith(prefix) for prefix in metadata_patterns["prefixes"]
        ):
            return True

        return False

    def _score_image_candidate(col, sample_value):
        """Score a candidate image column by how resolvable its value is."""
        # PIL Image object (highest priority - already loaded)
        if hasattr(sample_value, "size") and hasattr(sample_value, "mode"):
            return 100

        # Dict with image data (bytes/path from HF Image feature)
        if isinstance(sample_value, dict) and (
            "bytes" in sample_value or "path" in sample_value
        ):
            return 75

        if isinstance(sample_value, str):
            # URL strings
            if sample_value.startswith(("http://", "https://")):
                return 70 if not is_metadata_column(col) else 55
            # Bare file path
            if is_metadata_column(col):
                return 30
            return 50

        return 0

    def _probe_image_candidate(col, sample_value):
        """Quick probe to check if an image candidate is actually reachable.
        Returns True if likely valid, False if definitely broken."""
        import os

        # PIL / dict — already loaded, always valid
        if not isinstance(sample_value, str):
            return True

        # Local file — check it exists
        if not sample_value.startswith(("http://", "https://")):
            return os.path.exists(
                sample_value
            )  # bare filenames return False here, that's OK

        # URL — quick HEAD request with short timeout
        try:
            import urllib.request

            req = urllib.request.Request(sample_value, method = "HEAD")
            # Context manager closes the connection even on early return.
            with urllib.request.urlopen(req, timeout = 3) as resp:
                return resp.status < 400
        except Exception:
            return False

    def find_image_column():
        """Find image column by keyword match + value-based fallback.
        When multiple candidates exist, probes them to find one that works."""
        candidates = []

        # Pass 1: keyword-matched columns
        for col in column_names:
            if any(_keyword_in_column(keyword, col) for keyword in image_keywords):
                sample_value = sample[col]
                score = _score_image_candidate(col, sample_value)
                if score > 0:
                    candidates.append((col, score))

        # Pass 2: value-based fallback — find columns with image URLs/paths
        # even if the column name doesn't match image keywords
        already = {c[0] for c in candidates}
        for col in column_names:
            if col in already:
                continue
            sample_value = sample[col]
            if _is_image_value(sample_value):
                score = _score_image_candidate(col, sample_value)
                # Slightly penalise non-keyword columns so keyword matches win on ties
                candidates.append((col, max(score - 5, 1)))

        if not candidates:
            return None

        # Stable sort: equal scores keep dataset column order.
        candidates.sort(key = lambda x: x[1], reverse = True)

        # Single candidate or top candidate is PIL/dict — no probing needed
        if len(candidates) == 1 or candidates[0][1] >= 75:
            return candidates[0][0]

        # Multiple string-based candidates — probe to find one that actually works
        for col, score in candidates:
            sample_value = sample[col]
            if _probe_image_candidate(col, sample_value):
                return col

        # Nothing probed successfully — return highest-scored anyway and let
        # conversion handle the error (it may still resolve via hf_hub_download)
        return candidates[0][0]

    def find_text_column():
        """Find text column by filtering out metadata and checking keywords."""
        candidates = []

        for col in column_names:
            # Skip metadata columns
            if is_metadata_column(col):
                continue

            # Check if contains text keywords (word-boundary match)
            if any(_keyword_in_column(keyword, col) for keyword in text_keywords):
                # Verify it's actually text
                sample_value = sample[col]

                if isinstance(sample_value, str) and len(sample_value) > 0:
                    # Longer text = higher priority (likely content, not just a label)
                    priority = min(len(sample_value), 1000)  # Cap at 1000
                    candidates.append((col, priority))
                elif (
                    isinstance(sample_value, list)
                    and len(sample_value) > 0
                    and isinstance(sample_value[0], str)
                ):
                    # List of strings (e.g. captions list) — lower priority than plain strings
                    priority = min(len(sample_value[0]), 1000) // 2
                    candidates.append((col, priority))

        # Return highest priority candidate
        if candidates:
            candidates.sort(key = lambda x: x[1], reverse = True)
            return candidates[0][0]

        return None

    found_image = find_image_column()
    found_text = find_text_column()

    if found_image and found_text:
        return {
            "format": "simple_image_text",
            "needs_conversion": True,
            "image_column": found_image,
            "text_column": found_text,
            "messages_column": None,
        }

    return {
        "format": "unknown",
        "needs_conversion": None,
        "image_column": found_image,
        "text_column": found_text,
        "messages_column": None,
    }


================================================
FILE: studio/backend/utils/datasets/llm_assist.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
LLM-assisted dataset analysis using an ephemeral GGUF helper model.

Complements heuristic-based detection in format_detection.py and
vlm_processing.py.  Only invoked when heuristics are uncertain.

Architecture:
  - Instantiates LlamaCppBackend, loads model, runs completion(s), unloads.
  - Not kept warm — VRAM is freed immediately after use.
  - Gracefully degrades: returns None when unavailable (no binary, OOM, disabled).
"""

import json
import logging
import os
import re
import textwrap
import time
from itertools import islice
from typing import Any, Optional

from loggers import get_logger

logger = get_logger(__name__)

DEFAULT_HELPER_MODEL_REPO = "unsloth/Qwen3.5-4B-GGUF"
DEFAULT_HELPER_MODEL_VARIANT = "UD-Q4_K_XL"

README_MAX_CHARS = 1500


def _strip_think_tags(text: str) -> str:
    """Strip <think>...</think> reasoning blocks emitted by some models.

    If the model places its actual answer OUTSIDE the think block, we
    discard the think block and keep the rest.  If the entire response
    is INSIDE a think block (nothing useful outside), we extract and
    return the inner content instead of discarding everything.
    """
    if "<think>" not in text:
        return text

    # Try stripping think blocks — keep content outside them
    stripped = re.sub(r"<think>.*?</think>\s*", "", text, flags = re.DOTALL).strip()
    if stripped:
        return stripped

    # Everything was inside <think> tags — extract the inner content of the last block
    matches = re.findall(r"<think>(.*?)</think>", text, flags = re.DOTALL)
    if matches:
        return matches[-1].strip()

    return text


def precache_helper_gguf():
    """
    Download the helper GGUF file(s) into the Hugging Face cache ahead of use.

    Intended to run once at startup in a background thread so later
    ``_run_with_helper()`` calls do not pay for the download.  Does nothing
    when ``UNSLOTH_HELPER_MODEL_DISABLE`` is "1" or "true".  Every failure is
    logged as a warning and swallowed; the function never raises.
    """
    disable_flag = os.environ.get("UNSLOTH_HELPER_MODEL_DISABLE", "").strip()
    if disable_flag in ("1", "true"):
        return

    repo = os.environ.get("UNSLOTH_HELPER_MODEL_REPO", DEFAULT_HELPER_MODEL_REPO)
    variant = os.environ.get(
        "UNSLOTH_HELPER_MODEL_VARIANT", DEFAULT_HELPER_MODEL_VARIANT
    )

    try:
        from huggingface_hub import HfApi, hf_hub_download
        from huggingface_hub.utils import disable_progress_bars, enable_progress_bars

        disable_progress_bars()
        logging.getLogger("huggingface_hub").setLevel(logging.WARNING)

        repo_files = HfApi().list_repo_files(repo, repo_type = "model")

        # Compare with "-" and "_" treated alike; a variant may be split
        # into several shard files, so collect every match.
        wanted = variant.lower().replace("-", "_")
        shards = sorted(
            name
            for name in repo_files
            if name.endswith(".gguf") and wanted in name.lower().replace("-", "_")
        )

        if not shards:
            logger.warning(f"No GGUF matching variant '{variant}' in {repo}")
        else:
            logger.info(
                f"Pre-caching helper GGUF: {repo}/{shards[0]}"
                + (f" (+{len(shards) - 1} shards)" if len(shards) > 1 else "")
            )
            for shard in shards:
                hf_hub_download(repo_id = repo, filename = shard)
            logger.info(f"Helper GGUF cached: {len(shards)} file(s)")
    except Exception as e:
        logger.warning(f"Failed to pre-cache helper GGUF: {e}")
    finally:
        # Best effort: the name is unbound if the import above failed.
        try:
            enable_progress_bars()
        except Exception:
            pass


def _run_with_helper(prompt: str, max_tokens: int = 256) -> Optional[str]:
    """
    Load helper model, run one chat completion, unload.

    Returns the completion text, or None on any failure.
    """
    if os.environ.get("UNSLOTH_HELPER_MODEL_DISABLE", "").strip() in ("1", "true"):
        return None

    repo = os.environ.get("UNSLOTH_HELPER_MODEL_REPO", DEFAULT_HELPER_MODEL_REPO)
    variant = os.environ.get(
        "UNSLOTH_HELPER_MODEL_VARIANT", DEFAULT_HELPER_MODEL_VARIANT
    )

    backend = None
    try:
        from core.inference.llama_cpp import LlamaCppBackend

        backend = LlamaCppBackend()
        logger.info(f"Loading helper model: {repo} ({variant})")

        ok = backend.load_model(
            hf_repo = repo,
            hf_variant = variant,
            model_identifier = f"helper:{repo}:{variant}",
            is_vision = False,
            n_ctx = 2048,
            n_gpu_layers = -1,
        )
        if not ok:
            logger.warning("Helper model failed to start")
            return None

        messages = [{"role": "user", "content": prompt}]
        logger.info(
            "Helper model request: enable_thinking=False (per-request override)"
        )
        cumulative = ""
        for text in backend.generate_chat_completion(
            messages = messages,
            temperature = 0.1,
            top_p = 0.9,
            top_k = 20,
            max_tokens = max_tokens,
            repetition_penalty = 1.0,
            enable_thinking = False,  # Always disable thinking for AI Assist
        ):
            cumulative = text  # cumulative — last value is full text

        result = cumulative.strip()
        result = _strip_think_tags(result)
        logger.info(f"Helper model response ({len(result)} chars)")
        return result if result else None

    except Exception as e:
        logger.warning(f"Helper model failed: {e}")
        return None

    finally:
        if backend is not None:
            try:
                backend.unload_model()
                logger.info("Helper model unloaded")
            except Exception:
                pass


# ─── Public API ───────────────────────────────────────────────────────


def llm_generate_vlm_instruction(
    column_names: list[str],
    samples: list[dict],
    dataset_name: Optional[str] = None,
) -> Optional[dict]:
    """
    Generate a task-specific VLM instruction with the helper LLM.

    Args:
        column_names: Column names in the dataset.
        samples: Sample rows; at most the first five are put in the prompt,
            each value stringified and cut to 300 characters.
        dataset_name: Optional dataset identifier included in the prompt.

    Returns:
        {"instruction": str, "confidence": 0.85}, or None when the helper
        yields nothing or the reply is shorter than 10 characters, longer
        than 200 characters, or spans multiple lines.
    """
    # One "Sample N:" section per row, listing every column.
    formatted = "".join(
        f"Sample {idx}:\n"
        + "\n".join(f"  {col}: {str(row.get(col, ''))[:300]}" for col in column_names)
        + "\n\n"
        for idx, row in enumerate(samples[:5], 1)
    )

    prompt = (
        "You are a dataset analyst. Given a vision-language dataset, generate ONE "
        "instruction sentence that describes what the model should do with each image.\n\n"
        f"Dataset: {dataset_name or 'unknown'}\n"
        f"Columns: {column_names}\n\n"
        f"{formatted}"
        "Write ONE instruction sentence. Examples:\n"
        '- "Solve the math problem shown in the image and explain your reasoning."\n'
        '- "Transcribe all text visible in this image."\n'
        '- "Answer the question about this image."\n\n'
        "Respond with ONLY the instruction sentence, nothing else."
    )

    reply = _run_with_helper(prompt, max_tokens = 100)
    if not reply:
        return None

    # Peel surrounding whitespace and quote characters off the sentence.
    instruction = reply.strip().strip('"').strip("'").strip()

    usable = 10 <= len(instruction) <= 200 and "\n" not in instruction
    if not usable:
        logger.warning(f"Helper model returned unusable instruction: {instruction!r}")
        return None

    logger.info(f"LLM-generated instruction: {instruction}")
    return {"instruction": instruction, "confidence": 0.85}


def llm_classify_columns(
    column_names: list[str],
    samples: list[dict],
) -> Optional[dict[str, str]]:
    """
    Classify dataset columns into conversation roles with the helper LLM.

    Args:
        column_names: Column names in the dataset.
        samples: Sample rows; at most the first five are put in the prompt,
            each value stringified and cut to 200 characters.

    Returns:
        Dict mapping column_name → role ("user"|"assistant"|"system"|"metadata")
        restricted to known columns and valid roles, or None when the helper
        fails, its reply cannot be parsed, or the mapping lacks a "user" or
        an "assistant" column.
    """
    sections = []
    for idx, row in enumerate(samples[:5], 1):
        fields = "\n".join(
            f"  {col}: {str(row.get(col, ''))[:200]}" for col in column_names
        )
        sections.append(f"Sample {idx}:\n" + fields + "\n\n")
    formatted = "".join(sections)

    prompt = (
        "Classify each column in this dataset into one of these roles:\n"
        "- user: The input/question/prompt from the human\n"
        "- assistant: The expected output/answer/response from the AI\n"
        "- system: Context, persona, or task description\n"
        "- metadata: IDs, scores, labels, timestamps — not part of conversation\n\n"
        f"Columns: {column_names}\n\n"
        f"{formatted}"
        "Respond with ONLY a JSON object mapping column names to roles.\n"
        'Example: {"question": "user", "answer": "assistant", "id": "metadata"}'
    )

    reply = _run_with_helper(prompt, max_tokens = 200)
    if not reply:
        return None

    # The reply may be wrapped in a markdown code fence; unwrap it.
    text = reply.strip()
    if text.startswith("```"):
        lines = text.split("\n")
        body = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
        text = "\n".join(body).strip()

    try:
        mapping = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the first flat {...} object embedded in the reply.
        match = re.search(r"\{[^}]+\}", text)
        if match is None:
            logger.warning(f"No JSON found in helper model response: {text!r}")
            return None
        try:
            mapping = json.loads(match.group())
        except json.JSONDecodeError:
            logger.warning(f"Could not parse helper model JSON: {text!r}")
            return None

    if not isinstance(mapping, dict):
        return None

    # Keep only known columns with a recognised role (case-insensitive).
    valid_roles = {"user", "assistant", "system", "metadata"}
    cleaned = {
        col: role.lower()
        for col, role in mapping.items()
        if col in column_names and isinstance(role, str) and role.lower() in valid_roles
    }
    if not cleaned:
        return None

    # A usable mapping needs both sides of the conversation.
    if not {"user", "assistant"} <= set(cleaned.values()):
        logger.warning(f"Helper model mapping missing user/assistant: {cleaned}")
        return None

    logger.info(f"LLM-classified columns: {cleaned}")
    return cleaned


def llm_generate_dataset_warning(
    issues: list[str],
    dataset_name: Optional[str] = None,
    modality: str = "text",
    column_names: Optional[list[str]] = None,
) -> Optional[str]:
    """
    Turn technical dataset issues into a short user-facing warning via the helper LLM.

    Args:
        issues: Technical issue descriptions found during analysis.
        dataset_name: Optional dataset name included in the prompt.
        modality: Modality label included in the prompt (e.g. "text",
            "vision", "audio").
        column_names: Optional column names included in the prompt.

    Returns:
        The warning text, or None when ``issues`` is empty, the helper
        yields nothing, or the reply is shorter than 10 or longer than 500
        characters.
    """
    if not issues:
        return None

    bullet_list = "\n".join(f"- {issue}" for issue in issues)
    columns_line = f"\nColumns: {column_names}" if column_names else ""

    prompt = (
        "You are a helpful assistant. A user is trying to fine-tune a model on a dataset.\n"
        "The following issues were found during dataset analysis:\n\n"
        f"{bullet_list}\n\n"
        f"Dataset: {dataset_name or 'unknown'}\n"
        f"Modality: {modality}"
        f"{columns_line}\n\n"
        "Write a brief, friendly explanation of what's wrong and what the user can do about it.\n"
        "Keep it under 3 sentences. Be specific about the dataset."
    )

    reply = _run_with_helper(prompt, max_tokens = 200)
    if not reply:
        return None

    warning = reply.strip()
    # Length sanity check guards against empty-ish or runaway replies.
    if not 10 <= len(warning) <= 500:
        return None

    logger.info(f"LLM-generated warning: {warning}")
    return warning


# ─── Dataset Conversion Advisor ──────────────────────────────────────


def _parse_json_response(text: str) -> Optional[dict]:
    """Parse JSON from LLM response, handling markdown fences and noise."""
    if not text:
        return None

    cleaned = text.strip()

    # Strip markdown code fences
    if cleaned.startswith("```"):
        lines = cleaned.split("\n")
        end = -1 if lines[-1].strip().startswith("```") else len(lines)
        cleaned = "\n".join(lines[1:end]).strip()

    # Try direct parse
    try:
        obj = json.loads(cleaned)
        if isinstance(obj, dict):
            return obj
    except json.JSONDecodeError:
        pass

    # Greedy match for outermost {...}
    match = re.search(r"\{.*\}", cleaned, re.DOTALL)
    if match:
        try:
            obj = json.loads(match.group())
            if isinstance(obj, dict):
                return obj
        except json.JSONDecodeError:
            pass

    return None


def _generate_with_backend(backend, messages: list[dict], max_tokens: int = 512) -> str:
    """Run one chat completion on an already-loaded backend.

    Consumes the whole generation stream, keeps its final item, and returns
    that text stripped of whitespace and think-tags ("" if nothing was
    yielded).
    """
    logger.info("Advisor request: enable_thinking=False (per-request override)")
    stream = backend.generate_chat_completion(
        messages = messages,
        temperature = 0.1,
        top_p = 0.9,
        top_k = 20,
        max_tokens = max_tokens,
        repetition_penalty = 1.0,
        enable_thinking = False,  # Always disable thinking for AI Assist
    )
    # Only the last yielded value is kept; earlier ones are overwritten.
    final_text = ""
    for snapshot in stream:
        final_text = snapshot
    return _strip_think_tags(final_text.strip())


def fetch_hf_dataset_card(
    dataset_name: str, hf_token: Optional[str] = None
) -> tuple[Optional[str], Optional[dict]]:
    """
    Load a dataset's card (README) and selected metadata fields.

    The README is truncated to ``README_MAX_CHARS``, preferably at the last
    period when that period lies in the second half of the allowed window.

    Args:
        dataset_name: Dataset identifier passed to ``DatasetCard.load``.
        hf_token: Optional access token passed to ``DatasetCard.load``.

    Returns:
        (readme_text, metadata_dict), or (None, None) on any failure.
    """
    try:
        from huggingface_hub import DatasetCard

        card = DatasetCard.load(dataset_name, token = hf_token)
        readme = card.text or ""

        if len(readme) > README_MAX_CHARS:
            kept = readme[:README_MAX_CHARS]
            last_period = kept.rfind(".")
            # Cut at the sentence end unless that would drop over half.
            if last_period > README_MAX_CHARS // 2:
                kept = readme[: last_period + 1]
            readme = kept + "\n[...truncated]"

        # Copy only the card-data fields that are present and not None.
        metadata = {}
        if card.data:
            wanted_fields = (
                "task_categories",
                "task_ids",
                "language",
                "size_categories",
                "tags",
                "license",
                "pretty_name",
            )
            for field_name in wanted_fields:
                field_value = getattr(card.data, field_name, None)
                if field_value is None:
                    continue
                metadata[field_name] = field_value

        logger.info(
            f"Fetched dataset card: {len(readme)} chars, {len(metadata)} metadata fields"
        )
        return readme, metadata

    except Exception as e:
        logger.warning(f"Could not fetch dataset card for {dataset_name}: {e}")
        return None, None


def _render_advisor_prompt(template: str, **fields: Any) -> str:
    """
    Dedent a prompt template, then fill in its ``{name}`` placeholders.

    Dedenting must happen BEFORE substitution: substituted values (sample
    rows, JSON, card text) are multi-line and start at column 0, so dedenting
    the already-filled text would find no common margin and leave the
    template's source indentation in the prompt.
    """
    return textwrap.dedent(template).format(**fields)


def _format_advisor_samples(columns: list[str], rows: list[dict]) -> str:
    """Render rows as ``Row N:`` blocks, one ``  col: value`` line per column
    (each value stringified and cut to 200 characters)."""
    blocks = []
    for i, row in enumerate(rows, 1):
        parts = [f"  {col}: {str(row.get(col, ''))[:200]}" for col in columns]
        blocks.append(f"Row {i}:\n" + "\n".join(parts) + "\n")
    return "".join(blocks)


def _run_multi_pass_advisor(
    columns: list[str],
    samples: list[dict],
    dataset_name: Optional[str] = None,
    dataset_card: Optional[str] = None,
    dataset_metadata: Optional[dict] = None,
    model_name: Optional[str] = None,
    model_type: Optional[str] = None,
    hf_token: Optional[str] = None,
) -> Optional[dict[str, Any]]:
    """
    Multi-pass LLM analysis: classify → map columns to roles → system prompt.

    A helper model is loaded once through ``LlamaCppBackend`` and reused for
    every pass, then unloaded in a ``finally`` block.

    Passes:
        1. Classify the dataset (type, conversational or not). If it reports
           ``is_conversational`` and not ``needs_conversion``, the function
           returns early without a column mapping.
        2. Assign each column to "user", "assistant" or "skip". The result is
           filtered to columns that exist in ``columns`` and must then still
           contain at least one "user" and one "assistant" column.
        3. Generate a free-text system prompt (skipped when Pass 1 reported
           ``is_conversational``).

    Args:
        columns: Column names of the dataset.
        samples: Sample rows (dicts keyed by column name); at most the first
            5 are shown to the model.
        dataset_name: Accepted for interface symmetry; not used here.
        dataset_card: Dataset card text; the first 1200 characters are used.
        dataset_metadata: Card metadata; serialized to JSON, first 500 chars.
        model_name: Target model, used only to detect Gemma 3n.
        model_type: Target model type; "audio" and "embeddings" add hints.
        hf_token: Token forwarded to ``load_model_config``.

    Returns:
        A result dict with ``"success": True`` on success. ``None`` when the
        helper model is disabled via UNSLOTH_HELPER_MODEL_DISABLE, the model
        fails to load, Pass 1 or Pass 2 yields no parsed JSON, the role
        sanity check fails, or any exception is raised.
    """
    if os.environ.get("UNSLOTH_HELPER_MODEL_DISABLE", "").strip() in ("1", "true"):
        return None

    repo = os.environ.get("UNSLOTH_HELPER_MODEL_REPO", DEFAULT_HELPER_MODEL_REPO)
    variant = os.environ.get(
        "UNSLOTH_HELPER_MODEL_VARIANT", DEFAULT_HELPER_MODEL_VARIANT
    )

    backend = None
    try:
        from core.inference.llama_cpp import LlamaCppBackend

        backend = LlamaCppBackend()
        logger.info(f"Loading advisor model: {repo} ({variant})")
        t0 = time.monotonic()

        ok = backend.load_model(
            hf_repo = repo,
            hf_variant = variant,
            model_identifier = f"advisor:{repo}:{variant}",
            is_vision = False,
            n_ctx = 2048,
            n_gpu_layers = -1,
        )
        if not ok:
            logger.warning("Advisor model failed to start")
            return None

        logger.info(f"Advisor model loaded in {time.monotonic() - t0:.1f}s")

        # ── Format samples ──
        shown_rows = samples[:5]
        # The prompts state how many rows are shown; keep that label truthful.
        sample_count = len(shown_rows)
        samples_text = _format_advisor_samples(columns, shown_rows)

        metadata_str = (
            json.dumps(dataset_metadata, indent = 2, default = str)[:500]
            if dataset_metadata
            else "N/A"
        )
        card_excerpt = (dataset_card or "")[:1200] or "N/A"

        # ── Target Model Hints ──
        target_hints = ""
        is_gemma_3n = False
        if model_name:
            try:
                from utils.models.model_config import load_model_config

                config = load_model_config(
                    model_name,
                    use_auth = True,
                    token = hf_token,
                    trust_remote_code = False,
                )
                archs = getattr(config, "architectures", [])
                if archs and "Gemma3nForConditionalGeneration" in archs:
                    is_gemma_3n = True
            except Exception:
                # Config unavailable: fall back to a name-based guess.
                is_gemma_3n = "gemma-3n" in model_name.lower()

        if model_type == "audio" and not is_gemma_3n:
            target_hints = (
                "\n\nHINT: The user is training an AUDIO model. The dataset MUST contain "
                "a column with audio files/paths. Ensure one such column is selected "
                "as part of the input."
            )
        elif model_type == "embeddings":
            target_hints = (
                "\n\nHINT: The user is training an EMBEDDING model. These models typically "
                "do not use standard conversational input/output formats but instead use "
                "specific formats like:\n"
                "- Pairs of texts for Semantic Textual Similarity (STS)\n"
                "- Premise, hypothesis, and label for Natural Language Inference (NLI)\n"
                "- Queries and positive/negative documents for information retrieval\n"
                "Ensure the dataset format mapped reflects these specialized tasks."
            )

        # ── Pass 1: Classify ──
        logger.info("Pass 1: Classifying dataset...")
        t1 = time.monotonic()
        messages1 = [
            {
                "role": "system",
                "content": (
                    "You are a dataset analyst. Your job is to look at a HuggingFace dataset "
                    "and figure out what kind of data it contains and whether it is already in "
                    "a conversational format suitable for LLM fine-tuning. A dataset is "
                    '"conversational" if it already has columns like "messages", "conversations", '
                    'or multiturn "user"/"assistant" pairs. Some datasets are NOT conversational '
                    "— they are things like summarization, question answering, translation, "
                    "classification, etc. Those need conversion. You must respond with ONLY a "
                    "valid JSON object. Do not write any explanation before or after the JSON."
                    f"{target_hints}"
                ),
            },
            {
                "role": "user",
                "content": _render_advisor_prompt(
                    """\
                    Look at this HuggingFace dataset and classify it.

                    DATASET CARD (excerpt):
                    {card_excerpt}

                    METADATA:
                    {metadata_str}

                    COLUMNS: {columns}

                    SAMPLE DATA (first {sample_count} rows):
                    {samples_text}

                    Based on the above, respond with this exact JSON structure:
                    {{
                        "dataset_type": "<one of: summarization, question_answering, translation, classification, natural_language_inference, instruction_following, conversational, code_generation, other>",
                        "is_conversational": <true if the dataset already has message/conversation columns, false otherwise>,
                        "needs_conversion": <true if it needs to be converted into user/assistant turns, false if it is already conversational>,
                        "description": "<one sentence describing what this dataset contains>",
                        "task_description": "<one sentence describing the task: what input goes in and what output comes out>"
                    }}

                    Respond with ONLY the JSON object. No markdown, no explanation.""",
                    card_excerpt = card_excerpt,
                    metadata_str = metadata_str,
                    columns = columns,
                    sample_count = sample_count,
                    samples_text = samples_text,
                ),
            },
        ]
        raw1 = _generate_with_backend(backend, messages1, max_tokens = 256)
        pass1 = _parse_json_response(raw1)
        logger.info(f"Pass 1 done ({time.monotonic() - t1:.1f}s): {pass1}")

        if not pass1:
            logger.warning(f"Advisor Pass 1 failed to produce JSON: {raw1[:200]}")
            return None

        # If dataset is already conversational, skip passes 2-3
        if pass1.get("is_conversational") and not pass1.get("needs_conversion"):
            return {
                "success": True,
                "dataset_type": pass1.get("dataset_type"),
                "is_conversational": True,
                "user_notification": (
                    "This dataset is already in conversational format. "
                    "No conversion needed — columns can be mapped directly."
                ),
            }

        # ── Pass 2: Map columns to roles ──
        logger.info("Pass 2: Mapping columns to roles...")

        t2 = time.monotonic()
        messages2 = [
            {
                "role": "system",
                "content": (
                    "You are a data preparation assistant. Your job is to assign each column "
                    "in a dataset to a conversation role for LLM fine-tuning. There are exactly "
                    "two roles:\n"
                    '- "user" = This column contains INPUT that the model will receive as a prompt.\n'
                    '- "assistant" = This column contains OUTPUT that the model should learn to generate.\n\n'
                    "CRITICAL RULES:\n"
                    '1. There MUST be at least one column assigned to "user" AND at least one '
                    'column assigned to "assistant". Never assign all columns to the same role.\n'
                    "2. The column that contains the TARGET or OUTPUT or ANSWER or LABEL must "
                    'ALWAYS be assigned to "assistant". This is the thing the model should learn '
                    "to produce.\n"
                    "3. The columns that contain the SOURCE or INPUT or CONTEXT or QUESTION must "
                    'be assigned to "user". This is what the model receives.\n'
                    '4. Metadata columns like "id", "index", "source", "url", "date" should be '
                    'set to "skip".\n\n'
                    "You must respond with ONLY a valid JSON object."
                    f"{target_hints}"
                ),
            },
            {
                "role": "user",
                "content": _render_advisor_prompt(
                    """\
                    Here is a dataset that has been classified:

                    CLASSIFICATION:
                    {classification}

                    COLUMNS AVAILABLE: {columns}

                    SAMPLE DATA (first {sample_count} rows):
                    {samples_text}

                    Your task: assign each column to either "user", "assistant", or "skip".

                    Here are worked examples to guide you:

                    Example 1 — Summarization dataset with columns ["document", "summary"]:
                      "document" is the input text → "user"
                      "summary" is the output the model should generate → "assistant"
                      Result: {{"document": "user", "summary": "assistant"}}

                    Example 2 — Question answering dataset with columns ["context", "question", "answer"]:
                      "context" is input → "user"
                      "question" is input → "user"
                      "answer" is what the model should generate → "assistant"
                      Result: {{"context": "user", "question": "user", "answer": "assistant"}}

                    Example 3 — Classification dataset with columns ["text", "label"]:
                      "text" is input → "user"
                      "label" is the output the model should predict → "assistant"
                      Result: {{"text": "user", "label": "assistant"}}

                    Example 4 — Translation dataset with columns ["en", "fr"]:
                      "en" is the source language (input) → "user"
                      "fr" is the target language (output) → "assistant"
                      Result: {{"en": "user", "fr": "assistant"}}

                    Now apply this logic to the actual dataset columns listed above.

                    Respond with this exact JSON structure:
                    {{
                        "column_roles": {{
                            "<column_name>": "<user|assistant|skip>"
                        }},
                        "label_mapping": <if any column contains integer labels (like 0, 1, 2), provide a mapping like {{"label": {{"0": "entailment", "1": "neutral", "2": "contradiction"}}}}, otherwise null>,
                        "notes": "<brief explanation of why you assigned roles this way>"
                    }}

                    REMEMBER: There must be at least one "user" column AND at least one "assistant" column. If all columns are "user", you made a mistake — the output/target column should be "assistant".

                    Respond with ONLY the JSON object.""",
                    classification = json.dumps(pass1, indent = 2),
                    columns = columns,
                    sample_count = sample_count,
                    samples_text = samples_text,
                ),
            },
        ]
        raw2 = _generate_with_backend(backend, messages2, max_tokens = 512)
        pass2 = _parse_json_response(raw2)
        logger.info(f"Pass 2 done ({time.monotonic() - t2:.1f}s): {pass2}")

        if not pass2:
            logger.warning(f"Advisor Pass 2 failed to produce JSON: {raw2[:200]}")
            return None

        # ── Extract and validate column roles from Pass 2 ──
        column_roles = pass2.get("column_roles")
        if not isinstance(column_roles, dict):
            column_roles = {}  # null / malformed → fails the sanity check below

        # label_mapping is optional; a malformed value must not sink the mapping.
        raw_label_map = pass2.get("label_mapping")
        label_map = raw_label_map if isinstance(raw_label_map, dict) else {}

        # Build suggested_mapping (column → role, for the frontend dropdowns).
        # Only real columns with a usable role survive, so the sanity check
        # below cannot be satisfied by column names the model invented.
        suggested_mapping = {
            col: role
            for col, role in column_roles.items()
            if col in columns and role in ("user", "assistant", "system")
        }

        # Validate: must have at least one user AND one assistant
        roles_present = set(suggested_mapping.values())
        if "user" not in roles_present or "assistant" not in roles_present:
            logger.warning(
                f"Pass 2 sanity fail: missing user or assistant role: {column_roles}"
            )
            return None  # triggers fallback to simple classification

        # ── Pass 3: System prompt (non-conversational datasets only) ──
        sys_prompt = ""
        dtype = pass1.get("dataset_type", "unknown")
        is_conv = pass1.get("is_conversational", False)

        if not is_conv:
            logger.info("Pass 3: Generating system prompt...")
            t3 = time.monotonic()

            # Format label mapping info for the prompt
            label_info = ""
            for col, mapping in label_map.items():
                if isinstance(mapping, dict) and mapping:
                    pairs = ", ".join(f"{k} = {v}" for k, v in mapping.items())
                    label_info += f"\nLabel mapping for '{col}': {pairs}"

            # Describe the role assignments for context
            user_cols = [c for c, r in suggested_mapping.items() if r == "user"]
            asst_cols = [c for c, r in suggested_mapping.items() if r == "assistant"]
            task_desc = pass1.get("task_description") or pass1.get("description", "")

            # Built with implicit concatenation: a backslash-newline inside a
            # triple-quoted string would drag the next line's indentation
            # into the middle of the sentence.
            intro = (
                "I am building a fine-tuning dataset for an LLM. I need you to write a "
                "system prompt that will be included in every training example to tell "
                "the model what task it is performing."
            )

            messages3 = [
                {
                    "role": "user",
                    "content": _render_advisor_prompt(
                        """\
                        {intro}

                        Here is the task information:
                        - Dataset type: {dtype}
                        - Task description: {task_desc}
                        - The USER (input) columns are: {user_cols}
                        - The ASSISTANT (output) columns are: {asst_cols}
                        {label_info}

                        Write a system prompt that:
                        1. Explains what task the model is performing in plain language
                        2. Describes what input it will receive
                        3. Describes what output it should produce
                        4. Is 2-4 sentences long

                        Write ONLY the system prompt text. No quotes, no labels, no explanation around it.""",
                        intro = intro,
                        dtype = dtype,
                        task_desc = task_desc,
                        user_cols = user_cols,
                        asst_cols = asst_cols,
                        label_info = label_info,
                    ),
                },
            ]
            raw3 = _generate_with_backend(backend, messages3, max_tokens = 256)
            logger.info(
                f"Pass 3 done ({time.monotonic() - t3:.1f}s): {raw3[:200] if raw3 else None}"
            )

            if raw3:
                # Pass 3 returns raw text, not JSON — clean it up
                cleaned = raw3.strip().strip('"').strip("'").strip()
                if len(cleaned) >= 20 and cleaned.lower() not in ("null", "none", ""):
                    sys_prompt = cleaned

        # Build user notification from Pass 1 classification
        desc = pass1.get("task_description") or pass1.get("description", "")
        note_parts = [f"This is a {dtype} dataset (not conversational)."]
        if desc:
            note_parts.append(desc)
        note_parts.append(
            "Columns have been mapped to conversation roles. You can adjust the mapping if needed."
        )
        user_notification = " ".join(note_parts)

        total_time = time.monotonic() - t0
        logger.info(
            f"Advisor complete ({total_time:.1f}s): type={dtype}, mapping={suggested_mapping}, sys_prompt={bool(sys_prompt)}, label_map={bool(label_map)}"
        )

        return {
            "success": True,
            "suggested_mapping": suggested_mapping,
            "system_prompt": sys_prompt,
            "label_mapping": label_map if label_map else None,
            "dataset_type": dtype,
            "is_conversational": is_conv,
            "user_notification": user_notification,
        }

    except Exception as e:
        logger.warning(f"Advisor multi-pass failed: {e}")
        return None

    finally:
        if backend is not None:
            try:
                backend.unload_model()
                logger.info("Advisor model unloaded")
            except Exception as e:
                # Still best effort, but leave a trace: a model that fails to
                # unload may keep holding memory.
                logger.warning(f"Failed to unload advisor model: {e}")


def llm_conversion_advisor(
    column_names: list[str],
    samples: list[dict],
    dataset_name: Optional[str] = None,
    hf_token: Optional[str] = None,
    model_name: Optional[str] = None,
    model_type: Optional[str] = None,
) -> Optional[dict[str, Any]]:
    """
    Entry point for conversion advice: dataset card lookup, then LLM analysis.

    Steps:
        1. When ``dataset_name`` contains a "/", its dataset card and metadata
           are fetched via ``fetch_hf_dataset_card``.
        2. ``_run_multi_pass_advisor`` is run; a truthy result whose
           ``"success"`` entry is truthy is returned unchanged.
        3. Otherwise ``llm_classify_columns`` is used as a fallback.

    Returns:
        The multi-pass advisor's result dict, or, on the fallback path, a dict
        with ``success``, ``suggested_mapping`` (user/assistant/system roles
        only) and ``dataset_type`` / ``is_conversational`` /
        ``user_notification`` all set to None. None when the fallback also
        produces nothing.
    """
    card_text = None
    card_metadata = None
    # Only names with a slash are looked up as Hugging Face datasets.
    if dataset_name and "/" in dataset_name:
        card_text, card_metadata = fetch_hf_dataset_card(dataset_name, hf_token)

    advice = _run_multi_pass_advisor(
        columns = column_names,
        samples = samples,
        dataset_name = dataset_name,
        dataset_card = card_text,
        dataset_metadata = card_metadata,
        model_name = model_name,
        model_type = model_type,
        hf_token = hf_token,
    )
    if advice and advice.get("success"):
        logger.info(f"Conversion advisor succeeded: type={advice.get('dataset_type')}")
        return advice

    # Multi-pass gave nothing usable: try the simple classifier instead.
    logger.info("Advisor failed, falling back to simple column classification")
    fallback_roles = llm_classify_columns(column_names, samples)
    if not fallback_roles:
        return None

    kept_mapping = {}
    for column, role in fallback_roles.items():
        if role in ("user", "assistant", "system"):
            kept_mapping[column] = role

    return {
        "success": True,
        "suggested_mapping": kept_mapping,
        "dataset_type": None,
        "is_conversational": None,
        "user_notification": None,
    }


================================================
FILE: studio/backend/utils/datasets/model_mappings.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Model and template mappings for dataset processing.

This module contains the mapping dictionaries that associate model names
with their corresponding chat templates and response markers.
"""

TEMPLATE_TO_MODEL_MAPPER = {
    "phi-3.5": (
        "unsloth/Phi-3.5-mini-instruct-bnb-4bit",
        "unsloth/Phi-3.5-mini-instruct",
        "microsoft/Phi-3.5-mini-instruct",
    ),
    "phi-3": (
        "unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
        "unsloth/Phi-3-mini-4k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
        "unsloth/Phi-3-medium-4k-instruct-bnb-4bit",
        "unsloth/Phi-3-medium-4k-instruct",
        "microsoft/Phi-3-medium-4k-instruct",
        "unsloth/Phi-3-mini-4k-instruct-v0-bnb-4bit",
        "unsloth/Phi-3-mini-4k-instruct-v0",
    ),
    "phi-4": (
        "unsloth/phi-4-unsloth-bnb-4bit",
        "unsloth/phi-4",
        "microsoft/phi-4",
        "unsloth/phi-4-bnb-4bit",
        "unsloth/phi-4-reasoning-unsloth-bnb-4bit",
        "unsloth/phi-4-reasoning",
        "microsoft/Phi-4-reasoning",
        "unsloth/phi-4-reasoning-bnb-4bit",
        "unsloth/phi-4-reasoning-plus-unsloth-bnb-4bit",
        "unsloth/phi-4-reasoning-plus",
        "microsoft/Phi-4-reasoning-plus",
        "unsloth/phi-4-reasoning-plus-bnb-4bit",
        "unsloth/phi-4-mini-reasoning-unsloth-bnb-4bit",
        "unsloth/phi-4-mini-reasoning",
        "microsoft/Phi-4-mini-reasoning",
        "unsloth/phi-4-mini-reasoning-bnb-4bit",
        "unsloth/Phi-4-mini-instruct-unsloth-bnb-4bit",
        "unsloth/Phi-4-mini-instruct",
        "microsoft/Phi-4-mini-instruct",
        "unsloth/Phi-4-mini-instruct-bnb-4bit",
    ),
    "mistral": (
        "unsloth/mistral-7b-instruct-v0.1-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.1",
        "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.2",
        "mistralai/Mistral-7B-Instruct-v0.2",
        "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "unsloth/Mixtral-8x7B-Instruct-v0.1-unsloth-bnb-4bit",
        "unsloth/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "unsloth/Mixtral-8x7B-Instruct-v0.1-bnb-4bit",
        "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
        "unsloth/Mistral-Nemo-Instruct-2407",
        "mistralai/Mistral-Nemo-Instruct-2407",
        "unsloth/Mistral-Large-Instruct-2407-bnb-4bit",
        "mistralai/Mistral-Large-Instruct-2407",
        "unsloth/Mistral-Small-Instruct-2409-bnb-4bit",
        "unsloth/Mistral-Small-Instruct-2409",
        "mistralai/Mistral-Small-Instruct-2409",
        "unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit",
        "unsloth/Mistral-Small-24B-Instruct-2501",
        "mistralai/Mistral-Small-24B-Instruct-2501",
        "unsloth/Mistral-Small-24B-Instruct-2501-bnb-4bit",
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503-unsloth-bnb-4bit",
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503",
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503-bnb-4bit",
        "unsloth/Mistral-Small-3.2-24B-Instruct-2506-unsloth-bnb-4bit",
        "unsloth/Mistral-Small-3.2-24B-Instruct-2506",
        "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
        "unsloth/Mistral-Small-3.2-24B-Instruct-2506-bnb-4bit",
    ),
    "llama": (
        "meta-llama/Llama-2-13b-chat-hf",
        "unsloth/llama-2-7b-chat-bnb-4bit",
        "unsloth/llama-2-7b-chat",
        "meta-llama/Llama-2-7b-chat-hf",
    ),
    "llama3": (
        "unsloth/llama-3-8b-Instruct-bnb-4bit",
        "unsloth/llama-3-8b-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "unsloth/llama-3-70b-Instruct-bnb-4bit",
        "meta-llama/Meta-Llama-3-70B-Instruct",
    ),
    "llama-3.1": (
        "unsloth/Meta-Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        "unsloth/Meta-Llama-3.1-8B-Instruct",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        "unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.1-8B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
        "unsloth/Llama-3.1-8B-Instruct-bnb-4bit",
        "unsloth/Meta-Llama-3.1-405B-Instruct-bnb-4bit",
        "meta-llama/Meta-Llama-3.1-405B-Instruct",
        "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
        "unsloth/Meta-Llama-3.1-70B-Instruct",
        "meta-llama/Meta-Llama-3.1-70B-Instruct",
        "unsloth/Llama-3.1-Storm-8B-bnb-4bit",
        "unsloth/Llama-3.1-Storm-8B",
        "akjindal53244/Llama-3.1-Storm-8B",
        "unsloth/Hermes-3-Llama-3.1-8B-bnb-4bit",
        "unsloth/Hermes-3-Llama-3.1-8B",
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "unsloth/Hermes-3-Llama-3.1-70B-bnb-4bit",
        "unsloth/Hermes-3-Llama-3.1-70B",
        "NousResearch/Hermes-3-Llama-3.1-70B",
        "unsloth/Hermes-3-Llama-3.1-405B-bnb-4bit",
        "NousResearch/Hermes-3-Llama-3.1-405B",
        "unsloth/Llama-3.1-Nemotron-70B-Instruct-bnb-4bit",
        "unsloth/Llama-3.1-Nemotron-70B-Instruct",
        "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
        "unsloth/Llama-3.1-Tulu-3-8B-bnb-4bit",
        "unsloth/Llama-3.1-Tulu-3-8B",
        "allenai/Llama-3.1-Tulu-3-8B",
        "unsloth/Llama-3.1-Tulu-3-70B-bnb-4bit",
        "unsloth/Llama-3.1-Tulu-3-70B",
        "allenai/Llama-3.1-Tulu-3-70B",
    ),
    "llama-3.2": (
        "unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.2-11B-Vision-Instruct",
        "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-90B-Vision-Instruct",
        "meta-llama/Llama-3.2-90B-Vision-Instruct",
    ),
    "llama-3.3": (
        "unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
        "unsloth/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.3-70B-Instruct",
    ),
    "gemma": (
        "unsloth/gemma-7b-it-bnb-4bit",
        "unsloth/gemma-7b-it",
        "google/gemma-7b-it",
        "google/gemma-2b-it",
        "unsloth/gemma-1.1-2b-it-bnb-4bit",
        "unsloth/gemma-1.1-2b-it",
        "google/gemma-1.1-2b-it",
        "unsloth/gemma-1.1-7b-it-bnb-4bit",
        "unsloth/gemma-1.1-7b-it",
        "google/gemma-1.1-7b-it",
    ),
    "gemma2": (
        "unsloth/gemma-2-9b-it-bnb-4bit",
        "unsloth/gemma-2-9b-it",
        "google/gemma-2-9b-it",
        "unsloth/gemma-2-27b-it-bnb-4bit",
        "unsloth/gemma-2-27b-it",
        "google/gemma-2-27b-it",
        "unsloth/gemma-2-2b-it-bnb-4bit",
        "unsloth/gemma-2-2b-it",
        "google/gemma-2-2b-it",
    ),
    "gemma-3": (
        "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-1b-it",
        "google/gemma-3-1b-it",
        "unsloth/gemma-3-1b-it-bnb-4bit",
        "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-4b-it",
        "google/gemma-3-4b-it",
        "unsloth/gemma-3-4b-it-bnb-4bit",
        "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-12b-it",
        "google/gemma-3-12b-it",
        "unsloth/gemma-3-12b-it-bnb-4bit",
        "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-27b-it",
        "google/gemma-3-27b-it",
        "unsloth/gemma-3-27b-it-bnb-4bit",
        "unsloth/gemma-3-270m-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-270m-it",
        "google/gemma-3-270m-it",
        "unsloth/gemma-3-270m-it-bnb-4bit",
        "unsloth/gemma-3-270m-unsloth-bnb-4bit",
        "unsloth/medgemma-4b-it-unsloth-bnb-4bit",
        "unsloth/medgemma-4b-it",
        "google/medgemma-4b-it",
        "unsloth/medgemma-4b-it-bnb-4bit",
        "unsloth/medgemma-27b-text-it-unsloth-bnb-4bit",
        "unsloth/medgemma-27b-text-it",
        "google/medgemma-27b-text-it",
        "unsloth/medgemma-27b-text-it-bnb-4bit",
    ),
    "gemma3n": (
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
        "unsloth/gemma-3n-E4B-it",
        "google/gemma-3n-E4B-it",
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
        "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
        "unsloth/gemma-3n-E2B-it",
        "google/gemma-3n-E2B-it",
        "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    ),
    "qwen2.5": (
        "unsloth/Qwen2.5-0.5B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "unsloth/Qwen2.5-0.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-1.5B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-1.5B-Instruct",
        "Qwen/Qwen2.5-1.5B-Instruct",
        "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-3B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "unsloth/Qwen2.5-3B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-7B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-14B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "unsloth/Qwen2.5-14B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-32B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "unsloth/Qwen2.5-72B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
        "unsloth/Qwen2.5-0.5B-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-Math-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Math-1.5B-Instruct",
        "Qwen/Qwen2.5-Math-1.5B-Instruct",
        "unsloth/Qwen2.5-Math-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Math-7B-Instruct",
        "Qwen/Qwen2.5-Math-7B-Instruct",
        "unsloth/Qwen2.5-Math-72B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Math-72B-Instruct",
        "Qwen/Qwen2.5-Math-72B-Instruct",
        "unsloth/Qwen2.5-Coder-0.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "unsloth/Qwen2.5-Coder-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-1.5B-Instruct",
        "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "unsloth/Qwen2.5-Coder-3B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-3B-Instruct",
        "Qwen/Qwen2.5-Coder-3B-Instruct",
        "unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-14B-Instruct",
        "Qwen/Qwen2.5-Coder-14B-Instruct",
        "unsloth/Qwen2.5-Coder-32B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-3B-Instruct",
        "Qwen/Qwen2.5-VL-3B-Instruct",
        "unsloth/Qwen2.5-VL-3B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-VL-32B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "unsloth/Qwen2.5-VL-32B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-VL-72B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-72B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "unsloth/Qwen2.5-VL-72B-Instruct-bnb-4bit",
        "unsloth/OpenThinker-7B-unsloth-bnb-4bit",
        "unsloth/OpenThinker-7B",
        "open-thoughts/OpenThinker-7B",
        "unsloth/OpenThinker-7B-bnb-4bit",
    ),
    "qwen3": (
        "unsloth/Qwen3-0.6B-unsloth-bnb-4bit",
        "unsloth/Qwen3-0.6B",
        "Qwen/Qwen3-0.6B",
        "unsloth/Qwen3-0.6B-bnb-4bit",
        "unsloth/Qwen3-1.7B-unsloth-bnb-4bit",
        "unsloth/Qwen3-1.7B",
        "Qwen/Qwen3-1.7B",
        "unsloth/Qwen3-1.7B-bnb-4bit",
        "unsloth/Qwen3-4B-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B",
        "Qwen/Qwen3-4B",
        "unsloth/Qwen3-4B-bnb-4bit",
        "unsloth/Qwen3-8B-unsloth-bnb-4bit",
        "unsloth/Qwen3-8B",
        "Qwen/Qwen3-8B",
        "unsloth/Qwen3-8B-bnb-4bit",
        "unsloth/Qwen3-14B-unsloth-bnb-4bit",
        "unsloth/Qwen3-14B",
        "Qwen/Qwen3-14B",
        "unsloth/Qwen3-14B-bnb-4bit",
        "unsloth/Qwen3-32B-unsloth-bnb-4bit",
        "unsloth/Qwen3-32B",
        "Qwen/Qwen3-32B",
        "unsloth/Qwen3-32B-bnb-4bit",
        "unsloth/Qwen3-30B-A3B-unsloth-bnb-4bit",
        "unsloth/Qwen3-30B-A3B",
        "Qwen/Qwen3-30B-A3B",
        "unsloth/Qwen3-30B-A3B-bnb-4bit",
    ),
    "qwen3-instruct": (
        "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B-Instruct-2507",
        "Qwen/Qwen3-4B-Instruct-2507",
        "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit",
        "unsloth/Qwen3-30B-A3B-Instruct-2507",
        "Qwen/Qwen3-30B-A3B-Instruct-2507",
        "unsloth/Qwen3-Coder-30B-A3B-Instruct",
        "Qwen/Qwen3-Coder-30B-A3B-Instruct",
        "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B-Instruct-2507",
        "Qwen/Qwen3-4B-Instruct-2507",
        "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit",
    ),
    "qwen3-thinking": (
        "unsloth/QwQ-32B-Preview-bnb-4bit",
        "unsloth/QwQ-32B-Preview",
        "Qwen/QwQ-32B-Preview",
        "unsloth/QwQ-32B-unsloth-bnb-4bit",
        "unsloth/QwQ-32B",
        "Qwen/QwQ-32B",
        "unsloth/QwQ-32B-bnb-4bit",
        "unsloth/Qwen3-4B-Thinking-2507-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B-Thinking-2507",
        "Qwen/Qwen3-4B-Thinking-2507",
        "unsloth/Qwen3-4B-Thinking-2507-bnb-4bit",
        "unsloth/Qwen3-30B-A3B-Thinking-2507",
        "Qwen/Qwen3-30B-A3B-Thinking-2507",
    ),
    "qwen3.5": (
        "unsloth/Qwen3.5-0.8B",
        "unsloth/Qwen3.5-2B",
        "unsloth/Qwen3.5-4B",
        "unsloth/Qwen3.5-27B",
        "unsloth/Qwen3.5-35B-A3B",
    ),
    "zephyr": (
        "unsloth/zephyr-sft-bnb-4bit",
        "unsloth/zephyr-sft",
        "HuggingFaceH4/mistral-7b-sft-beta",
    ),
    "chatml": (
        "unsloth/yi-6b-bnb-4bit",
        "unsloth/yi-6b",
        "01-ai/Yi-6B",
        "unsloth/Hermes-2-Pro-Mistral-7B-bnb-4bit",
        "unsloth/Hermes-2-Pro-Mistral-7B",
        "NousResearch/Hermes-2-Pro-Mistral-7B",
        "unsloth/OpenHermes-2.5-Mistral-7B-bnb-4bit",
        "unsloth/OpenHermes-2.5-Mistral-7B",
        "teknium/OpenHermes-2.5-Mistral-7B",
    ),
    "gpt-oss": (
        "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-20b",
        "openai/gpt-oss-20b",
        "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-120b",
        "openai/gpt-oss-120b",
        "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    ),
    "starling": (
        "unsloth/Starling-LM-7B-beta-bnb-4bit",
        "unsloth/Starling-LM-7B-beta",
        "Nexusflow/Starling-LM-7B-beta",
    ),
    "yi-chat": (
        "unsloth/yi-34b-chat-bnb-4bit",
        "01-ai/Yi-6B-Chat",
        "01-ai/Yi-34B-Chat",
    ),
    "glm": (
        "unsloth/GLM-4.7-Flash-unsloth-bnb-4bit",
        "unsloth/GLM-4.7-Flash",
        "THUDM/GLM-4.7-Flash",
        "unsloth/GLM-4.7-Flash-bnb-4bit",
    ),
}

# Reverse lookup derived from TEMPLATE_TO_MODEL_MAPPER: model name -> template key.
# Assignments simply overwrite, so if the same name is listed more than once the
# entry written last wins.
MODEL_TO_TEMPLATE_MAPPER = {}

for key, values in TEMPLATE_TO_MODEL_MAPPER.items():
    # First pass: register every model name exactly as it is spelled.
    for value in values:
        MODEL_TO_TEMPLATE_MAPPER[value] = key

    # Second pass: also register the lowercased spelling of every name, mapped
    # to the lowercased template key. It runs after the exact-case pass, so a
    # lowercased name that collides with an exact-case one overwrites it.
    lowered_key = key.lower()
    for value in values:
        MODEL_TO_TEMPLATE_MAPPER[value.lower()] = lowered_key


# Per-template marker strings, keyed by chat-template name. Each entry holds the
# literal text that introduces a user turn ("instruction") and an assistant turn
# ("response") in that template's rendered output.
# NOTE(review): presumably consumed to locate response spans in rendered chats
# (e.g. for response-only loss masking) -- confirm against the callers.
TEMPLATE_TO_RESPONSES_MAPPER = {
    "gemma-3": {
        "instruction": "<start_of_turn>user\n",
        "response": "<start_of_turn>model\n",
    },
    "gemma3n": {
        "instruction": "<start_of_turn>user\n",
        "response": "<start_of_turn>model\n",
    },
    "qwen3.5": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "qwen3-instruct": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "qwen3-thinking": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n<think>\n",
    },
    "qwen3": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "qwen2.5": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "llama-3.2": {
        "instruction": "<|start_header_id|>user<|end_header_id|>\n\n",
        "response": "<|start_header_id|>assistant<|end_header_id|>\n\n",
    },
    "llama-3.3": {
        "instruction": "<|start_header_id|>user<|end_header_id|>\n\n",
        "response": "<|start_header_id|>assistant<|end_header_id|>\n\n",
    },
    "llama-3.1": {
        "instruction": "<|start_header_id|>user<|end_header_id|>\n\n",
        "response": "<|start_header_id|>assistant<|end_header_id|>\n\n",
    },
    "llama3": {
        "instruction": "<|start_header_id|>user<|end_header_id|>\n\n",
        "response": "<|start_header_id|>assistant<|end_header_id|>\n\n",
    },
    "phi-3": {
        "instruction": "<|user|>\n",
        "response": "<|assistant|>\n",
    },
    "phi-3.5": {
        "instruction": "<|user|>\n",
        "response": "<|assistant|>\n",
    },
    "phi-4": {
        "instruction": "<|im_start|>user<|im_sep|>",
        "response": "<|im_start|>assistant<|im_sep|>",
    },
    "mistral": {
        "instruction": "[INST] ",
        "response": " [/INST]",
    },
    "llama": {
        "instruction": "[INST] ",
        "response": " [/INST]",
    },
    "chatml": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "zephyr": {
        "instruction": "<|user|>\n",
        "response": "<|assistant|>\n",
    },
    "unsloth": {
        "instruction": ">>> User: ",
        "response": ">>> Assistant: ",
    },
    "vicuna": {
        "instruction": "USER: ",
        "response": "ASSISTANT: ",
    },
    "alpaca": {
        "instruction": "### Instruction:\n",
        "response": "### Response:\n",
    },
    "gemma": {
        "instruction": "<start_of_turn>user\n",
        "response": "<start_of_turn>model\n",
    },
    "gemma2": {
        "instruction": "<start_of_turn>user\n",
        "response": "<start_of_turn>model\n",
    },
    "gpt-oss": {
        "instruction": "<|start|>user<|message|>",
        "response": "<|start|>assistant<|channel|>final<|message|>",
    },
    "lfm-2": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "starling": {
        "instruction": "GPT4 Correct User: ",
        "response": "GPT4 Correct Assistant: ",
    },
    "yi-chat": {
        "instruction": "<|im_start|>user\n",
        "response": "<|im_start|>assistant\n",
    },
    "glm": {
        "instruction": "[gMASK]<sop><|user|>",
        "response": "<|assistant|><think>",
    },
}


================================================
FILE: studio/backend/utils/datasets/vlm_processing.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
VLM (Vision-Language Model) processing utilities.

This module contains functions for generating smart instructions
for VLM datasets based on content analysis and heuristics.
"""

import re
from itertools import islice


def generate_smart_vlm_instruction(
    dataset,
    text_column = "text",
    image_column = "image",
    dataset_name = None,
):
    """
    Generate smart, context-aware instruction for VLM datasets using heuristics.

    Strategy (the first level that produces a result wins):
    1. Check the first row for a non-empty explicit question/instruction
       column → tell the caller to use that column per sample
    2. Score task patterns against the text column name, the dataset name
       and the first row's text content
    3. Analyze dataset name for well-known hints (VQA, COCO, Flickr)
    4. Ask the optional LLM helper (``.llm_assist``); any failure there is
       logged at debug level and ignored
    5. Fall back to generic instruction

    Args:
        dataset: Iterable of dict-like rows. The first row is fetched once
            and drives levels 1-2; up to five rows are summarised for the
            LLM helper. An empty dataset is tolerated: only the name-based
            heuristics and the generic fallback can apply.
        text_column: Name of the column holding the target text.
        image_column: Accepted for interface compatibility; not read here.
        dataset_name: Optional dataset identifier used for name-based hints.

    Returns:
        dict: {
            "instruction": str or None,  # None means use column content
            "instruction_column": str or None,  # set only for "explicit"
            "instruction_type": "explicit" | "inferred" | "llm_assisted" | "generic",
            "uses_dynamic_instruction": bool,  # True if instruction varies per sample
            "confidence": float,  # 0.0 to 1.0
        }
    """
    # Fetch the first row exactly once. Calling next(iter(dataset)) twice
    # re-fetches the row on streaming datasets, consumes two rows on one-shot
    # iterators, and raises StopIteration when the dataset is empty.
    first_row = next(iter(dataset), None)
    has_rows = first_row is not None
    sample = first_row if has_rows else {}
    column_names = set(sample.keys())

    # ===== LEVEL 1: Explicit Instruction Columns =====
    # Check for columns that contain per-sample instructions
    question_columns = ["question", "query", "prompt", "instruction", "user_prompt"]

    for col in question_columns:
        if col in column_names:
            # Only the first row is inspected: the column qualifies when its
            # value there is non-empty after stripping whitespace.
            sample_content = sample[col]
            if sample_content and str(sample_content).strip():
                return {
                    "instruction": None,  # Signal to use column content
                    "instruction_column": col,
                    "instruction_type": "explicit",
                    "uses_dynamic_instruction": True,
                    "confidence": 1.0,
                }

    # ===== LEVEL 2: Infer from Column Names + Content =====
    text_col_lower = text_column.lower()
    name_lower = dataset_name.lower() if dataset_name else ""

    # Sample the text content to detect patterns
    text_sample = str(sample.get(text_column, ""))[:500]  # First 500 chars

    # Task-specific keywords and their instructions.
    # NOTE(review): keywords are matched as plain substrings, so short ones
    # such as "ct" also hit names like "extracted_text" -- confirm intended.
    task_patterns = {
        # OCR / Transcription
        "ocr": {
            "keywords": ["ocr", "transcribe", "transcript"],
            "content_hints": [
                r"[A-Za-z\u0600-\u06FF]{10,}"
            ],  # Long text passages (Latin/Arabic)
            "instruction": "Transcribe all the text shown in this image.",
            "confidence": 0.9,
        },
        # LaTeX / Math
        "latex": {
            "keywords": ["latex", "math", "formula", "equation"],
            "content_hints": [r"\\[a-z]+\{", r"\^", r"_", r"\\frac"],  # LaTeX commands
            "instruction": "Convert this image to LaTeX notation.",
            "confidence": 0.95,
        },
        # Caption / Description
        "caption": {
            "keywords": ["caption", "description", "describe"],
            "content_hints": [],
            "instruction": "Provide a detailed description of this image.",
            "confidence": 0.85,
        },
        # Medical / Radiology
        "medical": {
            "keywords": [
                "medical",
                "radiology",
                "xray",
                "ct",
                "mri",
                "scan",
                "diagnosis",
            ],
            "content_hints": [r"\b(lesion|radiograph|patient|diagnosis|findings)\b"],
            "instruction": "Analyze this medical image and describe the key findings.",
            "confidence": 0.9,
        },
        # Code / Programming
        "code": {
            "keywords": ["code", "program", "function", "algorithm"],
            "content_hints": [r"def |class |function|import |return "],
            "instruction": "Explain what this code visualization shows.",
            "confidence": 0.85,
        },
        # Chart / Graph
        "chart": {
            "keywords": ["chart", "graph", "plot", "visualization", "diagram"],
            "content_hints": [r"\b(axis|legend|bar|line|pie|scatter)\b"],
            "instruction": "Describe this chart or graph, including key data points and trends.",
            "confidence": 0.85,
        },
        # Document / Text Recognition
        "document": {
            "keywords": ["document", "page", "paragraph", "article"],
            "content_hints": [r"\n.*\n.*\n"],  # Multi-line text
            "instruction": "Extract and transcribe the text from this document image.",
            "confidence": 0.85,
        },
    }

    # Score every task; the first task reaching the highest score wins.
    best_match = None
    best_score = 0.0

    for task_info in task_patterns.values():
        keywords = task_info["keywords"]
        score = 0.0

        # Column name hit
        if any(keyword in text_col_lower for keyword in keywords):
            score += 0.5

        # Dataset name hit (only when a name was provided)
        if name_lower and any(keyword in name_lower for keyword in keywords):
            score += 0.3

        # Content hit: at most one bonus no matter how many patterns match
        if any(
            re.search(pattern, text_sample, re.IGNORECASE)
            for pattern in task_info["content_hints"]
        ):
            score += 0.4

        if score > best_score:
            best_score = score
            best_match = task_info

    if best_match and best_score > 0.5:  # Confidence threshold
        return {
            "instruction": best_match["instruction"],
            "instruction_column": None,
            "instruction_type": "inferred",
            "uses_dynamic_instruction": False,
            "confidence": min(best_score, best_match["confidence"]),
        }

    # ===== LEVEL 3: Analyze Dataset Name =====
    if dataset_name:
        # Common dataset name patterns
        if "vqa" in name_lower or "question" in name_lower:
            return {
                "instruction": "Answer the question about this image.",
                "instruction_column": None,
                "instruction_type": "inferred",
                "uses_dynamic_instruction": False,
                "confidence": 0.75,
            }

        if "coco" in name_lower or "flickr" in name_lower:
            return {
                "instruction": "Provide a detailed caption for this image.",
                "instruction_column": None,
                "instruction_type": "inferred",
                "uses_dynamic_instruction": False,
                "confidence": 0.75,
            }

    # ===== LEVEL 4: LLM-Assisted Instruction Generation =====
    # Skipped for an empty dataset: there are no rows to describe.
    if has_rows:
        try:
            from .llm_assist import llm_generate_vlm_instruction

            sample_rows = []
            for s in islice(dataset, 5):
                row = {}
                for col in s:
                    val = s[col]
                    if hasattr(val, "size") and hasattr(val, "mode"):  # PIL Image
                        row[col] = "<image>"
                    else:
                        # Lists and scalars alike: truncated string preview
                        row[col] = str(val)[:300]
                sample_rows.append(row)

            llm_result = llm_generate_vlm_instruction(
                column_names = list(column_names),
                samples = sample_rows,
                dataset_name = dataset_name,
            )
            if llm_result and llm_result.get("instruction"):
                print(
                    f"\n[DEBUG] LLM-assisted VLM instruction generated: "
                    f"'{llm_result['instruction']}' (confidence={llm_result.get('confidence', 'N/A')})\n",
                    flush = True,
                )
                return {
                    "instruction": llm_result["instruction"],
                    "instruction_column": None,
                    "instruction_type": "llm_assisted",
                    "uses_dynamic_instruction": False,
                    "confidence": llm_result.get("confidence", 0.85),
                }
        except Exception as e:
            # Best effort by design: a missing or failing helper must not
            # prevent the generic fallback below.
            import logging

            logging.getLogger(__name__).debug(f"LLM-assisted instruction skipped: {e}")

    # ===== LEVEL 5: Generic Fallback =====
    return {
        "instruction": "Describe this image in detail.",
        "instruction_column": None,
        "instruction_type": "generic",
        "uses_dynamic_instruction": False,
        "confidence": 0.5,
    }


================================================
FILE: studio/backend/utils/hardware/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Hardware detection and GPU utilities
"""

from .hardware import (
    DeviceType,
    DEVICE,
    CHAT_ONLY,
    detect_hardware,
    get_device,
    is_apple_silicon,
    clear_gpu_cache,
    get_gpu_memory_info,
    log_gpu_memory,
    get_gpu_summary,
    get_package_versions,
    get_gpu_utilization,
    get_physical_gpu_count,
    get_visible_gpu_count,
    safe_num_proc,
)

__all__ = [
    "DeviceType",
    "DEVICE",
    "CHAT_ONLY",
    "detect_hardware",
    "get_device",
    "is_apple_silicon",
    "clear_gpu_cache",
    "get_gpu_memory_info",
    "log_gpu_memory",
    "get_gpu_summary",
    "get_package_versions",
    "get_gpu_utilization",
    "get_physical_gpu_count",
    "get_visible_gpu_count",
    "safe_num_proc",
]


================================================
FILE: studio/backend/utils/hardware/hardware.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Hardware detection — run once at startup, read everywhere.

Usage:
    # At FastAPI lifespan startup:
    from utils.hardware import detect_hardware
    detect_hardware()

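    # Anywhere else. Read the device through get_device(): a
    # `from ... import DEVICE` copies the value present at import time and
    # does not see what detect_hardware() assigns later.
    from utils.hardware import get_device, DeviceType, is_apple_silicon
    if get_device() == DeviceType.CUDA:
        import torch
        ...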
"""

import platform
import structlog
from loggers import get_logger
from enum import Enum
from typing import Optional, Dict, Any

logger = get_logger(__name__)


# ========== Device Enum ==========


class DeviceType(str, Enum):
    """
    Compute backends the application can run on.

    The ``str`` mixin makes each member an ordinary string too, so members
    compare equal to their lowercase value and serialize cleanly in JSON.
    """

    # NVIDIA GPU
    CUDA = "cuda"
    # Apple Silicon through the MLX framework
    MLX = "mlx"
    # No GPU backend
    CPU = "cpu"


# ========== Global State (set once by detect_hardware) ==========

# Detected backend; stays None until detect_hardware() assigns it.
# NOTE(review): `from <module> import DEVICE` copies the binding that exists at
# import time, so importers do not observe the later assignment made by
# detect_hardware(). Prefer get_device() for reads.
DEVICE: Optional[DeviceType] = None
# Reset to True at the start of every detect_hardware() call; only the CUDA
# branch sets it to False.
CHAT_ONLY: bool = True  # No CUDA GPU -> GGUF chat only (Mac, CPU-only, etc.)


# ========== Detection ==========


def is_apple_silicon() -> bool:
    """Return True on macOS running on an arm64 CPU (platform query only, no ML imports)."""
    if platform.system() != "Darwin":
        return False
    return platform.machine() == "arm64"


def _has_torch() -> bool:
    """Return True when ``import torch`` succeeds, False on ImportError."""
    try:
        import torch  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    else:
        return True


def _has_mlx() -> bool:
    """Return True when ``import mlx.core`` succeeds, False on ImportError."""
    try:
        import mlx.core  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    else:
        return True


def detect_hardware() -> DeviceType:
    """
    Pick the best available compute backend and store it in the module globals.

    Writes ``DEVICE`` and ``CHAT_ONLY`` and prints one line describing the
    result. Intended to run once during FastAPI lifespan startup; calling it
    again simply repeats the detection.

    Preference order:
      1. CUDA  (torch importable and ``torch.cuda.is_available()``)
      2. MLX   (Apple Silicon with the MLX framework importable)
      3. CPU   (fallback)

    Returns:
        The detected DeviceType (the same value stored in ``DEVICE``).
    """
    global DEVICE, CHAT_ONLY

    # Start from the restrictive default; only the CUDA branch lifts it.
    CHAT_ONLY = True

    # --- 1. CUDA through PyTorch ---
    cuda_ready = False
    if _has_torch():
        import torch

        cuda_ready = torch.cuda.is_available()

    if cuda_ready:
        DEVICE = DeviceType.CUDA
        CHAT_ONLY = False
        gpu_label = torch.cuda.get_device_properties(0).name
        print(f"Hardware detected: CUDA — {gpu_label}")
        return DEVICE

    # --- 2. MLX on Apple Silicon ---
    if is_apple_silicon() and _has_mlx():
        DEVICE = DeviceType.MLX
        chip_label = platform.processor() or platform.machine()
        print(f"Hardware detected: MLX — Apple Silicon ({chip_label})")
        return DEVICE

    # --- 3. CPU fallback ---
    DEVICE = DeviceType.CPU
    print("Hardware detected: CPU (no GPU backend available)")
    return DEVICE


# ========== Convenience helpers ==========


def get_device() -> DeviceType:
    """
    Return the detected device, running detect_hardware() first if no
    detection has happened yet.

    Calling detect_hardware() explicitly at startup is still the preferred
    path; this lazy detection is only a safety net.
    """
    global DEVICE
    if DEVICE is not None:
        return DEVICE

    detect_hardware()
    return DEVICE


def clear_gpu_cache():
    """
    Release cached GPU memory for the active device.

    Always runs a Python garbage collection. On CUDA it additionally
    synchronizes, empties the torch CUDA cache and collects IPC memory.
    On every other device nothing further is done.
    """
    import gc

    gc.collect()

    if get_device() != DeviceType.CUDA:
        # MLX and CPU: no explicit cache clear is performed here; the
        # gc.collect() above is the only cleanup.
        return

    import torch

    torch.cuda.synchronize()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()


def get_gpu_memory_info() -> Dict[str, Any]:
    """
    Describe memory of the active compute device.

    Returns:
        On CUDA or MLX success: a dict with ``available=True`` plus
        ``backend``, ``device``, ``device_name``, ``total_gb``,
        ``allocated_gb``, ``reserved_gb``, ``free_gb`` and
        ``utilization_pct``.
        On a CUDA/MLX failure: ``available=False`` with ``backend`` and an
        ``error`` message (the failure is also logged).
        On CPU-only: ``{"available": False, "backend": "cpu"}``.
    """
    device = get_device()
    bytes_per_gib = 1024**3

    # ---- CUDA ----
    if device == DeviceType.CUDA:
        try:
            import torch

            gpu_index = torch.cuda.current_device()
            gpu_props = torch.cuda.get_device_properties(gpu_index)

            total_bytes = gpu_props.total_memory
            used_bytes = torch.cuda.memory_allocated(gpu_index)
            reserved_bytes = torch.cuda.memory_reserved(gpu_index)

            return {
                "available": True,
                "backend": device.value,
                "device": gpu_index,
                "device_name": gpu_props.name,
                "total_gb": total_bytes / bytes_per_gib,
                "allocated_gb": used_bytes / bytes_per_gib,
                "reserved_gb": reserved_bytes / bytes_per_gib,
                # "free" is total minus what this process has allocated
                "free_gb": (total_bytes - used_bytes) / bytes_per_gib,
                "utilization_pct": (used_bytes / total_bytes) * 100,
            }
        except Exception as e:
            logger.error(f"Error getting CUDA GPU info: {e}")
            return {"available": False, "backend": device.value, "error": str(e)}

    # ---- MLX (Apple Silicon) ----
    if device == DeviceType.MLX:
        try:
            import mlx.core as mx
            import psutil

            # Unified memory: system RAM is reported as the pool
            total_bytes = psutil.virtual_memory().total
            # No allocation figure is queried here, so 0 is reported
            used_bytes = 0

            return {
                "available": True,
                "backend": device.value,
                "device": 0,
                "device_name": f"Apple Silicon ({platform.processor() or platform.machine()})",
                "total_gb": total_bytes / bytes_per_gib,
                "allocated_gb": used_bytes / bytes_per_gib,
                "reserved_gb": 0,
                "free_gb": (total_bytes - used_bytes) / bytes_per_gib,
                "utilization_pct": (used_bytes / total_bytes) * 100 if total_bytes else 0,
            }
        except Exception as e:
            logger.error(f"Error getting MLX GPU info: {e}")
            return {"available": False, "backend": device.value, "error": str(e)}

    # ---- CPU-only ----
    return {"available": False, "backend": "cpu"}


def log_gpu_memory(context: str):
    """Write one info-level log line with current GPU memory usage, tagged with *context*."""
    info = get_gpu_memory_info()

    if not info.get("available"):
        logger.info(f"GPU Memory [{context}]: No GPU available (CPU-only)")
        return

    backend = info.get("backend", "unknown").upper()
    device_name = info.get("device_name", "")
    # Append the device name in parentheses only when one is known
    label = f"{backend} ({device_name})" if device_name else f"{backend}"
    logger.info(
        f"GPU Memory [{context}] {label}: "
        f"{info['allocated_gb']:.2f}GB/{info['total_gb']:.2f}GB "
        f"({info['utilization_pct']:.1f}% used, "
        f"{info['free_gb']:.2f}GB free)"
    )


# ========== GPU Summary & Package Versions ==========


def get_gpu_summary() -> Dict[str, Any]:
    """
    Condense get_gpu_memory_info() into a three-field summary.

    Returns dict with keys:
        gpu_name      – e.g. "NVIDIA L4" (or None)
        vram_total_gb – total memory, rounded to 2 decimals (or None)
        vram_free_gb  – free memory, rounded to 2 decimals (or None)

    All three values are None when no GPU information is available.
    """
    mem = get_gpu_memory_info()

    if not mem.get("available"):
        return {"gpu_name": None, "vram_total_gb": None, "vram_free_gb": None}

    total_gb = round(mem.get("total_gb", 0), 2)
    free_gb = round(mem.get("free_gb", 0), 2)
    return {
        "gpu_name": mem.get("device_name"),
        "vram_total_gb": total_gb,
        "vram_free_gb": free_gb,
    }


def get_package_versions() -> Dict[str, Optional[str]]:
    """
    Report installed versions of the key ML packages.

    Package versions are read with importlib.metadata (stdlib, no
    subprocess). The CUDA version is taken from ``torch.version.cuda``.

    Returns dict with keys: unsloth, torch, transformers, cuda.
    A package that is not installed maps to None; ``cuda`` is None when
    torch cannot be imported or exposes no CUDA version.
    """
    from importlib.metadata import version as pkg_version, PackageNotFoundError

    def _installed_version(dist_name):
        """Version string of *dist_name*, or None when it is not installed."""
        try:
            return pkg_version(dist_name)
        except PackageNotFoundError:
            return None

    versions: Dict[str, Optional[str]] = {
        dist_name: _installed_version(dist_name)
        for dist_name in ("unsloth", "torch", "transformers")
    }

    # CUDA toolkit version bundled with torch
    try:
        import torch

        cuda_version = getattr(torch.version, "cuda", None)
    except Exception:
        cuda_version = None
    versions["cuda"] = cuda_version

    return versions


# ========== Live GPU Utilization (nvidia-smi) ==========


def get_gpu_utilization() -> Dict[str, Any]:
    """
    Take a live snapshot of GPU utilization via ``nvidia-smi``.

    Meant to be polled by the frontend during training (not streamed).
    ``nvidia-smi --query-gpu`` supplies utilization %, temperature and power
    draw; only the first GPU line of its output is used. VRAM figures that
    nvidia-smi does not provide are backfilled from ``torch.cuda``.

    Returns dict with keys:
        available          – bool, whether stats could be retrieved
        backend            – value of the active DeviceType
        gpu_utilization_pct – GPU core utilization %
        temperature_c      – GPU temperature in °C
        vram_used_gb       – VRAM currently used (GiB)
        vram_total_gb      – VRAM total (GiB)
        vram_utilization_pct – VRAM used / total * 100
        power_draw_w       – current power draw (W)
        power_limit_w      – power limit (W)
        power_utilization_pct – power draw / limit * 100

    On a non-CUDA device, or when nothing could be read, only ``available``
    (False) and ``backend`` are returned.
    """
    device = get_device()

    if device != DeviceType.CUDA:
        return {"available": False, "backend": device.value}

    def _parse_smi_value(raw: str):
        """Convert one nvidia-smi CSV cell to float; None for blank, [N/A] or junk."""
        cleaned = raw.strip()
        if cleaned in ("", "[N/A]"):
            return None
        try:
            return float(cleaned)
        except (ValueError, TypeError):
            return None

    # ── nvidia-smi (most complete source) ───────────────────────
    # Field names in the same order as the --query-gpu columns below
    smi_fields = (
        "gpu_util",
        "temp",
        "vram_used_mb",
        "vram_total_mb",
        "power_draw",
        "power_limit",
    )
    smi_data = {}
    try:
        import subprocess

        query = subprocess.run(
            [
                "nvidia-smi",
                "--query-gpu=utilization.gpu,temperature.gpu,memory.used,memory.total,power.draw,power.limit",
                "--format=csv,noheader,nounits",
            ],
            capture_output = True,
            text = True,
            timeout = 5,
        )

        if query.returncode == 0 and query.stdout.strip():
            # One line per GPU; only the first line is read
            first_line = query.stdout.strip().splitlines()[0]
            cells = first_line.split(",")
            if len(cells) >= len(smi_fields):
                # zip stops after the six known fields; extra cells are ignored
                smi_data = dict(zip(smi_fields, map(_parse_smi_value, cells)))

    except FileNotFoundError:
        logger.debug("nvidia-smi not found, falling back to torch.cuda")
    except Exception as e:
        logger.warning(f"nvidia-smi query failed: {e}")

    # ── Backfill VRAM from torch.cuda when nvidia-smi gave no value ──
    vram_used_mb = smi_data.get("vram_used_mb")
    vram_total_mb = smi_data.get("vram_total_mb")

    needs_backfill = vram_used_mb is None or vram_total_mb is None
    if needs_backfill:
        try:
            import torch

            gpu_index = torch.cuda.current_device()
            gpu_props = torch.cuda.get_device_properties(gpu_index)
            mib = 1024**2  # bytes → MiB
            if vram_total_mb is None:
                vram_total_mb = gpu_props.total_memory / mib
            if vram_used_mb is None:
                vram_used_mb = torch.cuda.memory_allocated(gpu_index) / mib
        except Exception as e:
            logger.debug(f"torch.cuda VRAM backfill failed: {e}")

    # ── Build response ──────────────────────────────────────────
    gpu_util = smi_data.get("gpu_util")
    temp = smi_data.get("temp")
    power_draw = smi_data.get("power_draw")
    power_limit = smi_data.get("power_limit")

    vram_used_gb = None
    if vram_used_mb is not None:
        vram_used_gb = round(vram_used_mb / 1024, 2)

    vram_total_gb = None
    if vram_total_mb is not None:
        vram_total_gb = round(vram_total_mb / 1024, 2)

    # Percentages need a known numerator and a positive denominator
    vram_pct = None
    if vram_used_mb is not None and vram_total_mb and vram_total_mb > 0:
        vram_pct = round((vram_used_mb / vram_total_mb) * 100, 1)

    power_pct = None
    if power_draw is not None and power_limit and power_limit > 0:
        power_pct = round((power_draw / power_limit) * 100, 1)

    # Report unavailable when not a single headline metric could be read
    if (
        gpu_util is None
        and temp is None
        and vram_used_gb is None
        and power_draw is None
    ):
        return {"available": False, "backend": device.value}

    return {
        "available": True,
        "backend": device.value,
        "gpu_utilization_pct": gpu_util,
        "temperature_c": temp,
        "vram_used_gb": vram_used_gb,
        "vram_total_gb": vram_total_gb,
        "vram_utilization_pct": vram_pct,
        "power_draw_w": power_draw,
        "power_limit_w": power_limit,
        "power_utilization_pct": power_pct,
    }


# ========== Multi-GPU Detection & Safe num_proc ==========

# Module-level caches for get_physical_gpu_count() / get_visible_gpu_count().
# None means "not computed yet"; each function fills its cache on first call
# and returns the stored value afterwards.
_physical_gpu_count: Optional[int] = None
_visible_gpu_count: Optional[int] = None


def get_physical_gpu_count() -> int:
    """
    Return the number of physical NVIDIA GPUs on the machine.

    Uses ``nvidia-smi -L`` which is NOT affected by CUDA_VISIBLE_DEVICES,
    so it always reflects the true hardware count. Only lines that start
    with ``GPU `` are counted: on MIG-enabled machines ``nvidia-smi -L``
    also prints one indented ``MIG ...`` line per MIG device, and those are
    not physical GPUs.

    Falls back to 1 when nvidia-smi is missing, fails, times out, or lists
    no GPU line. Result is cached after the first call.
    """
    global _physical_gpu_count
    if _physical_gpu_count is not None:
        return _physical_gpu_count

    gpu_lines = 0
    try:
        import subprocess

        result = subprocess.run(
            ["nvidia-smi", "-L"],
            capture_output = True,
            text = True,
            timeout = 5,
        )
        if result.returncode == 0:
            gpu_lines = sum(
                1
                for line in result.stdout.splitlines()
                if line.lstrip().startswith("GPU ")
            )
    except Exception:
        # Best effort: any failure to run nvidia-smi falls back to 1 below
        gpu_lines = 0

    _physical_gpu_count = gpu_lines if gpu_lines > 0 else 1
    return _physical_gpu_count


def get_visible_gpu_count() -> int:
    """
    Return how many GPUs this process can see.

    When ``CUDA_VISIBLE_DEVICES`` is set, the count is derived from it alone:
    an empty value or ``-1`` means zero, otherwise the number of non-empty
    comma-separated entries. When it is unset, ``torch.cuda.device_count()``
    is used, falling back to the physical count if that raises.
    Result is cached after the first call.
    """
    global _visible_gpu_count
    if _visible_gpu_count is not None:
        return _visible_gpu_count

    import os

    env_value = os.environ.get("CUDA_VISIBLE_DEVICES")

    if env_value is None:
        # Variable not set -- ask torch, fall back to the physical count
        try:
            import torch

            _visible_gpu_count = torch.cuda.device_count()
        except Exception:
            _visible_gpu_count = get_physical_gpu_count()
        return _visible_gpu_count

    # "" means zero GPUs, "0" means 1, "0,1,2" means 3
    env_value = env_value.strip()
    if env_value in ("", "-1"):
        _visible_gpu_count = 0
    else:
        _visible_gpu_count = sum(1 for entry in env_value.split(",") if entry.strip())
    return _visible_gpu_count


def safe_num_proc(desired: Optional[int] = None) -> int:
    """
    Return a safe ``num_proc`` for ``dataset.map()`` calls.

    On Windows, always returns 1 because Python uses ``spawn`` instead of
    ``fork`` for multiprocessing -- the overhead of re-importing torch,
    transformers, unsloth etc. per worker is typically slower than
    single-process for normal dataset sizes.

    On multi-GPU machines (where multiple GPUs are *visible* to this
    process) the NVIDIA driver spawns extra background threads, making
    ``os.fork()`` prone to deadlocks when many workers are created.
    This helper caps ``num_proc`` to 4 on such machines.

    When ``CUDA_VISIBLE_DEVICES`` restricts to a single GPU, the cap
    does not apply.

    Args:
        desired: The num_proc you *want*. If None (or not an int),
                 auto-computes a third of ``os.cpu_count()``. Values
                 below 1 are raised to 1.

    Returns:
        A safe integer ≥ 1.
    """
    import os
    import sys

    # Windows uses 'spawn' for multiprocessing -- the overhead of re-importing
    # torch/transformers/unsloth per worker is typically slower than single-process.
    if sys.platform == "win32":
        return 1

    if desired is None or not isinstance(desired, int):
        # os.cpu_count() returns None when the count cannot be determined.
        desired = (os.cpu_count() or 1) // 3

    # Enforce the documented ">= 1" contract for both the auto-computed value
    # and an explicit 0 / negative request.
    desired = max(1, desired)

    visible = get_visible_gpu_count()
    if visible > 1:
        capped = min(4, desired)
        if capped < desired:
            # Only report a cap when the value was actually lowered.
            logger.info(
                f"Multi-GPU detected ({visible} visible GPUs) -- "
                f"capping num_proc {desired} -> {capped} to avoid fork deadlocks"
            )
        return capped

    return desired


================================================
FILE: studio/backend/utils/inference/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Inference utility functions
"""

from utils.inference.inference_config import load_inference_config

__all__ = ["load_inference_config"]


================================================
FILE: studio/backend/utils/inference/inference_config.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Inference configuration loading utilities.

This module provides functions to load inference parameters (temperature, top_p, top_k, min_p)
from model YAML configuration files, with fallback to default.yaml.
Includes family-based lookup from inference_defaults.json for GGUF models.
"""

from pathlib import Path
from typing import Dict, Any, Optional
import json
import yaml
import structlog
from loggers import get_logger

from utils.models.model_config import load_model_defaults

logger = get_logger(__name__)

# ── Family-based inference defaults (loaded once, cached) ──────────────

# Both caches start as None ("not loaded yet") and are filled together by
# _load_family_defaults(): the "families" object and the "patterns" list of
# inference_defaults.json, or {} / [] when that file cannot be loaded.
_FAMILY_DEFAULTS: Optional[Dict[str, Any]] = None
_FAMILY_PATTERNS: Optional[list] = None


def _load_family_defaults():
    """
    Populate the module-level family caches from inference_defaults.json.

    Does nothing once ``_FAMILY_DEFAULTS`` has been set.  Any failure while
    reading or parsing the file is logged as a warning and leaves both
    caches empty, so the load is attempted only once.
    """
    global _FAMILY_DEFAULTS, _FAMILY_PATTERNS
    if _FAMILY_DEFAULTS is not None:
        return

    backend_root = Path(__file__).parent.parent.parent
    json_path = backend_root / "assets" / "configs" / "inference_defaults.json"

    families, patterns = {}, []
    try:
        with open(json_path, "r", encoding = "utf-8") as f:
            data = json.load(f)
        families = data.get("families", {})
        patterns = data.get("patterns", [])
    except Exception as e:
        logger.warning(f"Failed to load inference_defaults.json: {e}")
        families, patterns = {}, []

    _FAMILY_DEFAULTS = families
    _FAMILY_PATTERNS = patterns

def get_family_inference_params(model_id: str) -> Dict[str, Any]:
    """
    Return the recommended inference parameters for a model's family.

    The identifier is lowercased and everything up to the first ``/`` (the
    org prefix) is dropped, e.g. "unsloth/Qwen3.5-9B-GGUF" becomes
    "qwen3.5-9b-gguf".  The patterns from inference_defaults.json are then
    tried in order as substrings; the first one that has a non-empty
    parameter set wins.

    Args:
        model_id: Model identifier (e.g. "unsloth/Qwen3.5-9B-GGUF")

    Returns:
        A copy of the matching family's parameters, or an empty dict when
        nothing matches or no defaults are loaded.
    """
    _load_family_defaults()

    patterns, defaults = _FAMILY_PATTERNS, _FAMILY_DEFAULTS
    if not (patterns and defaults):
        return {}

    name = model_id.lower()
    _, slash, remainder = name.partition("/")
    if slash:
        name = remainder

    # Patterns are ordered longest-match-first in the JSON.
    for family in patterns:
        if family not in name:
            continue
        family_params = defaults.get(family, {})
        if family_params:
            return dict(family_params)

    return {}


def _has_specific_yaml(model_identifier: str) -> bool:
    """
    Check if a model has its own YAML config (not just default.yaml).

    A model counts as having its own config when its lowercased identifier
    is a key of ``_REVERSE_MODEL_MAPPING``, or when a file named
    ``<identifier with "/" replaced by "_">.yaml`` exists anywhere below
    ``assets/configs/model_defaults``.

    Args:
        model_identifier: Model identifier or path.

    Returns:
        True if a model-specific YAML config is found, False otherwise.
    """
    import glob

    from utils.models.model_config import _REVERSE_MODEL_MAPPING

    # Check the mapping
    if model_identifier.lower() in _REVERSE_MODEL_MAPPING:
        return True

    script_dir = Path(__file__).parent.parent.parent
    defaults_dir = script_dir / "assets" / "configs" / "model_defaults"

    # Check for exact filename match.  rglob() treats its argument as a
    # pattern, so escape glob metacharacters ("*", "?", "[") that may occur
    # in an identifier to keep this a literal filename lookup.
    model_filename = model_identifier.replace("/", "_") + ".yaml"
    try:
        return any(
            config_path.is_file()
            for config_path in defaults_dir.rglob(glob.escape(model_filename))
        )
    except (NotImplementedError, ValueError):
        # pathlib rejects absolute patterns (e.g. a Windows drive path);
        # such an identifier cannot name a bundled config file.
        return False


def load_inference_config(model_identifier: str) -> Dict[str, Any]:
    """
    Load inference configuration parameters for a model.

    Priority chain for the sampling parameters:
    1. Model-specific YAML (only when the model has its own config and the
       value there is numeric)
    2. Family-based defaults from inference_defaults.json
    3. default.yaml, then a hard-coded fallback

    ``trust_remote_code`` is taken from the model's loaded defaults, then
    default.yaml, then ``False``; family defaults are not consulted for it.

    Args:
        model_identifier: Model identifier (e.g., "unsloth/llama-3-8b-bnb-4bit")

    Returns:
        Dictionary containing inference parameters:
        {
            "temperature": float,
            "top_p": float,
            "top_k": int,
            "min_p": float,
            "presence_penalty": float,
            "trust_remote_code": bool
        }
    """

    def _inference_section(config: Any) -> Dict[str, Any]:
        # A bare `inference:` key parses to None; treat anything that is not
        # a mapping as "no inference settings" instead of crashing on .get().
        section = config.get("inference") if isinstance(config, dict) else None
        return section if isinstance(section, dict) else {}

    # Load model defaults to get inference parameters
    model_defaults = load_model_defaults(model_identifier)

    # Load default.yaml for fallback values
    script_dir = Path(__file__).parent.parent.parent
    defaults_dir = script_dir / "assets" / "configs" / "model_defaults"
    default_config_path = defaults_dir / "default.yaml"

    default_inference: Dict[str, Any] = {}
    if default_config_path.exists():
        try:
            with open(default_config_path, "r", encoding = "utf-8") as f:
                default_inference = _inference_section(yaml.safe_load(f))
        except Exception as e:
            logger.warning(f"Failed to load default.yaml: {e}")

    # Family-based defaults from inference_defaults.json
    family_params = get_family_inference_params(model_identifier)

    model_inference = _inference_section(model_defaults)

    # If the model has its own YAML config, those values take priority over family defaults.
    # If it only fell back to default.yaml, family defaults take priority.
    has_own_yaml = _has_specific_yaml(model_identifier)

    def _get_param(key, hardcoded_default):
        if has_own_yaml:
            # Model-specific YAML wins when it carries a numeric value.
            val = model_inference.get(key)
            if isinstance(val, (int, float)):
                return val
        # Otherwise family defaults fill the gap, then default.yaml.
        if key in family_params:
            return family_params[key]
        return default_inference.get(key, hardcoded_default)

    inference_config = {
        "temperature": _get_param("temperature", 0.7),
        "top_p": _get_param("top_p", 0.95),
        "top_k": _get_param("top_k", -1),
        "min_p": _get_param("min_p", 0.01),
        "presence_penalty": _get_param("presence_penalty", 0.0),
        "trust_remote_code": model_inference.get(
            "trust_remote_code", default_inference.get("trust_remote_code", False)
        ),
    }

    return inference_config


================================================
FILE: studio/backend/utils/models/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Model and LoRA configuration handling
"""

from .model_config import (
    ModelConfig,
    GgufVariantInfo,
    is_vision_model,
    is_embedding_model,
    detect_audio_type,
    is_audio_input_type,
    VALID_AUDIO_TYPES,
    scan_trained_loras,
    scan_exported_models,
    load_model_defaults,
    get_base_model_from_lora,
    load_model_config,
    list_gguf_variants,
    MODEL_NAME_MAPPING,
    UI_STATUS_INDICATORS,
)
from .checkpoints import scan_checkpoints

__all__ = [
    "ModelConfig",
    "GgufVariantInfo",
    "is_vision_model",
    "is_embedding_model",
    "detect_audio_type",
    "is_audio_input_type",
    "VALID_AUDIO_TYPES",
    "scan_trained_loras",
    "scan_exported_models",
    "load_model_defaults",
    "get_base_model_from_lora",
    "load_model_config",
    "list_gguf_variants",
    "MODEL_NAME_MAPPING",
    "UI_STATUS_INDICATORS",
    "scan_checkpoints",
]


================================================
FILE: studio/backend/utils/models/checkpoints.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Checkpoint scanning utilities for discovering training runs and their checkpoints.
"""

import json
import structlog
from loggers import get_logger
from pathlib import Path
from typing import List, Optional, Tuple
from utils.paths import outputs_root, resolve_output_dir

logger = get_logger(__name__)


def _read_checkpoint_loss(checkpoint_path: Path) -> Optional[float]:
    """
    Read the training loss from a checkpoint's trainer_state.json.

    Walks ``log_history`` from newest to oldest and returns the first
    ``loss`` value found.  The newest entry is not always a training record
    (it can be an evaluation or summary record without a ``loss`` key), so
    looking only at the last entry would report no loss in those cases.

    Args:
        checkpoint_path: Directory expected to contain trainer_state.json.

    Returns:
        The most recent logged training loss, or None if the file is
        missing, unreadable, or contains no entry with a loss.
    """
    trainer_state = checkpoint_path / "trainer_state.json"
    if not trainer_state.exists():
        return None
    try:
        with open(trainer_state, encoding = "utf-8") as f:
            state = json.load(f)
        for entry in reversed(state.get("log_history") or []):
            if isinstance(entry, dict) and entry.get("loss") is not None:
                return entry["loss"]
    except Exception as e:
        # Best effort: a broken state file must not break checkpoint scanning.
        logger.debug(f"Could not read loss from {trainer_state}: {e}")
    return None


def _checkpoint_sort_key(path: Path) -> Tuple[int, int, str]:
    """Order ``checkpoint-N`` directories by numeric step instead of by name."""
    suffix = path.name[len("checkpoint-") :]
    if suffix.isdecimal():
        return (1, int(suffix), path.name)
    # Non-numeric suffixes sort first so the last entry is the highest step.
    return (0, 0, path.name)


def scan_checkpoints(
    outputs_dir: str = str(outputs_root()),
) -> List[Tuple[str, List[Tuple[str, str, Optional[float]]], dict]]:
    """
    Scan outputs folder for training runs and their checkpoints.

    A sub-directory is a training run when it contains ``config.json`` or
    ``adapter_config.json``.  Its ``checkpoint-N`` sub-directories are listed
    in ascending step order (numeric, so ``checkpoint-1000`` comes after
    ``checkpoint-500``).

    Args:
        outputs_dir: Directory to scan; resolved through ``resolve_output_dir``.

    Returns:
        List of tuples: [(model_name, [(display_name, checkpoint_path, loss), ...], metadata), ...]
        metadata keys: base_model, peft_type, lora_rank (all optional)
        The first entry in each checkpoint list is the main adapter; its loss is
        set to the loss of the last (highest-step) intermediate checkpoint.
        Runs are sorted newest first by the run directory's modification
        time.  An empty list is returned when the directory is missing or
        scanning fails.
    """
    models = []
    outputs_path = resolve_output_dir(outputs_dir)

    if not outputs_path.exists():
        logger.warning(f"Outputs directory not found: {outputs_dir}")
        return models

    try:
        for item in outputs_path.iterdir():
            if not item.is_dir():
                continue

            config_file = item / "config.json"
            adapter_config = item / "adapter_config.json"

            if not (config_file.exists() or adapter_config.exists()):
                continue

            # Extract training metadata from adapter_config.json / config.json
            metadata: dict = {}
            try:
                if adapter_config.exists():
                    cfg = json.loads(adapter_config.read_text())
                    metadata["base_model"] = cfg.get("base_model_name_or_path")
                    metadata["peft_type"] = cfg.get("peft_type")
                    metadata["lora_rank"] = cfg.get("r")
                elif config_file.exists():
                    cfg = json.loads(config_file.read_text())
                    metadata["base_model"] = cfg.get("_name_or_path")
            except Exception as e:
                # Metadata is optional; keep the run but leave a trace of why
                # it is incomplete.
                logger.debug(f"Could not read metadata for {item.name}: {e}")

            # Fallback: extract base model name from folder name
            # e.g. "unsloth_Llama-3.2-3B-Instruct_1771227800" → "unsloth/Llama-3.2-3B-Instruct"
            if not metadata.get("base_model"):
                parts = item.name.rsplit("_", 1)
                if len(parts) == 2 and parts[1].isdigit():
                    name_part = parts[0]
                    idx = name_part.find("_")
                    if idx > 0:
                        metadata["base_model"] = (
                            name_part[:idx] + "/" + name_part[idx + 1 :]
                        )
                    else:
                        metadata["base_model"] = name_part

            # This is a valid training run
            checkpoints = []

            # Placeholder for the main adapter — loss filled from last checkpoint below
            checkpoints.append((item.name, str(item), None))

            # Scan for intermediate checkpoints (checkpoint-N subdirs), in
            # ascending numeric step order.
            checkpoint_dirs = [
                sub
                for sub in item.iterdir()
                if sub.is_dir() and sub.name.startswith("checkpoint-")
            ]
            for sub in sorted(checkpoint_dirs, key = _checkpoint_sort_key):
                sub_config = sub / "config.json"
                sub_adapter = sub / "adapter_config.json"
                if sub_config.exists() or sub_adapter.exists():
                    loss = _read_checkpoint_loss(sub)
                    checkpoints.append((sub.name, str(sub), loss))

            # Assign the last checkpoint's loss to the main adapter entry
            if len(checkpoints) > 1:
                last_checkpoint_loss = checkpoints[-1][2]
                checkpoints[0] = (
                    checkpoints[0][0],
                    checkpoints[0][1],
                    last_checkpoint_loss,
                )

            models.append((item.name, checkpoints, metadata))
            logger.debug(
                f"Found model: {item.name} with {len(checkpoints)} checkpoint(s)"
            )

        # Sort by modification time (newest first)
        models.sort(key = lambda x: Path(x[1][0][1]).stat().st_mtime, reverse = True)

        logger.info(f"Found {len(models)} training runs in {outputs_dir}")
        return models

    except Exception as e:
        logger.error(f"Error scanning checkpoints: {e}")
        return []


================================================
FILE: studio/backend/utils/models/model_config.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Model and LoRA configuration handling
"""

from transformers import AutoConfig
from dataclasses import dataclass
from typing import Optional, Dict, Any
from utils.paths import (
    normalize_path,
    is_local_path,
    is_model_cached,
    outputs_root,
    exports_root,
    resolve_output_dir,
    resolve_export_dir,
)
from utils.utils import without_hf_auth
import structlog
from loggers import get_logger
import os
import subprocess
import sys
from pathlib import Path
from typing import List, Tuple
import json
import yaml


logger = get_logger(__name__)

# Model name mapping: groups all equivalent model names under the YAML config
# file they share.
# Format: "canonical_model_name.yaml": [list of all equivalent model names]
# Lookups go through _REVERSE_MODEL_MAPPING below, which lowercases every name,
# so repeated entries inside one list are harmless.
# NOTE(review): the original comment said each canonical filename is based on
# the first model name in the upstream model mapper. Several keys here do not
# match the first name in their list (e.g. "unsloth_gemma-2-27b-bnb-4bit.yaml"),
# so do not rely on that when adding entries -- confirm against the YAML files.
MODEL_NAME_MAPPING = {
    # ── Embedding models ──
    "unsloth_all-MiniLM-L6-v2.yaml": [
        "unsloth/all-MiniLM-L6-v2",
        "sentence-transformers/all-MiniLM-L6-v2",
    ],
    "unsloth_bge-m3.yaml": [
        "unsloth/bge-m3",
        "BAAI/bge-m3",
    ],
    "unsloth_embeddinggemma-300m.yaml": [
        "unsloth/embeddinggemma-300m",
        "google/embeddinggemma-300m",
    ],
    "unsloth_gte-modernbert-base.yaml": [
        "unsloth/gte-modernbert-base",
        "Alibaba-NLP/gte-modernbert-base",
    ],
    "unsloth_Qwen3-Embedding-0.6B.yaml": [
        "unsloth/Qwen3-Embedding-0.6B",
        "Qwen/Qwen3-Embedding-0.6B",
        "unsloth/Qwen3-Embedding-4B",
        "Qwen/Qwen3-Embedding-4B",
    ],
    # ── Other models ──
    "unsloth_answerdotai_ModernBERT-large.yaml": [
        "answerdotai/ModernBERT-large",
    ],
    "unsloth_Qwen2.5-Coder-7B-Instruct-bnb-4bit.yaml": [
        "unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
    ],
    "unsloth_codegemma-7b-bnb-4bit.yaml": [
        "unsloth/codegemma-7b-bnb-4bit",
        "unsloth/codegemma-7b",
        "google/codegemma-7b",
    ],
    "unsloth_ERNIE-4.5-21B-A3B-PT.yaml": [
        "unsloth/ERNIE-4.5-21B-A3B-PT",
    ],
    "unsloth_ERNIE-4.5-VL-28B-A3B-PT.yaml": [
        "unsloth/ERNIE-4.5-VL-28B-A3B-PT",
    ],
    "tiiuae_Falcon-H1-0.5B-Instruct.yaml": [
        "tiiuae/Falcon-H1-0.5B-Instruct",
        "unsloth/Falcon-H1-0.5B-Instruct",
    ],
    "unsloth_functiongemma-270m-it.yaml": [
        "unsloth/functiongemma-270m-it-unsloth-bnb-4bit",
        "google/functiongemma-270m-it",
        "unsloth/functiongemma-270m-it-unsloth-bnb-4bit",  # NOTE(review): duplicate of the first entry
    ],
    "unsloth_gemma-2-2b.yaml": [
        "unsloth/gemma-2-2b-bnb-4bit",
        "google/gemma-2-2b",
    ],
    "unsloth_gemma-2-27b-bnb-4bit.yaml": [
        "unsloth/gemma-2-9b-bnb-4bit",
        "unsloth/gemma-2-9b",
        "google/gemma-2-9b",
        "unsloth/gemma-2-27b",
        "google/gemma-2-27b",
    ],
    "unsloth_gemma-3-4b-pt.yaml": [
        "unsloth/gemma-3-4b-pt-unsloth-bnb-4bit",
        "google/gemma-3-4b-pt",
        "unsloth/gemma-3-4b-pt-bnb-4bit",
    ],
    "unsloth_gemma-3-4b-it.yaml": [
        "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
        "google/gemma-3-4b-it",
        "unsloth/gemma-3-4b-it-bnb-4bit",
    ],
    "unsloth_gemma-3-27b-it.yaml": [
        "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
        "google/gemma-3-27b-it",
        "unsloth/gemma-3-27b-it-bnb-4bit",
    ],
    "unsloth_gemma-3-270m-it.yaml": [
        "unsloth/gemma-3-270m-it-unsloth-bnb-4bit",
        "google/gemma-3-270m-it",
        "unsloth/gemma-3-270m-it-bnb-4bit",
    ],
    "unsloth_gemma-3n-E4B-it.yaml": [
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
        "google/gemma-3n-E4B-it",
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",  # NOTE(review): duplicate of the first entry
    ],
    "unsloth_gemma-3n-E4B.yaml": [
        "unsloth/gemma-3n-E4B-unsloth-bnb-4bit",
        "google/gemma-3n-E4B",
    ],
    "unsloth_gpt-oss-20b.yaml": [
        "openai/gpt-oss-20b",
        "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-20b-BF16",
    ],
    "unsloth_gpt-oss-120b.yaml": [
        "openai/gpt-oss-120b",
        "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    ],
    "unsloth_granite-4.0-350m-unsloth-bnb-4bit.yaml": [
        "unsloth/granite-4.0-350m",
        "ibm-granite/granite-4.0-350m",
        "unsloth/granite-4.0-350m-bnb-4bit",
    ],
    "unsloth_granite-4.0-h-micro.yaml": [
        "ibm-granite/granite-4.0-h-micro",
        "unsloth/granite-4.0-h-micro-bnb-4bit",
        "unsloth/granite-4.0-h-micro-unsloth-bnb-4bit",
    ],
    "unsloth_LFM2-1.2B.yaml": [
        "unsloth/LFM2-1.2B",
    ],
    "unsloth_llama-3-8b-bnb-4bit.yaml": [
        "unsloth/llama-3-8b",
        "meta-llama/Meta-Llama-3-8B",
    ],
    "unsloth_llama-3-8b-Instruct-bnb-4bit.yaml": [
        "unsloth/llama-3-8b-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
    ],
    "unsloth_Meta-Llama-3.1-70B-bnb-4bit.yaml": [
        "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
        "unsloth/Meta-Llama-3.1-8B-unsloth-bnb-4bit",
        "meta-llama/Meta-Llama-3.1-8B",
        "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
        "unsloth/Meta-Llama-3.1-8B",
        "unsloth/Meta-Llama-3.1-70B",
        "meta-llama/Meta-Llama-3.1-70B",
        "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
        "meta-llama/Meta-Llama-3.1-405B",
    ],
    "unsloth_Meta-Llama-3.1-8B-Instruct-bnb-4bit.yaml": [
        "unsloth/Meta-Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "unsloth/Meta-Llama-3.1-8B-Instruct",
        "RedHatAI/Llama-3.1-8B-Instruct-FP8",
        "unsloth/Llama-3.1-8B-Instruct-FP8-Block",
        "unsloth/Llama-3.1-8B-Instruct-FP8-Dynamic",
    ],
    "unsloth_Llama-3.2-3B-Instruct.yaml": [
        "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit",
        "meta-llama/Llama-3.2-3B-Instruct",
        "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
        "RedHatAI/Llama-3.2-3B-Instruct-FP8",
        "unsloth/Llama-3.2-3B-Instruct-FP8-Block",
        "unsloth/Llama-3.2-3B-Instruct-FP8-Dynamic",
    ],
    "unsloth_Llama-3.2-1B-Instruct.yaml": [
        "unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit",
        "meta-llama/Llama-3.2-1B-Instruct",
        "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        "RedHatAI/Llama-3.2-1B-Instruct-FP8",
        "unsloth/Llama-3.2-1B-Instruct-FP8-Block",
        "unsloth/Llama-3.2-1B-Instruct-FP8-Dynamic",
    ],
    "unsloth_Llama-3.2-11B-Vision-Instruct.yaml": [
        "unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit",
        "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
    ],
    "unsloth_Llama-3.3-70B-Instruct.yaml": [
        "unsloth/Llama-3.3-70B-Instruct-unsloth-bnb-4bit",
        "meta-llama/Llama-3.3-70B-Instruct",
        "unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
        "RedHatAI/Llama-3.3-70B-Instruct-FP8",
        "unsloth/Llama-3.3-70B-Instruct-FP8-Block",
        "unsloth/Llama-3.3-70B-Instruct-FP8-Dynamic",
    ],
    "unsloth_Llasa-3B.yaml": [
        "HKUSTAudio/Llasa-1B",
        "unsloth/Llasa-3B",
    ],
    "unsloth_Magistral-Small-2509-unsloth-bnb-4bit.yaml": [
        "unsloth/Magistral-Small-2509",
        "mistralai/Magistral-Small-2509",
        "unsloth/Magistral-Small-2509-bnb-4bit",
    ],
    "unsloth_Ministral-3-3B-Instruct-2512.yaml": [
        "unsloth/Ministral-3-3B-Instruct-2512",
    ],
    "unsloth_mistral-7b-v0.3-bnb-4bit.yaml": [
        "unsloth/mistral-7b-v0.3-bnb-4bit",
        "unsloth/mistral-7b-v0.3",
        "mistralai/Mistral-7B-v0.3",
    ],
    "unsloth_Mistral-Nemo-Base-2407-bnb-4bit.yaml": [
        "unsloth/Mistral-Nemo-Base-2407-bnb-4bit",
        "unsloth/Mistral-Nemo-Base-2407",
        "mistralai/Mistral-Nemo-Base-2407",
        "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
        "unsloth/Mistral-Nemo-Instruct-2407",
        "mistralai/Mistral-Nemo-Instruct-2407",
    ],
    "unsloth_Mistral-Small-Instruct-2409.yaml": [
        "unsloth/Mistral-Small-Instruct-2409-bnb-4bit",
        "mistralai/Mistral-Small-Instruct-2409",
    ],
    "unsloth_mistral-7b-instruct-v0.3-bnb-4bit.yaml": [
        "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.3",
    ],
    "unsloth_Qwen2.5-1.5B-Instruct.yaml": [
        "unsloth/Qwen2.5-1.5B-Instruct-unsloth-bnb-4bit",
        "Qwen/Qwen2.5-1.5B-Instruct",
        "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit",
    ],
    "unsloth_Nemotron-3-Nano-30B-A3B.yaml": [
        "unsloth/Nemotron-3-Nano-30B-A3B",
    ],
    "unsloth_orpheus-3b-0.1-ft.yaml": [
        "unsloth/orpheus-3b-0.1-ft",
        "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit",
        "canopylabs/orpheus-3b-0.1-ft",
        "unsloth/orpheus-3b-0.1-ft-bnb-4bit",
    ],
    "OuteAI_Llama-OuteTTS-1.0-1B.yaml": [
        "OuteAI/Llama-OuteTTS-1.0-1B",
        "unsloth/Llama-OuteTTS-1.0-1B",
        "unsloth/llama-outetts-1.0-1b",
        "OuteAI/OuteTTS-1.0-0.6B",
        "unsloth/OuteTTS-1.0-0.6B",
        "unsloth/outetts-1.0-0.6b",
    ],
    "unsloth_PaddleOCR-VL.yaml": [
        "unsloth/PaddleOCR-VL",
    ],
    "unsloth_Phi-3-medium-4k-instruct.yaml": [
        "unsloth/Phi-3-medium-4k-instruct-bnb-4bit",
        "microsoft/Phi-3-medium-4k-instruct",
    ],
    "unsloth_Phi-3.5-mini-instruct.yaml": [
        "unsloth/Phi-3.5-mini-instruct-bnb-4bit",
        "microsoft/Phi-3.5-mini-instruct",
    ],
    "unsloth_Phi-4.yaml": [
        "unsloth/phi-4-unsloth-bnb-4bit",
        "microsoft/phi-4",
        "unsloth/phi-4-bnb-4bit",
    ],
    "unsloth_Pixtral-12B-2409.yaml": [
        "unsloth/Pixtral-12B-2409-unsloth-bnb-4bit",
        "mistralai/Pixtral-12B-2409",
        "unsloth/Pixtral-12B-2409-bnb-4bit",
    ],
    "unsloth_Qwen2-7B.yaml": [
        "unsloth/Qwen2-7B-bnb-4bit",
        "Qwen/Qwen2-7B",
    ],
    "unsloth_Qwen2-VL-7B-Instruct.yaml": [
        "unsloth/Qwen2-VL-7B-Instruct-unsloth-bnb-4bit",
        "Qwen/Qwen2-VL-7B-Instruct",
        "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    ],
    "unsloth_Qwen2.5-7B.yaml": [
        "unsloth/Qwen2.5-7B-unsloth-bnb-4bit",
        "Qwen/Qwen2.5-7B",
        "unsloth/Qwen2.5-7B-bnb-4bit",
    ],
    "unsloth_Qwen2.5-Coder-1.5B-Instruct.yaml": [
        "unsloth/Qwen2.5-Coder-1.5B-Instruct-bnb-4bit",
        "Qwen/Qwen2.5-Coder-1.5B-Instruct",
    ],
    "unsloth_Qwen2.5-Coder-14B-Instruct.yaml": [
        "unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit",
        "Qwen/Qwen2.5-Coder-14B-Instruct",
    ],
    "unsloth_Qwen2.5-VL-7B-Instruct-bnb-4bit.yaml": [
        "unsloth/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit",
    ],
    "unsloth_Qwen3-0.6B.yaml": [
        "unsloth/Qwen3-0.6B-unsloth-bnb-4bit",
        "Qwen/Qwen3-0.6B",
        "unsloth/Qwen3-0.6B-bnb-4bit",
        "Qwen/Qwen3-0.6B-FP8",
        "unsloth/Qwen3-0.6B-FP8",
    ],
    "unsloth_Qwen3-4B-Instruct-2507.yaml": [
        "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
        "Qwen/Qwen3-4B-Instruct-2507",
        "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit",
        "Qwen/Qwen3-4B-Instruct-2507-FP8",
        "unsloth/Qwen3-4B-Instruct-2507-FP8",
    ],
    "unsloth_Qwen3-4B-Thinking-2507.yaml": [
        "unsloth/Qwen3-4B-Thinking-2507-unsloth-bnb-4bit",
        "Qwen/Qwen3-4B-Thinking-2507",
        "unsloth/Qwen3-4B-Thinking-2507-bnb-4bit",
        "Qwen/Qwen3-4B-Thinking-2507-FP8",
        "unsloth/Qwen3-4B-Thinking-2507-FP8",
    ],
    "unsloth_Qwen3-14B-Base-unsloth-bnb-4bit.yaml": [
        "unsloth/Qwen3-14B-Base",
        "Qwen/Qwen3-14B-Base",
        "unsloth/Qwen3-14B-Base-bnb-4bit",
    ],
    "unsloth_Qwen3-14B.yaml": [
        "unsloth/Qwen3-14B-unsloth-bnb-4bit",
        "Qwen/Qwen3-14B",
        "unsloth/Qwen3-14B-bnb-4bit",
        "Qwen/Qwen3-14B-FP8",
        "unsloth/Qwen3-14B-FP8",
    ],
    "unsloth_Qwen3-32B.yaml": [
        "unsloth/Qwen3-32B-unsloth-bnb-4bit",
        "Qwen/Qwen3-32B",
        "unsloth/Qwen3-32B-bnb-4bit",
        "Qwen/Qwen3-32B-FP8",
        "unsloth/Qwen3-32B-FP8",
    ],
    "unsloth_Qwen3-VL-8B-Instruct-unsloth-bnb-4bit.yaml": [
        "Qwen/Qwen3-VL-8B-Instruct-FP8",
        "unsloth/Qwen3-VL-8B-Instruct-FP8",
        "unsloth/Qwen3-VL-8B-Instruct",
        "Qwen/Qwen3-VL-8B-Instruct",
        "unsloth/Qwen3-VL-8B-Instruct-bnb-4bit",
    ],
    "sesame_csm-1b.yaml": [
        "sesame/csm-1b",
        "unsloth/csm-1b",
    ],
    "Spark-TTS-0.5B_LLM.yaml": [
        "Spark-TTS-0.5B/LLM",
        "unsloth/Spark-TTS-0.5B",
    ],
    "unsloth_tinyllama-bnb-4bit.yaml": [
        "unsloth/tinyllama",
        "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    ],
    "unsloth_whisper-large-v3.yaml": [
        "unsloth/whisper-large-v3",
        "openai/whisper-large-v3",
    ],
}

# Reverse mapping for quick lookup: model_name -> canonical_filename
# Keys are lowercased, so callers must lowercase an identifier before looking
# it up. If a name is listed under more than one YAML file, the entry that
# comes last in MODEL_NAME_MAPPING wins.
_REVERSE_MODEL_MAPPING = {}
for canonical_file, model_names in MODEL_NAME_MAPPING.items():
    for model_name in model_names:
        _REVERSE_MODEL_MAPPING[model_name.lower()] = canonical_file


def load_model_config(
    model_name: str,
    use_auth: bool = False,
    token: Optional[str] = None,
    trust_remote_code: bool = True,
):
    """
    Load a model's config via ``AutoConfig.from_pretrained``.

    Three authentication modes, checked in this order:
    - a truthy ``token`` is passed straight through;
    - otherwise, with ``use_auth`` set, no token argument is passed at all
      (default authentication, i.e. cached tokens);
    - otherwise the load runs inside ``without_hf_auth()`` with
      ``token = None`` (for public model checks).

    Args:
        model_name: Model identifier or path handed to ``from_pretrained``.
        use_auth: Use default authentication when no explicit token is given.
        token: Explicit token; takes precedence over ``use_auth``.
        trust_remote_code: Forwarded to ``from_pretrained``.

    Returns:
        Whatever ``AutoConfig.from_pretrained`` returns.
    """
    shared_kwargs = {"trust_remote_code": trust_remote_code}

    if token:
        # Explicit token provided - use it
        return AutoConfig.from_pretrained(model_name, token = token, **shared_kwargs)

    if use_auth:
        # Use default authentication (cached tokens)
        return AutoConfig.from_pretrained(model_name, **shared_kwargs)

    # Load without any authentication (for public model checks)
    with without_hf_auth():
        return AutoConfig.from_pretrained(model_name, token = None, **shared_kwargs)


# VLM architecture suffixes and known VLM model_type values.
# The same suffixes and model types are repeated as literals inside
# _VISION_CHECK_SCRIPT below -- keep the two copies in sync when editing.
_VLM_ARCH_SUFFIXES = ("ForConditionalGeneration", "ForVisionText2Text")
_VLM_MODEL_TYPES = {
    "phi3_v",
    "llava",
    "llava_next",
    "llava_onevision",
    "internvl_chat",
    "cogvlm2",
    "minicpmv",
}

# Pre-computed .venv_t5 path and backend dir for subprocess version switching.
# _VENV_T5_DIR is ~/.unsloth/studio/.venv_t5; _BACKEND_DIR is the directory
# three levels above this file's resolved location. Both are passed to the
# vision-check subprocess as argv, which prepends them to its sys.path.
_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5")
_BACKEND_DIR = str(Path(__file__).resolve().parent.parent.parent)

# Inline script executed in a subprocess with transformers 5.x activated.
# Receives model_name and token via argv, prints JSON result to stdout.
#
# argv layout: [1] .venv_t5 dir, [2] backend dir, [3] model name,
# [4] optional token ("" means no token).
# On success the last thing printed is one JSON object with the keys
# "is_vision", "model_type" and "architectures"; on failure it prints
# {"error": "..."} and exits with status 1.
# A config whose `architectures` attribute is None is treated as having no
# architectures, so the remaining vision checks still run for it.
_VISION_CHECK_SCRIPT = r"""
import sys, os, json
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Activate transformers 5.x
venv_t5 = sys.argv[1]
backend_dir = sys.argv[2]
model_name = sys.argv[3]
token = sys.argv[4] if len(sys.argv) > 4 and sys.argv[4] != "" else None

sys.path.insert(0, venv_t5)
if backend_dir not in sys.path:
    sys.path.insert(0, backend_dir)

try:
    from transformers import AutoConfig
    kwargs = {"trust_remote_code": True}
    if token:
        kwargs["token"] = token
    config = AutoConfig.from_pretrained(model_name, **kwargs)

    # `architectures` may be missing or None; normalise to a list.
    archs = getattr(config, "architectures", None) or []

    is_vlm = any(
        x.endswith(("ForConditionalGeneration", "ForVisionText2Text"))
        for x in archs
    )
    if not is_vlm and hasattr(config, "vision_config"):
        is_vlm = True
    if not is_vlm and hasattr(config, "img_processor"):
        is_vlm = True
    if not is_vlm and hasattr(config, "image_token_index"):
        is_vlm = True
    if not is_vlm and hasattr(config, "model_type"):
        vlm_types = {"phi3_v","llava","llava_next","llava_onevision",
                      "internvl_chat","cogvlm2","minicpmv"}
        if config.model_type in vlm_types:
            is_vlm = True

    model_type = getattr(config, "model_type", "unknown")
    print(json.dumps({"is_vision": is_vlm, "model_type": model_type,
                       "architectures": archs}))
except Exception as exc:
    print(json.dumps({"error": str(exc)}))
    sys.exit(1)
"""


def _parse_vision_check_output(stdout: str) -> Optional[dict]:
    """Return the JSON object found on the last non-empty line of ``stdout``.

    Only the last line is parsed: the child loads the config with
    ``trust_remote_code``, so code imported there may print to stdout before
    the result line. Returns None when that line is not a JSON object.
    """
    lines = [line for line in stdout.splitlines() if line.strip()]
    if not lines:
        return None
    try:
        payload = json.loads(lines[-1])
    except ValueError:
        return None
    return payload if isinstance(payload, dict) else None


def _is_vision_model_subprocess(
    model_name: str, hf_token: Optional[str] = None
) -> bool:
    """Run the vision-model check in a subprocess with transformers 5.x.

    Same pattern as training/inference workers: spawn a clean subprocess
    with .venv_t5/ prepended to sys.path so AutoConfig recognizes newer
    architectures (glm4_moe_lite, etc.).

    Args:
        model_name: Model identifier (HF repo or local path).
        hf_token: Optional HF token for gated/private models.

    Returns:
        True if the child reports the model as vision-capable. False on any
        failure (non-zero exit, timeout, unparsable output, reported error).
    """
    import os

    # Pass the token through the environment rather than argv: command-line
    # arguments are visible to other local users via process listings.
    # The child sees an empty token argument and passes no explicit token;
    # huggingface_hub then falls back to the HF_TOKEN environment variable.
    child_env = None
    if hf_token:
        child_env = dict(os.environ, HF_TOKEN = hf_token)

    try:
        result = subprocess.run(
            [
                sys.executable,
                "-c",
                _VISION_CHECK_SCRIPT,
                _VENV_T5_DIR,
                _BACKEND_DIR,
                model_name,
                "",  # token slot intentionally left empty (see above)
            ],
            capture_output = True,
            text = True,
            timeout = 60,
            env = child_env,
        )
    except subprocess.TimeoutExpired:
        logger.warning("Vision check subprocess timed out for '%s'", model_name)
        return False
    except Exception as exc:
        logger.warning("Vision check subprocess failed for '%s': %s", model_name, exc)
        return False

    data = _parse_vision_check_output(result.stdout or "")

    if result.returncode != 0 or data is None:
        # Prefer the structured error the child printed; stderr often holds
        # unrelated library warnings.
        detail = (
            (data or {}).get("error")
            or (result.stderr or "").strip()
            or (result.stdout or "").strip()
        )
        logger.warning(
            "Vision check subprocess failed for '%s': %s",
            model_name,
            detail,
        )
        return False

    if "error" in data:
        logger.warning(
            "Vision check subprocess error for '%s': %s",
            model_name,
            data["error"],
        )
        return False

    is_vlm = bool(data.get("is_vision", False))
    logger.info(
        "Vision check (subprocess, transformers 5.x) for '%s': "
        "model_type=%s, architectures=%s, is_vision=%s",
        model_name,
        data.get("model_type"),
        data.get("architectures"),
        is_vlm,
    )
    return is_vlm


def is_vision_model(model_name: str, hf_token: Optional[str] = None) -> bool:
    """
    Detect vision-language models (VLMs) by checking architecture in config.
    Works for fine-tuned models since they inherit the base architecture.

    For models that require transformers 5.x (e.g. GLM-4.7-Flash), the check
    runs in a subprocess with .venv_t5/ activated — same pattern as the
    training and inference workers.

    Args:
        model_name: Model identifier (HF repo or local path)
        hf_token: Optional HF token for accessing gated/private models

    Returns:
        True if any VLM signal is found in the config; False otherwise,
        including when the config cannot be loaded.
    """
    # Models that need transformers 5.x must be checked in a subprocess
    # because AutoConfig in the main process (transformers 4.57.x) doesn't
    # recognize their architectures.
    from utils.transformers_version import needs_transformers_5

    if needs_transformers_5(model_name):
        logger.info(
            "Model '%s' needs transformers 5.x — checking vision via subprocess",
            model_name,
        )
        return _is_vision_model_subprocess(model_name, hf_token = hf_token)

    try:
        config = load_model_config(model_name, use_auth = True, token = hf_token)

        # Exclude audio-only models that share ForConditionalGeneration suffix
        # (e.g. CsmForConditionalGeneration, WhisperForConditionalGeneration)
        _audio_only_model_types = {"csm", "whisper"}
        model_type = getattr(config, "model_type", None)
        if model_type in _audio_only_model_types:
            return False

        # Check 1: Architecture class name patterns.
        # The attribute can exist with value None; iterating None would raise
        # and (via the broad except below) skip checks 2-5 entirely.
        architectures = getattr(config, "architectures", None) or ()
        if any(x.endswith(_VLM_ARCH_SUFFIXES) for x in architectures):
            logger.info(
                f"Model {model_name} detected as VLM: architecture {config.architectures}"
            )
            return True

        # Check 2: Has vision_config (most VLMs: LLaVA, Gemma-3, Qwen2-VL, etc.)
        if hasattr(config, "vision_config"):
            logger.info(f"Model {model_name} detected as VLM: has vision_config")
            return True

        # Check 3: Has img_processor (Phi-3.5 Vision uses this instead of vision_config)
        if hasattr(config, "img_processor"):
            logger.info(f"Model {model_name} detected as VLM: has img_processor")
            return True

        # Check 4: Has image_token_index (common in VLMs for image placeholder tokens)
        if hasattr(config, "image_token_index"):
            logger.info(f"Model {model_name} detected as VLM: has image_token_index")
            return True

        # Check 5: Known VLM model_type values that may not match above checks
        if model_type in _VLM_MODEL_TYPES:
            logger.info(
                f"Model {model_name} detected as VLM: model_type={config.model_type}"
            )
            return True

        return False

    except Exception as e:
        # Best-effort detection: any failure is treated as "not a VLM".
        logger.warning(f"Could not determine if {model_name} is vision model: {e}")
        return False


VALID_AUDIO_TYPES = ("snac", "csm", "bicodec", "dac", "whisper", "audio_vlm")

# Cache detection results per session to avoid repeated API calls
_audio_detection_cache: Dict[str, Optional[str]] = {}

# Tokenizer token patterns → audio_type (all 6 types detected from tokenizer_config.json)
_AUDIO_TOKEN_PATTERNS = {
    "csm": lambda tokens: "<|AUDIO|>" in tokens and "<|audio_eos|>" in tokens,
    "whisper": lambda tokens: "<|startoftranscript|>" in tokens,
    "audio_vlm": lambda tokens: "<audio_soft_token>" in tokens,
    "bicodec": lambda tokens: any(t.startswith("<|bicodec_") for t in tokens),
    "dac": lambda tokens: "<|audio_start|>" in tokens
    and "<|audio_end|>" in tokens
    and "<|text_start|>" in tokens
    and "<|text_end|>" in tokens,
    "snac": lambda tokens: sum(1 for t in tokens if t.startswith("<custom_token_"))
    > 10000,
}


def detect_audio_type(model_name: str, hf_token: Optional[str] = None) -> Optional[str]:
    """
    Return the audio type of ``model_name``, or None if it is not an audio model.

    Detection is delegated to the tokenizer-based probe and is not limited to
    a list of known models. Results (including None) are memoized per model
    name for the lifetime of the process.

    Returns: audio_type string ('snac', 'csm', 'bicodec', 'dac', 'whisper', 'audio_vlm') or None.
    """
    try:
        return _audio_detection_cache[model_name]
    except KeyError:
        pass  # not probed yet in this session

    detected = _detect_audio_from_tokenizer(model_name, hf_token)
    _audio_detection_cache[model_name] = detected

    if detected:
        logger.info(f"Model {model_name} detected as audio model: audio_type={detected}")
    return detected


def _detect_audio_from_tokenizer(
    model_name: str, hf_token: Optional[str] = None
) -> Optional[str]:
    """Detect audio type from tokenizer special tokens (for LLM-based audio models).

    First checks local HF cache, then fetches tokenizer_config.json from HuggingFace.
    Checks added_tokens_decoder for distinctive patterns.

    Args:
        model_name: HF repo id.
        hf_token: Optional token; falls back to the HF_TOKEN environment
            variable for the remote fetch.

    Returns:
        The first audio type whose pattern matches, or None when nothing
        matches or the tokenizer config cannot be obtained.
    """
    # Candidate locations of the tokenizer config inside a repo snapshot.
    tok_paths = ("tokenizer_config.json", "LLM/tokenizer_config.json")

    def _check_token_patterns(tok_config) -> Optional[str]:
        # Tolerate malformed configs: anything that is not the expected
        # mapping-of-mappings simply yields "no match".
        if not isinstance(tok_config, dict):
            return None
        added = tok_config.get("added_tokens_decoder") or {}
        if not isinstance(added, dict):
            return None
        token_contents = []
        for entry in added.values():
            content = entry.get("content", "") if isinstance(entry, dict) else ""
            token_contents.append(content if isinstance(content, str) else "")
        for audio_type, check_fn in _AUDIO_TOKEN_PATTERNS.items():
            if check_fn(token_contents):
                return audio_type
        return None

    # 1) Check local HF cache first (works for gated/offline models)
    try:
        from huggingface_hub.constants import HF_HUB_CACHE

        cache_dir = Path(HF_HUB_CACHE)
        repo_dir_name = f"models--{model_name.replace('/', '--')}"
        snapshots_dir = cache_dir / repo_dir_name / "snapshots"
        if snapshots_dir.is_dir():
            for snapshot in snapshots_dir.iterdir():
                for tok_path in tok_paths:
                    tok_file = snapshot / tok_path
                    if not tok_file.exists():
                        continue
                    try:
                        # JSON is UTF-8; do not depend on the locale encoding.
                        tok_config = json.loads(tok_file.read_text(encoding = "utf-8"))
                    except (OSError, ValueError) as e:
                        # One unreadable file must not hide other snapshots.
                        logger.debug(f"Could not read {tok_file}: {e}")
                        continue
                    result = _check_token_patterns(tok_config)
                    if result:
                        return result
    except Exception as e:
        logger.debug(f"Could not check local cache for {model_name}: {e}")

    # 2) Fall back to HuggingFace API
    try:
        import requests
        import os

        # Use provided token, or fall back to env
        token = hf_token or os.environ.get("HF_TOKEN")
        headers = {}
        if token:
            headers["Authorization"] = f"Bearer {token}"

        for tok_path in tok_paths:
            url = f"https://huggingface.co/{model_name}/resolve/main/{tok_path}"
            resp = requests.get(url, headers = headers, timeout = 15)
            if not resp.ok:
                continue

            try:
                tok_config = resp.json()
            except ValueError as e:
                # Non-JSON body for this path; still try the next one.
                logger.debug(f"Invalid tokenizer config JSON at {url}: {e}")
                continue

            result = _check_token_patterns(tok_config)
            if result:
                return result

        return None
    except Exception as e:
        logger.debug(
            f"Could not detect audio type from tokenizer for {model_name}: {e}"
        )
        return None


def is_audio_input_type(audio_type: Optional[str]) -> bool:
    """Tell whether ``audio_type`` denotes a model that takes audio as input.

    Only "whisper" and "audio_vlm" qualify; every other value, including
    None, yields False.
    """
    input_capable = ("whisper", "audio_vlm")
    return any(audio_type == accepted for accepted in input_capable)


def _is_mmproj(filename: str) -> bool:
    """Check if a GGUF filename is a vision projection (mmproj) file."""
    return "mmproj" in filename.lower()


def detect_mmproj_file(path: str) -> Optional[str]:
    """
    Locate an mmproj (vision projection) GGUF file next to a model.

    Args:
        path: A directory to search, or a file whose parent directory is
            searched instead.

    Returns:
        Resolved path of the first ``*.gguf`` file in that directory whose
        name is an mmproj name, or None when the directory does not exist or
        holds no such file.
    """
    target = Path(path)
    directory = target.parent if target.is_file() else target
    if not directory.is_dir():
        return None

    matches = (
        str(candidate.resolve())
        for candidate in directory.glob("*.gguf")
        if _is_mmproj(candidate.name)
    )
    return next(matches, None)


def detect_gguf_model(path: str) -> Optional[str]:
    """
    Resolve a local path to a main-model GGUF file, if there is one.

    Two inputs are understood:
    1. a path to an existing ``.gguf`` file — returned resolved, unless it is
       an mmproj file, in which case None is returned;
    2. a directory — the largest non-mmproj ``*.gguf`` directly inside it is
       returned.

    mmproj (vision projection) files are never returned: they must be passed
    via ``--mmproj``, not ``-m``.  Use :func:`detect_mmproj_file` for those.

    Returns the resolved path as a string, or None if nothing suitable exists.
    For HuggingFace repo detection, use detect_gguf_model_remote() instead.
    """
    candidate = Path(path)

    # Direct file reference
    if candidate.suffix == ".gguf" and candidate.is_file():
        return None if _is_mmproj(candidate.name) else str(candidate.resolve())

    # Directory: pick the biggest main-model file
    if candidate.is_dir():
        largest = max(
            (f for f in candidate.glob("*.gguf") if not _is_mmproj(f.name)),
            key = lambda f: f.stat().st_size,
            default = None,
        )
        if largest is not None:
            return str(largest.resolve())

    return None


# Preferred GGUF quantization levels, in descending priority.
# Q4_K_M is a good default: small, fast, acceptable quality.
# UD (Unsloth Dynamic) variants are always preferred over standard quants
# because they provide better quality per bit. If the repo has no UD variants
# (e.g., bartowski repos), the standard quants are used as fallback.
# Ordered by best size/quality tradeoff, not raw quality.
_GGUF_QUANT_PREFERENCE = [
    # UD variants (best quality per bit) -- Q4 is the sweet spot
    "UD-Q4_K_XL",
    "UD-Q4_K_L",
    "UD-Q5_K_XL",
    "UD-Q3_K_XL",
    "UD-Q6_K_XL",
    "UD-Q6_K_S",
    "UD-Q8_K_XL",
    "UD-Q2_K_XL",
    "UD-IQ4_NL",
    "UD-IQ4_XS",
    "UD-IQ3_S",
    "UD-IQ3_XXS",
    "UD-IQ2_M",
    "UD-IQ2_XXS",
    "UD-IQ1_M",
    "UD-IQ1_S",
    # Standard quants (fallback for non-Unsloth repos)
    "Q4_K_M",
    "Q4_K_S",
    "Q5_K_M",
    "Q5_K_S",
    "Q6_K",
    "Q8_0",
    "Q3_K_M",
    "Q3_K_L",
    "Q3_K_S",
    "Q2_K",
    "Q2_K_L",
    "IQ4_NL",
    "IQ4_XS",
    "IQ3_M",
    "IQ3_XXS",
    "IQ2_M",
    "IQ1_M",
    "F16",
    "BF16",
    "F32",
]


def _pick_best_gguf(filenames: list[str]) -> Optional[str]:
    """
    Pick the best GGUF file from a list of filenames.

    Prefers quantization levels in _GGUF_QUANT_PREFERENCE order.
    Falls back to the first .gguf file found.
    """
    gguf_files = [f for f in filenames if f.endswith(".gguf")]
    if not gguf_files:
        return None

    # Try preferred quantization levels
    for quant in _GGUF_QUANT_PREFERENCE:
        for f in gguf_files:
            if quant in f:
                return f

    # Fallback: first GGUF file
    return gguf_files[0]


@dataclass
class GgufVariantInfo:
    """One GGUF quantization variant found in a HuggingFace repo.

    Attributes:
        filename: File name in the repo, e.g. "gemma-3-4b-it-Q4_K_M.gguf".
        quant: Quantization label extracted from the filename, e.g. "Q4_K_M".
        size_bytes: Size in bytes.
    """

    filename: str
    quant: str
    size_bytes: int


def _extract_quant_label(filename: str) -> str:
    """
    Extract quantization label like Q4_K_M, IQ4_XS, BF16 from a GGUF filename.

    Examples:
        "gemma-3-4b-it-Q4_K_M.gguf"          → "Q4_K_M"
        "model-IQ4_NL.gguf"                   → "IQ4_NL"
        "model-BF16.gguf"                     → "BF16"
        "model-UD-IQ1_S.gguf"                 → "UD-IQ1_S"
        "model-UD-TQ1_0.gguf"                 → "UD-TQ1_0"
        "MXFP4_MOE/model-MXFP4_MOE-0001.gguf"→ "MXFP4_MOE"
    """
    import re

    # Use only the basename (rfilename may include directory)
    basename = filename.rsplit("/", 1)[-1]
    # Strip .gguf and any shard suffix (-00001-of-00010)
    stem = re.sub(r"-\d{3,}-of-\d{3,}", "", basename.rsplit(".", 1)[0])
    # Match known quantization patterns
    match = re.search(
        r"(UD-)?"  # Optional UD- prefix (Ultra Discrete)
        r"(MXFP[0-9]+(?:_[A-Z0-9]+)*"  # MXFP variants: MXFP4, MXFP4_MOE
        r"|IQ[0-9]+_[A-Z]+(?:_[A-Z0-9]+)?"  # IQ variants: IQ4_XS, IQ4_NL, IQ1_S
        r"|TQ[0-9]+_[0-9]+"  # Ternary quant: TQ1_0, TQ2_0
        r"|Q[0-9]+_K_[A-Z]+"  # K-quant: Q4_K_M, Q3_K_S
        r"|Q[0-9]+_[0-9]+"  # Standard: Q8_0, Q5_1
        r"|Q[0-9]+_K"  # Short K-quant: Q6_K
        r"|BF16|F16|F32)",  # Full precision
        stem,
        re.IGNORECASE,
    )
    if match:
        prefix = match.group(1) or ""
        return f"{prefix}{match.group(2)}"
    # Fallback: last segment after hyphen
    return stem.split("-")[-1]


def list_gguf_variants(
    repo_id: str,
    hf_token: Optional[str] = None,
) -> tuple[list[GgufVariantInfo], bool]:
    """
    Enumerate the GGUF quantization variants offered by a HuggingFace repo.

    Files are grouped by quantization label: sizes of all files sharing a
    label are summed, and the first such file encountered represents the
    group. mmproj (vision projection) files are not listed as variants; their
    presence only sets the vision flag.

    Returns:
        (variants, has_vision): variants sorted by total size, largest first,
        and whether any mmproj file was seen.
    """
    from huggingface_hub import model_info as hf_model_info

    repo = hf_model_info(repo_id, token = hf_token, files_metadata = True)

    vision_found = False
    bytes_per_quant: dict[str, int] = {}  # label -> summed size
    representative_file: dict[str, str] = {}  # label -> first file seen

    for entry in repo.siblings:
        name = entry.rfilename
        if not name.endswith(".gguf"):
            continue

        # Projection files flag vision support but are not model variants
        if "mmproj" in name.lower():
            vision_found = True
            continue

        label = _extract_quant_label(name)
        representative_file.setdefault(label, name)
        bytes_per_quant[label] = bytes_per_quant.get(label, 0) + (entry.size or 0)

    # Largest first. Recommended pinning and OOM demotion are handled
    # client-side where GPU VRAM info is available.
    ranked = sorted(
        (
            GgufVariantInfo(
                filename = representative_file[label],
                quant = label,
                size_bytes = total,
            )
            for label, total in bytes_per_quant.items()
        ),
        key = lambda variant: variant.size_bytes,
        reverse = True,
    )
    return ranked, vision_found


def detect_gguf_model_remote(
    repo_id: str,
    hf_token: Optional[str] = None,
) -> Optional[str]:
    """
    Look for GGUF files in a HuggingFace repo.

    Returns the filename chosen by _pick_best_gguf() from the repo's file
    list, or None when there is none or the repo cannot be queried.
    """
    try:
        from huggingface_hub import model_info as hf_model_info

        siblings = hf_model_info(repo_id, token = hf_token).siblings
        return _pick_best_gguf([entry.rfilename for entry in siblings])
    except Exception as e:
        # Best-effort probe: failures only mean "no GGUF detected"
        logger.debug(f"Could not check GGUF files for '{repo_id}': {e}")
        return None


def download_gguf_file(
    repo_id: str,
    filename: str,
    hf_token: Optional[str] = None,
) -> str:
    """
    Fetch one GGUF file from a HuggingFace repo via ``hf_hub_download``.

    Args:
        repo_id: HF repo that hosts the file.
        filename: Name of the file inside the repo.
        hf_token: Optional HF token forwarded to the download call.

    Returns the local path reported by ``hf_hub_download``.
    """
    from huggingface_hub import hf_hub_download

    return hf_hub_download(repo_id = repo_id, filename = filename, token = hf_token)


# Per-session memo of embedding detection, keyed by (model_name, hf_token),
# so the HF API is not queried repeatedly for the same model.
_embedding_detection_cache: Dict[tuple, bool] = {}


def is_embedding_model(model_name: str, hf_token: Optional[str] = None) -> bool:
    """
    Decide whether a model is an embedding / sentence-transformer model.

    Local paths are classified by the presence of a ``modules.json`` file in
    the model directory. Remote models are classified from HuggingFace
    metadata; any one of these signals is sufficient:
      1. "sentence-transformers" in model tags
      2. "feature-extraction" in model tags
      3. pipeline_tag is "sentence-similarity" or "feature-extraction"

    Checking tags as well as the pipeline tag covers models such as
    gte-modernbert whose library_name is "transformers" rather than
    "sentence-transformers".

    Args:
        model_name: Model identifier (HF repo or local path)
        hf_token: Optional HF token for accessing gated/private models

    Returns:
        True if the model is an embedding model, False otherwise.
        A failed metadata lookup is reported (and cached) as False.
    """
    key = (model_name, hf_token)
    cached = _embedding_detection_cache.get(key)
    if cached is not None:
        return cached

    # Local directory: look for the sentence-transformers marker file
    if is_local_path(model_name):
        marker = os.path.join(normalize_path(model_name), "modules.json")
        verdict = os.path.isfile(marker)
        _embedding_detection_cache[key] = verdict
        return verdict

    try:
        from huggingface_hub import model_info as hf_model_info

        info = hf_model_info(model_name, token = hf_token)
        tags = set(info.tags or [])
        pipeline_tag = info.pipeline_tag or ""

        has_st_tag = "sentence-transformers" in tags
        has_fe_tag = "feature-extraction" in tags
        verdict = (
            has_st_tag
            or has_fe_tag
            or pipeline_tag in ("sentence-similarity", "feature-extraction")
        )
        _embedding_detection_cache[key] = verdict

        if verdict:
            logger.info(
                f"Model {model_name} detected as embedding model: "
                f"pipeline_tag={pipeline_tag}, "
                f"sentence-transformers in tags={has_st_tag}, "
                f"feature-extraction in tags={has_fe_tag}"
            )
        return verdict

    except Exception as e:
        logger.warning(f"Could not determine if {model_name} is embedding model: {e}")
        _embedding_detection_cache[key] = False
        return False


def scan_trained_loras(outputs_dir: str = str(outputs_root())) -> List[Tuple[str, str]]:
    """
    List the trained LoRA adapters found directly under the outputs folder.

    A sub-directory counts as an adapter when it contains
    ``adapter_config.json`` or ``adapter_model.safetensors``.

    Returns:
        List of tuples: [(display_name, adapter_path), ...], newest first by
        directory modification time. Empty when the folder is missing or the
        scan fails.

    Example:
        [
            ("unsloth_Meta-Llama-3.1_...", "./outputs/unsloth_Meta-Llama-3.1_.../"),
            ("my_finetuned_model", "./outputs/my_finetuned_model/"),
        ]
    """
    outputs_path = resolve_output_dir(outputs_dir)
    if not outputs_path.exists():
        logger.warning(f"Outputs directory not found: {outputs_dir}")
        return []

    try:
        found = []
        for entry in outputs_path.iterdir():
            if not entry.is_dir():
                continue

            has_adapter = (entry / "adapter_config.json").exists() or (
                entry / "adapter_model.safetensors"
            ).exists()
            if not has_adapter:
                continue

            found.append((entry.name, str(entry)))
            logger.debug(f"Found trained LoRA: {entry.name}")

        # Newest adapters first
        found.sort(key = lambda pair: Path(pair[1]).stat().st_mtime, reverse = True)

        logger.info(f"Found {len(found)} trained LoRA adapters in {outputs_dir}")
        return found

    except Exception as e:
        logger.error(f"Error scanning outputs folder: {e}")
        return []


def _read_export_json_field(json_path: Path, key: str) -> Optional[Any]:
    """Best-effort read of ``key`` from the JSON object stored at ``json_path``.

    Returns None when the file is missing, unreadable, not valid JSON, or
    does not contain a JSON object.
    """
    try:
        if not json_path.exists():
            return None
        payload = json.loads(json_path.read_text(encoding = "utf-8"))
    except (OSError, ValueError):
        return None
    if not isinstance(payload, dict):
        return None
    return payload.get(key)


def scan_exported_models(
    exports_dir: str = str(exports_root()),
) -> List[Tuple[str, str, str, Optional[str]]]:
    """
    Scan exports folder for exported models (merged, LoRA, GGUF).

    Supports two directory layouts:
      - Two-level: {run}/{checkpoint}/  (merged & LoRA exports)
      - Flat:      {name}-finetune-gguf/  (GGUF exports)

    mmproj (vision projection) GGUF files are never reported as the model
    file in either layout. When a directory holds several GGUF files, the
    first one in name order is reported.

    Returns:
        List of tuples: [(display_name, model_path, export_type, base_model), ...]
        export_type: "lora" | "merged" | "gguf"
        Sorted newest first by modification time of model_path. Empty when
        the folder is missing or the scan fails.
    """
    results = []
    exports_path = resolve_export_dir(exports_dir)

    if not exports_path.exists():
        return results

    try:
        for run_dir in exports_path.iterdir():
            if not run_dir.is_dir():
                continue

            # Flat GGUF export (e.g. exports/gemma-3-4b-it-finetune-gguf/).
            # Sorted so the reported file does not depend on directory order.
            flat_ggufs = sorted(
                f for f in run_dir.glob("*.gguf") if not _is_mmproj(f.name)
            )
            if flat_ggufs:
                base_model = _read_export_json_field(
                    run_dir / "export_metadata.json", "base_model"
                )
                display_name = run_dir.name
                results.append((display_name, str(flat_ggufs[0]), "gguf", base_model))
                logger.debug(f"Found GGUF export: {display_name}")
                continue

            # Two-level: {run}/{checkpoint}/
            for checkpoint_dir in run_dir.iterdir():
                if not checkpoint_dir.is_dir():
                    continue

                display_name = f"{run_dir.name} / {checkpoint_dir.name}"
                adapter_config = checkpoint_dir / "adapter_config.json"
                config_file = checkpoint_dir / "config.json"
                base_model = None

                if adapter_config.exists():
                    export_type = "lora"
                    base_model = _read_export_json_field(
                        adapter_config, "base_model_name_or_path"
                    )
                elif config_file.exists() and (
                    any(checkpoint_dir.glob("*.safetensors"))
                    or any(checkpoint_dir.glob("*.bin"))
                ):
                    export_type = "merged"
                    base_model = _read_export_json_field(
                        checkpoint_dir / "export_metadata.json", "base_model"
                    )
                else:
                    # Same mmproj filter as the flat layout: a projection
                    # file is not loadable as the main model.
                    nested_ggufs = sorted(
                        f
                        for f in checkpoint_dir.glob("*.gguf")
                        if not _is_mmproj(f.name)
                    )
                    if not nested_ggufs:
                        continue

                    # Check checkpoint_dir first, then fall back to parent run_dir
                    # (export.py writes metadata to the top-level export directory)
                    for meta_dir in (checkpoint_dir, run_dir):
                        base_model = _read_export_json_field(
                            meta_dir / "export_metadata.json", "base_model"
                        )
                        if base_model:
                            break

                    results.append(
                        (display_name, str(nested_ggufs[0]), "gguf", base_model)
                    )
                    logger.debug(f"Found GGUF export: {display_name}")
                    continue

                # Fallback: read base model from the original training run's
                # adapter_config.json in ./outputs/{run_name}/
                if not base_model:
                    base_model = _read_export_json_field(
                        resolve_output_dir(run_dir.name) / "adapter_config.json",
                        "base_model_name_or_path",
                    )

                results.append(
                    (display_name, str(checkpoint_dir), export_type, base_model)
                )
                logger.debug(f"Found exported model: {display_name} ({export_type})")

        results.sort(key = lambda x: Path(x[1]).stat().st_mtime, reverse = True)
        logger.info(f"Found {len(results)} exported models in {exports_dir}")
        return results

    except Exception as e:
        logger.error(f"Error scanning exports folder: {e}")
        return []


def get_base_model_from_lora(lora_path: str) -> Optional[str]:
    """
    Read the base model name from a LoRA adapter's config.

    Sources are tried in order, and the first one yielding a non-empty value
    wins:
      1. ``base_model_name_or_path`` in ``adapter_config.json``
      2. ``model_name_or_path`` on the object stored in ``training_args.bin``
      3. the directory name, when it looks like ``unsloth_<model>_<suffix>``

    Args:
        lora_path: Path to the LoRA adapter directory

    Returns:
        Base model identifier (e.g., "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit")
        or None if not found

    Example:
        >>> get_base_model_from_lora("./outputs/unsloth_Meta-Llama-3.1_.../")
        "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
    """
    try:
        lora_path_obj = Path(lora_path)

        # Try adapter_config.json first
        adapter_config_path = lora_path_obj / "adapter_config.json"
        if adapter_config_path.exists():
            try:
                with open(adapter_config_path, "r", encoding = "utf-8") as f:
                    config = json.load(f)
            except (OSError, ValueError) as e:
                # A corrupt config should not block the fallbacks below.
                logger.warning(f"Could not read adapter_config.json: {e}")
                config = None

            base_model = (
                config.get("base_model_name_or_path")
                if isinstance(config, dict)
                else None
            )
            if base_model:
                logger.info(
                    f"Detected base model from adapter_config.json: {base_model}"
                )
                return base_model

        # Fallback: try training_args.bin (requires torch)
        training_args_path = lora_path_obj / "training_args.bin"
        if training_args_path.exists():
            try:
                import torch

                # NOTE(security): torch.load unpickles the file, which can
                # execute arbitrary code. Only point this at adapter
                # directories from a trusted source.
                training_args = torch.load(training_args_path)
                base_model = getattr(training_args, "model_name_or_path", None)
                if base_model:
                    logger.info(
                        f"Detected base model from training_args.bin: {base_model}"
                    )
                    return base_model
            except Exception as e:
                logger.warning(f"Could not load training_args.bin: {e}")

        # Last resort: parse from directory name
        # Format: unsloth_Meta-Llama-3.1-8B-Instruct-bnb-4bit_timestamp
        dir_name = lora_path_obj.name
        if dir_name.startswith("unsloth_"):
            # Drop the leading "unsloth" and the trailing suffix (usually a
            # timestamp such as _1234567890); what remains is the model name.
            model_parts = dir_name.split("_")[1:-1]
            # With nothing in between (e.g. "unsloth_foo") there is no model
            # name to recover; returning "unsloth/" would be a bogus id.
            if model_parts:
                base_model = "unsloth/" + "_".join(model_parts)
                logger.info(f"Detected base model from directory name: {base_model}")
                return base_model

        logger.warning(f"Could not detect base model for LoRA: {lora_path}")
        return None

    except Exception as e:
        logger.error(f"Error reading base model from LoRA config: {e}")
        return None


# Status decorations that appear in UI dropdown entries: three suffixes and
# one leading marker.
UI_STATUS_INDICATORS = [
    " (Ready)",
    " (Loading...)",
    " (Active)",
    "↓ ",
]


def _load_first_matching_yaml(
    search_root: Path, filename: str
) -> Optional[Tuple[Path, Dict[str, Any]]]:
    """Load the first regular file named ``filename`` found under ``search_root``.

    The search is recursive (subfolders and root). Returns ``(path, parsed)``
    where an empty YAML document is reported as ``{}``, or None when no such
    file exists.
    """
    for config_path in search_root.rglob(filename):
        if config_path.is_file():
            with open(config_path, "r", encoding = "utf-8") as f:
                return config_path, (yaml.safe_load(f) or {})
    return None


def load_model_defaults(model_name: str) -> Dict[str, Any]:
    """
    Load default training parameters for a model from YAML file.

    Args:
        model_name: Model identifier (e.g., "unsloth/Meta-Llama-3.1-8B-bnb-4bit")

    Returns:
        Dictionary with default parameters from YAML file, or empty dict if
        not found or if loading fails

    The function looks for a YAML file in configs/model_defaults/ (including subfolders)
    in this order:
      1. the file registered for the lower-cased model name in
         _REVERSE_MODEL_MAPPING;
      2. for local paths, the file registered for the last two, then the last
         one, path components (tried as written and lower-cased);
      3. a file named after the model itself ("/" replaced by "_");
      4. default.yaml.
    """
    try:
        # Get the script directory to locate configs
        script_dir = Path(__file__).parent.parent.parent
        defaults_dir = script_dir / "assets" / "configs" / "model_defaults"

        # First, check if model is in the mapping
        lowered_name = model_name.lower()
        if lowered_name in _REVERSE_MODEL_MAPPING:
            hit = _load_first_matching_yaml(
                defaults_dir, _REVERSE_MODEL_MAPPING[lowered_name]
            )
            if hit is not None:
                config_path, config = hit
                logger.info(f"Loaded model defaults from {config_path} (via mapping)")
                return config

        # If model_name is a local path (e.g. /home/.../Spark-TTS-0.5B/LLM from
        # adapter_config.json), try matching the last 1-2 path components against
        # the registry (e.g. "Spark-TTS-0.5B/LLM").
        if model_name not in _REVERSE_MODEL_MAPPING and (
            model_name.startswith("/") or model_name.startswith(".")
        ):
            parts = Path(model_name).parts
            for depth in (2, 1):
                if len(parts) < depth:
                    continue
                suffix = "/".join(parts[-depth:])
                # The registry is queried with a lower-cased key above, so
                # also try the lower-cased suffix; dict.fromkeys drops the
                # duplicate when the suffix is already lower-case.
                for key in dict.fromkeys((suffix, suffix.lower())):
                    if key not in _REVERSE_MODEL_MAPPING:
                        continue
                    hit = _load_first_matching_yaml(
                        defaults_dir, _REVERSE_MODEL_MAPPING[key]
                    )
                    if hit is not None:
                        config_path, config = hit
                        logger.info(
                            f"Loaded model defaults from {config_path} (via path suffix '{suffix}')"
                        )
                        return config

        # Try exact model name match (for backward compatibility)
        model_filename = model_name.replace("/", "_") + ".yaml"
        hit = _load_first_matching_yaml(defaults_dir, model_filename)
        if hit is not None:
            config_path, config = hit
            logger.info(f"Loaded model defaults from {config_path}")
            return config

        # Fall back to default.yaml
        default_config_path = defaults_dir / "default.yaml"
        if default_config_path.exists():
            with open(default_config_path, "r", encoding = "utf-8") as f:
                config = yaml.safe_load(f) or {}
            logger.info(f"Loaded default model defaults from {default_config_path}")
            return config

        logger.warning(f"No default config found for model {model_name}")
        return {}

    except Exception as e:
        logger.error(f"Error loading model defaults for {model_name}: {e}")
        return {}


@dataclass
class ModelConfig:
    """Configuration for a model to load.

    Describes a single model selection: where it lives (local path vs.
    Hugging Face repo), which modalities were detected for it (vision, audio),
    and whether it is a LoRA adapter or a GGUF model. Instances are normally
    built through the ``from_lora_path``, ``from_identifier`` and
    ``from_ui_selection`` classmethods rather than by calling the constructor
    directly.
    """

    identifier: str  # Clean model identifier (org/name or path)
    display_name: str  # Original UI display name
    path: str  # Normalized filesystem path
    is_local: bool  # Is this a local file vs HF model?
    is_cached: bool  # Is this already in HF cache?
    is_vision: bool  # Is this a vision model?
    is_lora: bool  # Is this a lora adapter?
    is_gguf: bool = False  # Is this a GGUF model?
    is_audio: bool = False  # Is this a TTS audio model?
    audio_type: Optional[str] = (
        None  # Audio codec type: 'snac', 'csm', 'bicodec', 'dac'
    )
    has_audio_input: bool = False  # Accepts audio input (ASR/speech understanding)
    gguf_file: Optional[str] = None  # Full path to the .gguf file (local mode)
    gguf_mmproj_file: Optional[str] = (
        None  # Full path to the mmproj .gguf file (vision projection)
    )
    gguf_hf_repo: Optional[str] = (
        None  # HF repo ID for -hf mode (e.g. "unsloth/gemma-3-4b-it-GGUF")
    )
    gguf_variant: Optional[str] = None  # Quantization variant (e.g. "Q4_K_M")
    base_model: Optional[str] = None  # Base model (for LoRAs)

    @classmethod
    def from_lora_path(
        cls, lora_path: str, hf_token: Optional[str] = None
    ) -> Optional["ModelConfig"]:
        """
        Build a ModelConfig describing a LoRA adapter stored on local disk.

        The base model is looked up via ``get_base_model_from_lora`` and its
        vision / audio capabilities are what the resulting config reports.

        Args:
            lora_path: Path to LoRA adapter (e.g., "./outputs/unsloth_Meta-Llama-3.1_.../")
            hf_token: HF token forwarded to the vision and audio detection helpers

        Returns:
            ModelConfig for the LoRA adapter, or None when the path does not
            exist, the base model cannot be determined, or any step raises.
        """
        try:
            adapter_dir = Path(lora_path)
            if not adapter_dir.exists():
                logger.error(f"LoRA path does not exist: {lora_path}")
                return None

            resolved_base = get_base_model_from_lora(lora_path)
            if not resolved_base:
                logger.error(f"Could not determine base model for LoRA: {lora_path}")
                return None

            # Capabilities come from the base model, not from the adapter itself.
            vision_capable = is_vision_model(resolved_base, hf_token = hf_token)
            codec = detect_audio_type(resolved_base, hf_token = hf_token)

            # "audio_vlm" is a detected audio type that is not flagged as is_audio.
            flagged_audio = not (codec is None or codec == "audio_vlm")

            return cls(
                identifier = lora_path,  # Local LoRAs are identified by their path
                display_name = adapter_dir.name,
                path = lora_path,
                is_local = True,
                is_cached = True,  # Local LoRAs are always "cached"
                is_vision = vision_capable,
                is_lora = True,
                is_audio = flagged_audio,
                audio_type = codec,
                has_audio_input = is_audio_input_type(codec),
                base_model = resolved_base,
            )
        except Exception as e:
            logger.error(f"Error creating ModelConfig from LoRA path: {e}")
            return None

    @classmethod
    def from_identifier(
        cls,
        model_id: str,
        hf_token: Optional[str] = None,
        is_lora: bool = False,
        gguf_variant: Optional[str] = None,
    ) -> Optional["ModelConfig"]:
        """
        Create ModelConfig from a clean model identifier.

        For FastAPI routes where the frontend sends sanitized model paths.
        No Gradio dropdown parsing - expects clean identifiers like:
        - "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
        - "./outputs/my_lora_adapter"
        - "/absolute/path/to/model"

        Args:
            model_id: Clean model identifier (HF repo name or local path)
            hf_token: Optional HF token for vision detection on gated models
            is_lora: Whether this is a LoRA adapter
            gguf_variant: Optional GGUF quantization variant (e.g. "Q4_K_M").
                For remote GGUF repos, specifies which quant to load via -hf.
                If None, auto-selects using _pick_best_gguf().

        Returns:
            ModelConfig or None if configuration cannot be created
        """
        if not model_id or not model_id.strip():
            return None

        identifier = model_id.strip()
        is_local = is_local_path(identifier)
        path = normalize_path(identifier) if is_local else identifier

        # Add unsloth/ prefix for shorthand HF models
        if not is_local and "/" not in identifier:
            identifier = f"unsloth/{identifier}"
            path = identifier

        # Enforce lowercase for remote Hugging Face identifiers to prevent cache duplication
        # Hugging Face Hub APIs are case-insensitive remotely, but case-sensitive locally (repo_folder_name).
        if not is_local:
            identifier = identifier.lower()
            path = path.lower()

        # Auto-detect GGUF models (check before LoRA/vision detection)
        if is_local:
            gguf_file = detect_gguf_model(path)
            if gguf_file:
                display_name = Path(gguf_file).stem
                logger.info(f"Detected local GGUF model: {gguf_file}")

                # Detect vision: check if base model is vision, then look for mmproj
                mmproj_file = None
                gguf_is_vision = False
                gguf_dir = Path(gguf_file).parent

                # Determine if this is a vision model from export metadata
                base_is_vision = False
                meta_path = gguf_dir / "export_metadata.json"
                if meta_path.exists():
                    try:
                        meta = json.loads(meta_path.read_text())
                        base = meta.get("base_model")
                        if base and is_vision_model(base, hf_token = hf_token):
                            base_is_vision = True
                            logger.info(f"GGUF base model '{base}' is a vision model")
                    except Exception as e:
                        logger.debug(f"Could not read export metadata: {e}")

                # If vision (or mmproj happens to exist), find the mmproj file
                mmproj_file = detect_mmproj_file(gguf_file)
                if mmproj_file:
                    gguf_is_vision = True
                    logger.info(f"Detected mmproj for vision: {mmproj_file}")
                elif base_is_vision:
                    logger.warning(
                        f"Base model is vision but no mmproj file found in {gguf_dir}"
                    )

                return cls(
                    identifier = identifier,
                    display_name = display_name,
                    path = path,
                    is_local = True,
                    is_cached = True,
                    is_vision = gguf_is_vision,
                    is_lora = False,
                    is_gguf = True,
                    gguf_file = gguf_file,
                    gguf_mmproj_file = mmproj_file,
                )
        else:
            # Check if the HF repo contains GGUF files
            gguf_filename = detect_gguf_model_remote(identifier, hf_token = hf_token)
            if gguf_filename:
                # Preflight: verify llama-server binary exists BEFORE user waits
                # for a multi-GB download that llama-server handles natively
                from core.inference.llama_cpp import LlamaCppBackend

                if not LlamaCppBackend._find_llama_server_binary():
                    raise RuntimeError(
                        "llama-server binary not found — cannot load GGUF models. "
                        "Run setup.sh to build it, or set LLAMA_SERVER_PATH."
                    )

                # Use list_gguf_variants() to detect vision & resolve variant
                variants, has_vision = list_gguf_variants(identifier, hf_token = hf_token)
                variant = gguf_variant
                if not variant:
                    # Auto-select best quantization
                    variant_filenames = [v.filename for v in variants]
                    best = _pick_best_gguf(variant_filenames)
                    if best:
                        variant = _extract_quant_label(best)
                    else:
                        variant = "Q4_K_M"  # Fallback — llama-server's own default

                display_name = f"{identifier.split('/')[-1]} ({variant})"
                logger.info(
                    f"Detected remote GGUF repo '{identifier}', "
                    f"variant={variant}, vision={has_vision}"
                )
                return cls(
                    identifier = identifier,
                    display_name = display_name,
                    path = identifier,
                    is_local = False,
                    is_cached = False,
                    is_vision = has_vision,
                    is_lora = False,
                    is_gguf = True,
                    gguf_file = None,
                    gguf_hf_repo = identifier,
                    gguf_variant = variant,
                )

        # Auto-detect LoRA for local paths (check adapter_config.json on disk)
        if not is_lora and is_local:
            detected_base = get_base_model_from_lora(path)
            if detected_base:
                is_lora = True
                logger.info(
                    f"Auto-detected local LoRA adapter at '{path}' (base: {detected_base})"
                )

        # Auto-detect LoRA for remote HF models (check repo file listing)
        if not is_lora and not is_local:
            try:
                from huggingface_hub import model_info as hf_model_info

                info = hf_model_info(identifier, token = hf_token)
                repo_files = [s.rfilename for s in info.siblings]
                if "adapter_config.json" in repo_files:
                    is_lora = True
                    logger.info(f"Auto-detected remote LoRA adapter: '{identifier}'")
            except Exception as e:
                logger.debug(
                    f"Could not check remote LoRA status for '{identifier}': {e}"
                )

        # Handle LoRA adapters
        base_model = None
        if is_lora:
            if is_local:
                # Local LoRA: read adapter_config.json from disk
                base_model = get_base_model_from_lora(path)
            else:
                # Remote LoRA: download adapter_config.json from HF
                try:
                    from huggingface_hub import hf_hub_download

                    config_path = hf_hub_download(
                        identifier, "adapter_config.json", token = hf_token
                    )
                    with open(config_path, "r") as f:
                        adapter_config = json.load(f)
                    base_model = adapter_config.get("base_model_name_or_path")
                    if base_model:
                        logger.info(f"Resolved remote LoRA base model: '{base_model}'")
                except Exception as e:
                    logger.warning(
                        f"Could not download adapter_config.json for '{identifier}': {e}"
                    )

            if not base_model:
                logger.warning(f"Could not determine base model for LoRA '{path}'")
                return None
            check_model = base_model
        else:
            check_model = identifier

        vision = is_vision_model(check_model, hf_token = hf_token)
        audio_type_val = detect_audio_type(check_model, hf_token = hf_token)
        has_audio_in = is_audio_input_type(audio_type_val)

        display_name = Path(path).name if is_local else identifier.split("/")[-1]

        return cls(
            identifier = identifier,
            display_name = display_name,
            path = path,
            is_local = is_local,
            is_cached = is_model_cached(identifier) if not is_local else True,
            is_vision = vision,
            is_lora = is_lora,
            is_audio = audio_type_val is not None and audio_type_val != "audio_vlm",
            audio_type = audio_type_val,
            has_audio_input = has_audio_in,
            base_model = base_model,
        )

    @classmethod
    def from_ui_selection(
        cls,
        dropdown_value: Optional[str],
        search_value: Optional[str],
        local_models: list = None,
        hf_token: Optional[str] = None,
        is_lora: bool = False,
    ) -> Optional["ModelConfig"]:
        """
        Create a universal ModelConfig from UI dropdown/search selections.
        Handles base models and LoRA adapters.
        """
        selected = None
        if search_value and search_value.strip():
            selected = search_value.strip()
        elif dropdown_value:
            selected = dropdown_value

        if not selected:
            return None

        display_name = selected

        #  Use the correct 'local_models' parameter to resolve display names
        if " (Active)" in selected or " (Ready)" in selected:
            clean_display_name = selected.replace(" (Active)", "").replace(
                " (Ready)", ""
            )
            if local_models:
                for local_display, local_path in local_models:
                    if local_display == clean_display_name:
                        selected = local_path
                        break

        # Clean all UI status indicators to get the final identifier
        identifier = selected
        for status in UI_STATUS_INDICATORS:
            identifier = identifier.replace(status, "")
        identifier = identifier.strip()

        is_local = is_local_path(identifier)
        path = normalize_path(identifier) if is_local else identifier

        # Add unsloth/ prefix for shorthand HF models
        if not is_local and "/" not in identifier:
            identifier = f"unsloth/{identifier}"
            path = identifier

        # --- Logic for Base Model and Vision Detection ---
        base_model = None
        is_vision = False

        if is_lora:
            # For a LoRA, we MUST find its base model.
            base_model = get_base_model_from_lora(path)
            if not base_model:
                logger.warning(
                    f"Could not determine base model for LoRA '{path}'. Cannot create config."
                )
                return None  # Cannot proceed without a base model

            # A LoRA's vision capability is determined by its base model.
            is_vision = is_vision_model(base_model, hf_token = hf_token)
        else:
            # For a base model, just check its own vision status.
            is_vision = is_vision_model(identifier, hf_token = hf_token)

        from utils.paths import is_model_cached

        is_cached = is_model_cached(identifier) if not is_local else True

        return cls(
            identifier = identifier,
            display_name = display_name,
            path = path,
            is_local = is_local,
            is_cached = is_cached,
            is_vision = is_vision,
            is_lora = is_lora,
            base_model = base_model,  # This will be None for base models, and populated for LoRAs
        )


================================================
FILE: studio/backend/utils/paths/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Path utilities for model and dataset handling
"""

from .path_utils import normalize_path, is_local_path, is_model_cached, get_cache_path
from .storage_roots import (
    studio_root,
    assets_root,
    datasets_root,
    dataset_uploads_root,
    recipe_datasets_root,
    outputs_root,
    exports_root,
    auth_root,
    auth_db_path,
    tmp_root,
    seed_uploads_root,
    unstructured_seed_cache_root,
    oxc_validator_tmp_root,
    tensorboard_root,
    ensure_dir,
    ensure_studio_directories,
    resolve_under_root,
    resolve_output_dir,
    resolve_export_dir,
    resolve_tensorboard_dir,
    resolve_dataset_path,
)

__all__ = [
    "normalize_path",
    "is_local_path",
    "is_model_cached",
    "get_cache_path",
    "studio_root",
    "assets_root",
    "datasets_root",
    "dataset_uploads_root",
    "recipe_datasets_root",
    "outputs_root",
    "exports_root",
    "auth_root",
    "auth_db_path",
    "tmp_root",
    "seed_uploads_root",
    "unstructured_seed_cache_root",
    "oxc_validator_tmp_root",
    "tensorboard_root",
    "ensure_dir",
    "ensure_studio_directories",
    "resolve_under_root",
    "resolve_output_dir",
    "resolve_export_dir",
    "resolve_tensorboard_dir",
    "resolve_dataset_path",
]


================================================
FILE: studio/backend/utils/paths/path_utils.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Path utilities for model and dataset handling
"""

import os
from pathlib import Path
from typing import Optional
import structlog
from loggers import get_logger

logger = get_logger(__name__)


def normalize_path(path: str) -> str:
    """
    Rewrite a Windows-style path into its WSL / forward-slash form.

    A leading drive prefix (``X:\\`` or ``X:/``) becomes ``/mnt/x/``; every
    backslash becomes a forward slash. Falsy input is returned untouched.

    Examples:
        C:\\Users\\... -> /mnt/c/Users/...
        /home/user/... -> /home/user/... (unchanged)
    """
    if not path:
        return path

    forward_slashed = path.replace("\\", "/")

    # Drive prefix: one character, a colon, then either kind of separator.
    starts_with_drive = (
        len(path) >= 3 and path[1] == ":" and path[2] in ("\\", "/")
    )
    if not starts_with_drive:
        # Already Unix-style or relative
        return forward_slashed

    drive_letter = path[0].lower()
    remainder = forward_slashed[3:]
    return f"/mnt/{drive_letter}/{remainder}"


def is_local_path(path: str) -> bool:
    """
    Decide whether *path* names something on the local filesystem rather
    than a HuggingFace model identifier.

    Anything that exists on disk counts as local. Otherwise the decision is a
    string heuristic: a single-slash ``org/model`` shape is treated as a Hub
    ID, while path-like prefixes, colons, backslashes or absolute paths are
    treated as local.

    Examples:
        True: /home/user/model, C:\\models, ./model, ~/model
        False: unsloth/llama-3.1-8b, microsoft/phi-2
    """
    if not path:
        return False

    # Existence check covers relative paths like "outputs/foo"; any failure
    # while probing is treated as "not found" and the heuristics decide.
    try:
        found_on_disk = Path(normalize_path(path)).expanduser().exists()
    except Exception:
        found_on_disk = False
    if found_on_disk:
        return True

    has_path_prefix = path.startswith(("/", ".", "~"))  # Unix absolute/relative

    # Exactly one slash and no path-like prefix: looks like org/model
    if path.count("/") == 1 and not has_path_prefix:
        return False

    if has_path_prefix:
        return True
    if ":" in path:  # Windows drive or URL
        return True
    if "\\" in path:  # Windows separator
        return True
    return os.path.isabs(path)  # System-absolute


def get_cache_path(model_name: str) -> Optional[Path]:
    """Get HuggingFace cache path for a model if it exists.

    The hub cache directory is resolved the way huggingface_hub documents it:
    ``HF_HUB_CACHE`` if set, otherwise ``<HF_HOME>/hub``, where ``HF_HOME``
    falls back to ``$XDG_CACHE_HOME/huggingface`` and finally to
    ``~/.cache/huggingface``. Honoring these variables matters because the
    studio redirects the cache through them at startup.

    Args:
        model_name: Repo ID such as ``"org/name"``.

    Returns:
        The ``models--org--name`` directory inside the hub cache, or None if
        that directory does not exist.
    """
    hub_cache = os.environ.get("HF_HUB_CACHE")
    if hub_cache:
        cache_dir = Path(hub_cache).expanduser()
    else:
        hf_home = os.environ.get("HF_HOME")
        if hf_home:
            hf_home_dir = Path(hf_home).expanduser()
        else:
            xdg_cache = os.environ.get("XDG_CACHE_HOME")
            generic_cache = (
                Path(xdg_cache).expanduser() if xdg_cache else Path.home() / ".cache"
            )
            hf_home_dir = generic_cache / "huggingface"
        cache_dir = hf_home_dir / "hub"

    # Hub cache folders are named "models--" + repo ID with "/" turned into "--".
    model_cache_name = model_name.replace("/", "--")
    model_cache_path = cache_dir / f"models--{model_cache_name}"

    return model_cache_path if model_cache_path.exists() else None


def is_model_cached(model_name: str) -> bool:
    """Check if model is downloaded in HuggingFace cache.

    A model counts as cached when its cache directory exists and contains at
    least one ``.safetensors``, ``.bin`` or ``.json`` file anywhere below it.

    Args:
        model_name: Repo ID such as ``"org/name"``.

    Returns:
        True if such a file is found, False otherwise.
    """
    cache_path = get_cache_path(model_name)
    if not cache_path:
        return False

    # Stop at the first matching file instead of materializing the full
    # recursive listing for every suffix.
    return any(
        any(cache_path.rglob(f"*{suffix}"))
        for suffix in (".safetensors", ".bin", ".json")
    )


================================================
FILE: studio/backend/utils/paths/storage_roots.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from __future__ import annotations

import os
from pathlib import Path
import tempfile


def studio_root() -> Path:
    """Return the studio's base directory, ``~/.unsloth/studio``."""
    return Path.home().joinpath(".unsloth", "studio")


def cache_root() -> Path:
    """Return the central cache directory for all studio downloads (models, datasets, etc.)."""
    return Path.home().joinpath(".unsloth", "studio", "cache")


def assets_root() -> Path:
    """Return the ``assets`` directory under the studio root."""
    return studio_root().joinpath("assets")


def datasets_root() -> Path:
    """Return the ``datasets`` directory under the assets root."""
    return assets_root().joinpath("datasets")


def dataset_uploads_root() -> Path:
    """Return the ``uploads`` directory under the datasets root."""
    return datasets_root().joinpath("uploads")


def recipe_datasets_root() -> Path:
    """Return the ``recipes`` directory under the datasets root."""
    return datasets_root().joinpath("recipes")


def outputs_root() -> Path:
    """Return the ``outputs`` directory under the studio root."""
    return studio_root().joinpath("outputs")


def exports_root() -> Path:
    """Return the ``exports`` directory under the studio root."""
    return studio_root().joinpath("exports")


def auth_root() -> Path:
    """Return the ``auth`` directory under the studio root."""
    return studio_root().joinpath("auth")


def auth_db_path() -> Path:
    """Return the path of the ``auth.db`` file inside the auth directory."""
    return auth_root().joinpath("auth.db")


def tmp_root() -> Path:
    """Return the ``unsloth-studio`` directory inside the system temp directory."""
    system_tmp = Path(tempfile.gettempdir())
    return system_tmp.joinpath("unsloth-studio")


def seed_uploads_root() -> Path:
    """Return the ``seed-uploads`` directory under the studio temp root."""
    return tmp_root().joinpath("seed-uploads")


def unstructured_seed_cache_root() -> Path:
    """Return the ``unstructured-seed-cache`` directory under the studio temp root."""
    return tmp_root().joinpath("unstructured-seed-cache")


def oxc_validator_tmp_root() -> Path:
    """Return the ``oxc-validator`` directory under the studio temp root."""
    return tmp_root().joinpath("oxc-validator")


def tensorboard_root() -> Path:
    """Return the ``runs`` directory under the studio root (TensorBoard logs)."""
    return studio_root().joinpath("runs")


def ensure_dir(path: Path) -> Path:
    """Create *path* (and any missing parents) if needed, then return it."""
    if not path.is_dir():
        path.mkdir(exist_ok = True, parents = True)
    return path


def _setup_cache_env() -> None:
    """Set cache environment variables for HuggingFace, uv, and vLLM.

    Only sets variables that are not already set by the user, so
    explicit overrides (e.g. HF_HOME=/data/hf) are respected.
    Works on Linux, macOS, and Windows.

    When the user has set ``HF_HOME``, the derived ``HF_HUB_CACHE`` and
    ``HF_XET_CACHE`` variables are left alone as well: huggingface_hub
    defaults them to ``$HF_HOME/hub`` and ``$HF_HOME/xet``, and forcing them
    to the studio cache would silently override the user's ``HF_HOME``.

    Each directory this function assigns is created if missing.
    """
    root = cache_root()
    hf_dir = root / "huggingface"

    defaults = {"HF_HOME": str(hf_dir)}
    if "HF_HOME" not in os.environ:
        # Only pin the hub/xet caches when the studio also owns HF_HOME.
        defaults["HF_HUB_CACHE"] = str(hf_dir / "hub")
        defaults["HF_XET_CACHE"] = str(hf_dir / "xet")
    defaults["UV_CACHE_DIR"] = str(root / "uv")
    defaults["VLLM_CACHE_ROOT"] = str(root / "vllm")

    for key, value in defaults.items():
        if key not in os.environ:
            os.environ[key] = value
            Path(value).mkdir(parents = True, exist_ok = True)


def ensure_studio_directories() -> None:
    """Create the standard studio directories, then configure the cache env vars.

    Intended to run at startup.
    """
    standard_dir_getters = (
        studio_root,
        assets_root,
        datasets_root,
        dataset_uploads_root,
        recipe_datasets_root,
        outputs_root,
        exports_root,
        auth_root,
        tensorboard_root,
    )
    for get_dir in standard_dir_getters:
        directory = get_dir()
        ensure_dir(directory)

    _setup_cache_env()


def _clean_relative_path(
    path_value: str, *, strip_prefixes: tuple[str, ...] = ()
) -> Path:
    path = Path(path_value).expanduser()
    parts = [part for part in path.parts if part not in ("", ".")]
    while parts and parts[0] in strip_prefixes:
        parts = parts[1:]
    return Path(*parts) if parts else Path()


def resolve_under_root(
    path_value: str | None,
    *,
    root: Path,
    strip_prefixes: tuple[str, ...] = (),
) -> Path:
    if not path_value or not str(path_value).strip():
        return root

    path = Path(str(path_value).strip()).expanduser()
    if path.is_absolute():
        return path

    cleaned = _clean_relative_path(str(path), strip_prefixes = strip_prefixes)
    return root / cleaned


def resolve_output_dir(path_value: str | None = None) -> Path:
    """Resolve *path_value* against the outputs root, dropping a leading ``outputs`` component."""
    base_dir = outputs_root()
    return resolve_under_root(
        path_value, root = base_dir, strip_prefixes = ("outputs",)
    )


def resolve_export_dir(path_value: str | None = None) -> Path:
    """Resolve *path_value* against the exports root, dropping a leading ``exports`` component."""
    base_dir = exports_root()
    return resolve_under_root(
        path_value, root = base_dir, strip_prefixes = ("exports",)
    )


def resolve_tensorboard_dir(path_value: str | None = None) -> Path:
    """Resolve *path_value* against the tensorboard root, dropping leading ``runs`` / ``tensorboard`` components."""
    base_dir = tensorboard_root()
    return resolve_under_root(
        path_value, root = base_dir, strip_prefixes = ("runs", "tensorboard")
    )


def resolve_dataset_path(path_value: str) -> Path:
    """Map a dataset path string onto the studio's dataset directories.

    - Absolute paths (after ``~`` expansion) are returned unchanged.
    - A leading ``assets/datasets`` is dropped.
    - A leading ``uploads`` or ``recipes`` component selects that root directly.
    - Anything else is probed in several candidate locations; the first one
      that exists wins, and the uploads location is the fallback.
    """
    expanded = Path(path_value).expanduser()
    if expanded.is_absolute():
        return expanded

    segments = [seg for seg in Path(path_value).parts if seg not in ("", ".")]
    if segments[:2] == ["assets", "datasets"]:
        segments = segments[2:]

    if segments:
        head, tail = segments[0], segments[1:]
        if head == "uploads":
            return dataset_uploads_root() / (Path(*tail) if tail else Path())
        if head == "recipes":
            return recipe_datasets_root() / (Path(*tail) if tail else Path())

    relative = Path(*segments) if segments else Path()
    candidates = [
        dataset_uploads_root() / relative,
        recipe_datasets_root() / relative,
        datasets_root() / relative,
        # Last resort: match on the bare file name only.
        dataset_uploads_root() / relative.name,
        recipe_datasets_root() / relative.name,
    ]
    return next((option for option in candidates if option.exists()), candidates[0])


================================================
FILE: studio/backend/utils/transformers_version.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Automatic transformers version switching.

Some newer model architectures (Ministral-3, GLM-4.7-Flash, Qwen3-30B-A3B MoE,
tiny_qwen3_moe) require transformers>=5.3.0, while everything else needs the
default 4.57.x that ships with Unsloth.

When loading a LoRA adapter with a custom name, we resolve the base model from
``adapter_config.json`` and check *that* against the model list.

Strategy:
  Training and inference run in subprocesses that activate the correct version
  via sys.path (prepending .venv_t5/ for 5.x models). See:
    - core/training/worker.py
    - core/inference/worker.py

  For export (still in-process), ensure_transformers_version() does a lightweight
  sys.path swap using the same .venv_t5/ directory pre-installed by setup.sh.
"""

import importlib
import json
import structlog
from loggers import get_logger
import os
import shutil
import subprocess
import sys
from pathlib import Path

logger = get_logger(__name__)


# ---------------------------------------------------------------------------
# Detection
# ---------------------------------------------------------------------------

# Lowercase substrings — if ANY appears anywhere in the lowered model name,
# we need transformers 5.x. Checked by needs_transformers_5() before any
# network lookup is attempted.
TRANSFORMERS_5_MODEL_SUBSTRINGS: tuple[str, ...] = (
    "ministral-3-",  # Ministral-3-{3,8,14}B-{Instruct,Reasoning,Base}-2512
    "glm-4.7-flash",  # GLM-4.7-Flash
    "qwen3-30b-a3b",  # Qwen3-30B-A3B-Instruct-2507 and variants
    "qwen3.5",  # Qwen3.5 family (35B-A3B, etc.)
    "qwen3-next",  # Qwen3-Next and variants
    "tiny_qwen3_moe",  # imdatta0/tiny_qwen3_moe_2.8B_0.7B
)

# Tokenizer classes that only exist in transformers>=5.x
_TRANSFORMERS_5_TOKENIZER_CLASSES: set[str] = {
    "TokenizersBackend",
}

# Cache for dynamic tokenizer_config.json lookups to avoid repeated fetches.
# Maps model name -> whether its tokenizer class requires transformers 5.x;
# failed lookups are stored as False.
_tokenizer_class_cache: dict[str, bool] = {}

# Versions
TRANSFORMERS_5_VERSION = "5.3.0"
TRANSFORMERS_DEFAULT_VERSION = "4.57.6"

# Pre-installed directory for transformers 5.x — created by setup.sh / setup.ps1
_VENV_T5_DIR = str(Path.home() / ".unsloth" / "studio" / ".venv_t5")


def _resolve_base_model(model_name: str) -> str:
    """If *model_name* points to a LoRA adapter, return its base model.

    Checks for ``adapter_config.json`` locally first.  Only calls the heavier
    ``get_base_model_from_lora`` for paths that are actual local directories
    (avoids noisy warnings for plain HF model IDs).

    Returns the original *model_name* unchanged if it is not a LoRA adapter.
    """
    # --- Fast local check ---------------------------------------------------
    local_path = Path(model_name)
    adapter_cfg_path = local_path / "adapter_config.json"
    if adapter_cfg_path.is_file():
        try:
            with open(adapter_cfg_path) as f:
                cfg = json.load(f)
            base = cfg.get("base_model_name_or_path")
            if base:
                logger.info(
                    "Resolved LoRA adapter '%s' → base model '%s'",
                    model_name,
                    base,
                )
                return base
        except Exception as exc:
            logger.debug("Could not read %s: %s", adapter_cfg_path, exc)

    # --- Only try the heavier fallback for local directories ----------------
    if local_path.is_dir():
        try:
            from utils.models import get_base_model_from_lora

            base = get_base_model_from_lora(model_name)
            if base:
                logger.info(
                    "Resolved LoRA adapter '%s' → base model '%s' "
                    "(via get_base_model_from_lora)",
                    model_name,
                    base,
                )
                return base
        except Exception as exc:
            logger.debug(
                "get_base_model_from_lora failed for '%s': %s",
                model_name,
                exc,
            )

    return model_name


def _check_tokenizer_config_needs_v5(model_name: str) -> bool:
    """Look up the model's ``tokenizer_config.json`` on HuggingFace and report
    whether its ``tokenizer_class`` is one that requires transformers 5.x.

    Every outcome, including failures, is memoized in
    ``_tokenizer_class_cache`` so each model name is fetched at most once.
    Any network/parse error yields False (fail-open to default version).
    """
    remembered = _tokenizer_class_cache.get(model_name)
    if remembered is not None:
        return remembered

    import urllib.request

    url = f"https://huggingface.co/{model_name}/raw/main/tokenizer_config.json"
    try:
        request = urllib.request.Request(
            url, headers = {"User-Agent": "unsloth-studio"}
        )
        with urllib.request.urlopen(request, timeout = 10) as response:
            payload = json.loads(response.read().decode())
        tokenizer_class = payload.get("tokenizer_class", "")
        needs_v5 = tokenizer_class in _TRANSFORMERS_5_TOKENIZER_CLASSES
        if needs_v5:
            logger.info(
                "Dynamic check: %s uses tokenizer_class=%s (requires transformers 5.x)",
                model_name,
                tokenizer_class,
            )
    except Exception as exc:
        logger.debug(
            "Could not fetch tokenizer_config.json for '%s': %s", model_name, exc
        )
        needs_v5 = False

    _tokenizer_class_cache[model_name] = needs_v5
    return needs_v5


def needs_transformers_5(model_name: str) -> bool:
    """Tell whether *model_name* needs ``transformers>=5.3.0``.

    The lowercased name is first matched against the hardcoded substring
    list of known models. Only if none matches is ``tokenizer_config.json``
    fetched from HuggingFace to see whether the tokenizer_class
    (e.g. ``TokenizersBackend``) requires v5.
    """
    name_lower = model_name.lower()
    for marker in TRANSFORMERS_5_MODEL_SUBSTRINGS:
        if marker in name_lower:
            return True
    return _check_tokenizer_config_needs_v5(model_name)


# ---------------------------------------------------------------------------
# Version switching (in-process — used only by export)
# ---------------------------------------------------------------------------


def _get_in_memory_version() -> str | None:
    """Return the transformers version currently loaded in this process."""
    tf = sys.modules.get("transformers")
    if tf is not None:
        return getattr(tf, "__version__", None)
    return None


# All top-level prefixes that hold references to transformers internals.
# _purge_modules() evicts every sys.modules entry that equals one of these
# names or lives underneath it (i.e. starts with "<prefix>.").
_PURGE_PREFIXES = (
    "transformers",
    "huggingface_hub",
    "unsloth",
    "unsloth_zoo",
    "peft",
    "trl",
    "accelerate",
    "auto_gptq",
    # NOTE: bitsandbytes is intentionally EXCLUDED — it registers torch custom
    # operators at import time via torch.library.define(). Those registrations
    # live in torch's global operator registry which survives module purge.
    # Re-importing bitsandbytes after purge → duplicate registration → crash.
    #
    # The entries below are our own modules that import from transformers at
    # module level (e.g. model_config.py: `from transformers import AutoConfig`)
    "utils.models",
    "core.training",
    "core.inference",
    "core.export",
)


def _purge_modules() -> int:
    """Evict transformers and everything listed in ``_PURGE_PREFIXES``.

    Import finder caches are invalidated first. A ``sys.modules`` entry is
    then deleted when its name is exactly one of the prefixes or lives
    underneath one (``"<prefix>."`` followed by anything).

    Returns the number of modules purged.
    """
    importlib.invalidate_caches()

    def _is_stale(module_name: str) -> bool:
        for prefix in _PURGE_PREFIXES:
            if module_name == prefix or module_name.startswith(prefix + "."):
                return True
        return False

    # Snapshot the keys first: sys.modules is mutated while we delete.
    stale = [name for name in tuple(sys.modules) if _is_stale(name)]
    for name in stale:
        del sys.modules[name]
    return len(stale)


# Requirement specs installed into .venv_t5/ (one `_install_to_venv_t5` call
# each). Entries pinned with "==" are version-checked by `_venv_t5_is_valid`;
# unpinned entries (no "==") only need their package directory to exist.
_VENV_T5_PACKAGES = (
    f"transformers=={TRANSFORMERS_5_VERSION}",
    "huggingface_hub==1.7.1",
    "hf_xet==1.4.2",
    "tiktoken",
)


def _venv_t5_is_valid() -> bool:
    """Check that .venv_t5/ holds every package from ``_VENV_T5_PACKAGES``.

    For each spec the package directory must exist (underscore or dash
    spelling). Specs pinned with ``==`` must additionally have a
    ``<name>-*.dist-info/METADATA`` whose ``Version:`` header equals the pin.
    Returns False on the first missing package or version mismatch.
    """
    if not (os.path.isdir(_VENV_T5_DIR) and os.listdir(_VENV_T5_DIR)):
        return False

    root = Path(_VENV_T5_DIR)

    def _first_version(metadata_file: Path) -> str | None:
        # Value of the first "Version:" line, or None if the file has none.
        for row in metadata_file.read_text(errors = "replace").splitlines():
            if row.startswith("Version:"):
                return row.split(":", 1)[1].strip()
        return None

    for spec in _VENV_T5_PACKAGES:
        name, *pin = spec.split("==")
        wanted = pin[0] if pin else None
        underscored = name.replace("-", "_")
        dashed = underscored.replace("_", "-")

        # The importable package directory has to be present in either spelling.
        if not ((root / underscored).is_dir() or (root / dashed).is_dir()):
            return False

        # Unpinned spec: presence alone is sufficient.
        if wanted is None:
            continue

        # Pinned spec: the first dist-info that declares a version decides.
        for dist_info in root.glob(f"{underscored}-*.dist-info"):
            metadata = dist_info / "METADATA"
            if not metadata.is_file():
                continue
            found = _first_version(metadata)
            if found is None:
                continue
            if found != wanted:
                logger.info(
                    ".venv_t5 has %s==%s but need %s",
                    name,
                    found,
                    wanted,
                )
                return False
            break
        else:
            # No dist-info carried a usable Version header.
            return False

    return True


def _install_to_venv_t5(pkg: str) -> bool:
    """Install one requirement spec into .venv_t5/ without its dependencies.

    ``uv pip install`` is attempted first, but only when a ``uv`` executable
    is already on PATH (uv is never installed at runtime). If uv is absent or
    exits non-zero, ``python -m pip install`` is used instead.

    Returns True when either installer exits with status 0, False otherwise.
    """
    # Arguments common to both installers.
    target_args = ["--target", _VENV_T5_DIR, "--no-deps", "--upgrade", pkg]
    # Merge stderr into stdout and decode it so it can be logged on failure.
    capture = {
        "stdout": subprocess.PIPE,
        "stderr": subprocess.STDOUT,
        "text": True,
    }

    if shutil.which("uv"):
        uv_cmd = ["uv", "pip", "install", "--python", sys.executable, *target_args]
        if subprocess.run(uv_cmd, **capture).returncode == 0:
            return True
        logger.warning("uv install of %s failed, falling back to pip", pkg)

    pip_cmd = [sys.executable, "-m", "pip", "install", *target_args]
    outcome = subprocess.run(pip_cmd, **capture)
    if outcome.returncode == 0:
        return True

    logger.error("install failed:\n%s", outcome.stdout)
    return False


def _ensure_venv_t5_exists() -> bool:
    """Make sure .venv_t5/ is populated, rebuilding it from scratch if not.

    When validation fails the directory is wiped, recreated, and every spec
    in ``_VENV_T5_PACKAGES`` is installed in order; the first failed install
    aborts the rebuild. Returns True if the directory is usable afterwards.
    """
    if not _venv_t5_is_valid():
        logger.warning(
            ".venv_t5 not found or incomplete at %s -- installing at runtime",
            _VENV_T5_DIR,
        )
        shutil.rmtree(_VENV_T5_DIR, ignore_errors = True)
        os.makedirs(_VENV_T5_DIR, exist_ok = True)
        # all() stops at the first failing install, like an early return.
        if not all(_install_to_venv_t5(spec) for spec in _VENV_T5_PACKAGES):
            return False
        logger.info("Installed transformers 5.x to %s", _VENV_T5_DIR)
    return True


def _activate_5x() -> None:
    """Switch this process to the transformers build stored in .venv_t5/.

    Steps: ensure .venv_t5/ is installed, put it at the front of ``sys.path``
    (unless already listed), purge cached modules, then import
    ``transformers`` again so the new copy is loaded.

    Raises:
        RuntimeError: if .venv_t5/ is missing and could not be installed.
    """
    venv_ready = _ensure_venv_t5_exists()
    if not venv_ready:
        raise RuntimeError(
            f"Cannot activate transformers 5.x: .venv_t5 missing at {_VENV_T5_DIR}"
        )

    already_listed = _VENV_T5_DIR in sys.path
    if not already_listed:
        sys.path.insert(0, _VENV_T5_DIR)
        logger.info("Prepended %s to sys.path", _VENV_T5_DIR)

    purged = _purge_modules()
    logger.info("Purged %d cached modules", purged)

    # Fresh import: resolves against the updated sys.path.
    import transformers as reloaded

    logger.info("Loaded transformers %s", reloaded.__version__)


def _deactivate_5x() -> None:
    """Switch this process back to the default transformers installation.

    Every occurrence of .venv_t5/ is stripped from ``sys.path``, cached
    modules are purged, and ``transformers`` is imported again.
    """
    # In-place slice assignment keeps the same sys.path list object.
    sys.path[:] = [entry for entry in sys.path if entry != _VENV_T5_DIR]
    logger.info("Removed %s from sys.path", _VENV_T5_DIR)

    purged = _purge_modules()
    logger.info("Purged %d cached modules", purged)

    # Fresh import: resolves against the restored sys.path.
    import transformers as reloaded

    logger.info("Reverted to transformers %s", reloaded.__version__)


def ensure_transformers_version(model_name: str) -> None:
    """Make the ``transformers`` major version in memory fit *model_name*.

    The name is first resolved through ``_resolve_base_model`` (so a LoRA
    adapter with a custom name is judged by the base model recorded in its
    ``adapter_config.json``). Then:

      - model needs 5.x: .venv_t5/ is prepended to sys.path and modules are
        purged (``_activate_5x``);
      - model needs 4.x: .venv_t5/ is removed from sys.path and modules are
        purged (``_deactivate_5x``).

    Nothing is switched when the already-loaded transformers has the same
    major version as the target.

    NOTE: Training and inference use subprocess isolation instead of this
    function. This is only used by the export path (routes/export.py).
    """

    def _major(version: str) -> int:
        return int(version.split(".")[0])

    # Judge LoRA adapters by their base model.
    base_model = _resolve_base_model(model_name)
    needs_v5 = needs_transformers_5(base_model)
    if needs_v5:
        wanted = TRANSFORMERS_5_VERSION
    else:
        wanted = TRANSFORMERS_DEFAULT_VERSION
    wanted_major = _major(wanted)

    # What this process has actually imported so far (None if nothing yet).
    loaded = _get_in_memory_version()

    logger.info(
        "Version check for '%s' (resolved: '%s'): need=%s, in_memory=%s",
        model_name,
        base_model,
        wanted,
        loaded,
    )

    # Same major version already in memory: nothing to do.
    if loaded is not None and _major(loaded) == wanted_major:
        logger.info(
            "transformers %s already loaded — correct for '%s'",
            loaded,
            model_name,
        )
        return

    # Otherwise swap in the required major version.
    if needs_v5:
        logger.info("Activating transformers %s via .venv_t5…", TRANSFORMERS_5_VERSION)
        _activate_5x()
    else:
        logger.info(
            "Reverting to default transformers %s…", TRANSFORMERS_DEFAULT_VERSION
        )
        _deactivate_5x()

    logger.info("✓ transformers version is now %s", _get_in_memory_version())


================================================
FILE: studio/backend/utils/utils.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""
Shared backend utilities
"""

import os
import structlog
from loggers import get_logger
from contextlib import contextmanager
from pathlib import Path
import shutil
import tempfile


# Module-level logger, obtained from the project's `loggers.get_logger` helper.
logger = get_logger(__name__)


@contextmanager
def without_hf_auth():
    """
    Context manager to temporarily disable HuggingFace authentication.

    While the context is active:
      * ``HF_TOKEN``, ``HUGGINGFACE_HUB_TOKEN`` and ``HF_HOME`` are removed
        from ``os.environ``;
      * ``HF_HUB_DISABLE_IMPLICIT_TOKEN`` is set to ``"1"``;
      * token files found at ``~/.cache/huggingface/token`` and
        ``~/.huggingface/token`` are moved aside to temporary files.

    On exit the token files, the removed variables and the previous value of
    ``HF_HUB_DISABLE_IMPLICIT_TOKEN`` are restored. The setup steps run inside
    the same ``try``/``finally`` as the wrapped body, so if setup fails
    part-way (e.g. a token file cannot be moved) whatever was already changed
    is still rolled back before the error propagates.

    Usage:
        with without_hf_auth():
            # Code that should run without cached tokens
            model_info(model_name, token=None)
    """
    env_vars = ["HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "HF_HOME"]
    saved_env = {}
    # (original location, temp path) for every token file actually moved.
    token_files = []
    # Captured before any mutation so the finally block can always restore it.
    saved_disable = os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN")

    try:
        # Save and remove the auth-related environment variables.
        for var in env_vars:
            if var in os.environ:
                saved_env[var] = os.environ.pop(var)

        os.environ["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = "1"

        # Move token files temporarily
        token_locations = [
            Path.home() / ".cache" / "huggingface" / "token",
            Path.home() / ".huggingface" / "token",
        ]
        for token_loc in token_locations:
            if token_loc.exists():
                temp = tempfile.NamedTemporaryFile(delete = False)
                temp.close()
                shutil.move(str(token_loc), temp.name)
                # Record only after a successful move, so restore never
                # overwrites a token with an empty placeholder file.
                token_files.append((token_loc, temp.name))

        yield
    finally:
        # Restore tokens (best effort: a failure is logged, not raised).
        for original, temp in token_files:
            try:
                original.parent.mkdir(parents = True, exist_ok = True)
                shutil.move(temp, str(original))
            except Exception as e:
                logger.error(f"Failed to restore token {original}: {e}")

        # Restore environment
        for var, value in saved_env.items():
            os.environ[var] = value

        if saved_disable is not None:
            os.environ["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = saved_disable
        else:
            os.environ.pop("HF_HUB_DISABLE_IMPLICIT_TOKEN", None)


def format_error_message(error: Exception, model_name: str) -> str:
    """
    Translate a model-loading exception into a message fit for end users.

    The lowercased exception text is scanned for known markers, in this
    order: not found, authentication failure, gated model, invalid token,
    memory/device problems. The first category that matches wins.

    Args:
        error: The exception that occurred
        model_name: Name of the model being loaded; only the part after the
            last "/" is shown in messages

    Returns:
        User-friendly error string, or ``str(error)`` unchanged when no
        marker matches
    """
    text = str(error).lower()
    model_short = model_name.rpartition("/")[2]

    def mentions(*needles: str) -> bool:
        return any(needle in text for needle in needles)

    if mentions("repository not found", "404"):
        return f"Model '{model_short}' not found. Check the model name."

    if mentions("401", "unauthorized"):
        return f"Authentication failed for '{model_short}'. Please provide a valid HF token."

    if mentions("gated", "access to model"):
        return f"Model '{model_short}' requires authentication. Please provide a valid HF token."

    if mentions("invalid user token"):
        return "Invalid HF token. Please check your token and try again."

    if not mentions("memory", "cuda", "mlx", "out of memory"):
        # Generic fallback
        return str(error)

    # Imported lazily, only when a memory/device message has to be built.
    from utils.hardware import get_device

    device = get_device()
    labels = {"cuda": "GPU", "mlx": "Apple Silicon GPU", "cpu": "system"}
    device_label = labels.get(device.value, "GPU")
    return f"Not enough {device_label} memory to load '{model_short}'. Try a smaller model or free memory."


================================================
FILE: studio/frontend/.gitignore
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
test/
*.local
.env
.env.*
.omx/

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
._*
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
/src/features/recipe-studio/AGENTS.md
/docs


================================================
FILE: studio/frontend/.gitkeep
================================================


================================================
FILE: studio/frontend/biome.json
================================================
{
  "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
  "files": {
    "ignore": [
      "dist",
      "node_modules",
      "test",
      "test/**",
      "**/._*",
      "._*",
      "**/.DS_Store",
      "tsconfig*.json"
    ]
  },
  "formatter": {
    "enabled": true,
    "indentStyle": "space",
    "indentWidth": 2
  },
  "organizeImports": {
    "enabled": true
  },
  "linter": {
    "enabled": true,
    "rules": {
      "recommended": true,
      "a11y": { "all": true },
      "complexity": { "all": true },
      "correctness": { "all": true, "useImportExtensions": "off" },
      "performance": { "all": true },
      "security": { "all": true },
      "style": {
        "all": true,
        "useNamingConvention": { "options": { "strictCase": false } }
      },
      "suspicious": { "all": true, "noReactSpecificProps": "off" }
    }
  },
  "overrides": [
    {
      "include": ["vite.config.ts", "eslint.config.js"],
      "linter": {
        "rules": {
          "correctness": { "noNodejsModules": "off" },
          "style": { "noDefaultExport": "off" }
        }
      }
    },
    {
      "include": ["src/components/assistant-ui/reasoning.tsx"],
      "linter": {
        "rules": {
          "style": { "useNamingConvention": "off" }
        }
      }
    },
    {
      "include": ["src/components/assistant-ui/attachment.tsx"],
      "linter": {
        "rules": {
          "style": { "useNamingConvention": "off" }
        }
      }
    },
    {
      "include": ["src/components/assistant-ui/tool-fallback.tsx"],
      "linter": {
        "rules": {
          "style": { "useNamingConvention": "off" }
        }
      }
    },
    {
      "include": ["src/components/component-example.tsx"],
      "linter": {
        "rules": {
          "style": { "noNamespaceImport": "off" }
        }
      }
    },
    {
      "include": ["src/config/env.ts"],
      "linter": {
        "rules": {
          "style": { "useNamingConvention": "off" }
        }
      }
    },
    {
      "include": ["src/components/layout/index.ts"],
      "linter": {
        "rules": {
          "performance": { "noBarrelFile": "off" }
        }
      }
    },
    {
      "include": ["src/features/**/index.ts"],
      "linter": {
        "rules": {
          "performance": { "noBarrelFile": "off" }
        }
      }
    },
    {
      "include": ["src/features/chat/thread-sidebar.tsx"],
      "linter": {
        "rules": {
          "a11y": { "useSemanticElements": "off" }
        }
      }
    },
    {
      "include": ["src/features/chat/runtime-provider.tsx"],
      "linter": {
        "rules": {
          "style": { "useNamingConvention": "off" }
        }
      }
    },
    {
      "include": ["src/components/assistant-ui/thread.tsx"],
      "linter": {
        "rules": {
          "style": { "useNamingConvention": "off" }
        }
      }
    },
    {
      "include": ["src/features/onboarding/components/steps/summary-step.tsx"],
      "linter": {
        "rules": {
          "style": { "useExplicitLengthCheck": "off" }
        }
      }
    },
    {
      "include": ["src/components/ui/**"],
      "linter": {
        "enabled": false
      },
      "formatter": {
        "enabled": false
      },
      "organizeImports": {
        "enabled": false
      }
    }
  ]
}


================================================
FILE: studio/frontend/components.json
================================================
{
  "$schema": "https://ui.shadcn.com/schema.json",
  "style": "radix-maia",
  "rsc": false,
  "tsx": true,
  "tailwind": {
    "config": "",
    "css": "src/index.css",
    "baseColor": "neutral",
    "cssVariables": true,
    "prefix": ""
  },
  "iconLibrary": "hugeicons",
  "menuColor": "default",
  "menuAccent": "subtle",
  "aliases": {
    "components": "@/components",
    "utils": "@/lib/utils",
    "ui": "@/components/ui",
    "lib": "@/lib",
    "hooks": "@/hooks"
  },
  "registries": {
    "@magicui": "https://magicui.design/r/{name}"
  }
}


================================================
FILE: studio/frontend/data-designer.openapi (1).yaml
================================================
openapi: 3.1.0
info:
  title: NeMo Data Designer Microservice
  description: Service for generating synthetic data.
  version: 1.5.0
paths:
  /v1/data-designer/jobs:
    post:
      tags:
      - Data Designer
      summary: Create Job
      operationId: create_job_v1_data_designer_jobs_post
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DataDesignerJobRequest'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DataDesignerJob'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
    get:
      tags:
      - Data Designer
      summary: List Jobs
      operationId: list_jobs_v1_data_designer_jobs_get
      parameters:
      - name: page
        in: query
        required: false
        schema:
          type: integer
          exclusiveMinimum: 0
          description: Page number.
          default: 1
          title: Page
        description: Page number.
      - name: page_size
        in: query
        required: false
        schema:
          type: integer
          exclusiveMinimum: 0
          description: Page size.
          default: 10
          title: Page Size
        description: Page size.
      - name: sort
        in: query
        required: false
        schema:
          allOf:
          - $ref: '#/components/schemas/DataDesignerJobsSortField'
          description: The field to sort by. To sort in decreasing order, use `-`
            in front of the field name.
          default: -created_at
        description: The field to sort by. To sort in decreasing order, use `-` in
          front of the field name.
      - in: query
        name: filter
        style: deepObject
        required: false
        explode: true
        schema:
          $ref: '#/components/schemas/DataDesignerJobsListFilter'
        description: Filter jobs on various criteria.
      - in: query
        name: search
        style: deepObject
        required: false
        explode: true
        schema:
          $ref: '#/components/schemas/DataDesignerJobsSearch'
        description: "\nSearch jobs using substring matching.\nYou can combine multiple\
          \ search fields and filters.\n\nFor example:\n- `?search[name]=training`:\
          \ searches all jobs with 'training' in the name.\n- `?search[project]=my-project`:\
          \ searches all jobs with 'my-project'\n  in the project field.\n- `?search[name]=training&search[name]=eval`:\
          \ searches all jobs with\n  'training' OR 'eval' in the name.\n- `?search[name]=training&search[project]=my-project`:\
          \ searches all\n  jobs with 'training' in the name AND 'my-project' in the\
          \ project.\n"
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DataDesignerJobsPage'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}:
    get:
      tags:
      - Data Designer
      summary: Get Job
      operationId: get_job_v1_data_designer_jobs__job_id__get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DataDesignerJob'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
    delete:
      tags:
      - Data Designer
      summary: Delete Job
      operationId: delete_job_v1_data_designer_jobs__job_id__delete
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema: {}
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/cancel:
    post:
      tags:
      - Data Designer
      summary: Cancel Job
      operationId: cancel_job_v1_data_designer_jobs__job_id__cancel_post
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DataDesignerJob'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/logs:
    get:
      tags:
      - Data Designer
      summary: Get Job Logs
      operationId: get_job_logs_v1_data_designer_jobs__job_id__logs_get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      - name: limit
        in: query
        required: false
        schema:
          anyOf:
          - type: integer
          - type: 'null'
          title: Limit
      - name: page_cursor
        in: query
        required: false
        schema:
          anyOf:
          - type: string
          - type: 'null'
          title: Page Cursor
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PlatformJobLogPage'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/results:
    get:
      tags:
      - Data Designer
      summary: List Job Results
      operationId: list_job_results_v1_data_designer_jobs__job_id__results_get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PlatformJobListResultResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/results/analysis/download:
    get:
      tags:
      - Data Designer
      summary: Download Job Result Analysis
      operationId: download_job_result_analysis_v1_data_designer_jobs__job_id__results_analysis_download_get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema: {}
        '404':
          description: Not Found
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/results/dataset/download:
    get:
      tags:
      - Data Designer
      summary: Download Job Result Dataset
      operationId: download_job_result_dataset_v1_data_designer_jobs__job_id__results_dataset_download_get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
        '404':
          description: Not Found
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/results/{result_name}:
    get:
      tags:
      - Data Designer
      summary: Get Job Result
      operationId: get_job_result_v1_data_designer_jobs__job_id__results__result_name__get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      - name: result_name
        in: path
        required: true
        schema:
          type: string
          title: Result Name
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PlatformJobResultResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/results/{result_name}/download:
    get:
      tags:
      - Data Designer
      summary: Download Job Result
      operationId: download_job_result_v1_data_designer_jobs__job_id__results__result_name__download_get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      - name: result_name
        in: path
        required: true
        schema:
          type: string
          title: Result Name
      responses:
        '200':
          description: Successful Response
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
        '404':
          description: Not Found
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/jobs/{job_id}/status:
    get:
      tags:
      - Data Designer
      summary: Get Job Status
      operationId: get_job_status_v1_data_designer_jobs__job_id__status_get
      parameters:
      - name: job_id
        in: path
        required: true
        schema:
          type: string
          title: Job Id
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PlatformJobStatusResponse'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/preview:
    post:
      tags:
      - Data Designer
      summary: Generate preview Data Designer
      operationId: preview_v1_data_designer_preview_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PreviewRequest'
        required: true
      responses:
        '200':
          description: Successful Response
          content:
            application/jsonl:
              schema:
                $ref: '#/components/schemas/PreviewMessage'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
  /v1/data-designer/settings:
    get:
      tags:
      - Data Designer
      summary: Get Data Designer settings
      description: Returns the settings available for Data Designer.
      operationId: get_settings_v1_data_designer_settings_get
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SettingsResponse'
components:
  schemas:
    BernoulliMixtureSamplerParams:
      properties:
        p:
          type: number
          maximum: 1.0
          minimum: 0.0
          title: P
          description: Bernoulli distribution probability of success.
        dist_name:
          type: string
          title: Dist Name
          description: Mixture distribution name. Samples will be equal to the distribution
            sample with probability `p`, otherwise equal to 0. Must be a valid scipy.stats
            distribution name.
        dist_params:
          additionalProperties: true
          type: object
          title: Dist Params
          description: Parameters of the scipy.stats distribution given in `dist_name`.
        sampler_type:
          type: string
          const: bernoulli_mixture
          title: Sampler Type
          default: bernoulli_mixture
      additionalProperties: false
      type: object
      required:
      - p
      - dist_name
      - dist_params
      title: BernoulliMixtureSamplerParams
      description: "Parameters for sampling from a Bernoulli mixture distribution.\n\
        \nCombines a Bernoulli distribution with another continuous distribution,\
        \ creating a mixture\nwhere values are either 0 (with probability 1-p) or\
        \ sampled from the specified distribution\n(with probability p). This is useful\
        \ for modeling scenarios with many zero values mixed with\na continuous distribution\
        \ of non-zero values.\n\nCommon use cases include modeling sparse events,\
        \ zero-inflated data, or situations where\nan outcome either doesn't occur\
        \ (0) or follows a specific distribution when it does occur.\n\nAttributes:\n\
        \    p: Probability of sampling from the mixture distribution (non-zero outcome).\n\
        \        Must be between 0.0 and 1.0 (inclusive). With probability 1-p, the\
        \ sample is 0.\n    dist_name: Name of the scipy.stats distribution to sample\
        \ from when outcome is non-zero.\n        Must be a valid scipy.stats distribution\
        \ name (e.g., \"norm\", \"gamma\", \"expon\").\n    dist_params: Parameters\
        \ for the specified scipy.stats distribution."
    BernoulliSamplerParams:
      properties:
        p:
          type: number
          maximum: 1.0
          minimum: 0.0
          title: P
          description: Probability of success.
        sampler_type:
          type: string
          const: bernoulli
          title: Sampler Type
          default: bernoulli
      additionalProperties: false
      type: object
      required:
      - p
      title: BernoulliSamplerParams
      description: "Parameters for sampling from a Bernoulli distribution.\n\nSamples\
        \ binary values (0 or 1) representing the outcome of a single trial with a\
        \ fixed\nprobability of success. This is the simplest discrete probability\
        \ distribution, useful for\nmodeling binary outcomes like success/failure,\
        \ yes/no, or true/false.\n\nAttributes:\n    p: Probability of success (sampling\
        \ 1). Must be between 0.0 and 1.0 (inclusive).\n        The probability of\
        \ failure (sampling 0) is automatically 1 - p."
    BinomialSamplerParams:
      properties:
        n:
          type: integer
          title: N
          description: Number of trials.
        p:
          type: number
          maximum: 1.0
          minimum: 0.0
          title: P
          description: Probability of success on each trial.
        sampler_type:
          type: string
          const: binomial
          title: Sampler Type
          default: binomial
      additionalProperties: false
      type: object
      required:
      - n
      - p
      title: BinomialSamplerParams
      description: "Parameters for sampling from a Binomial distribution.\n\nSamples\
        \ integer values representing the number of successes in a fixed number of\
        \ independent\nBernoulli trials, each with the same probability of success.\
        \ Commonly used to model the number\nof successful outcomes in repeated experiments.\n\
        \nAttributes:\n    n: Number of independent trials. Must be a positive integer.\n\
        \    p: Probability of success on each trial. Must be between 0.0 and 1.0\
        \ (inclusive)."
    BuildStage:
      type: string
      enum:
      - pre_batch
      - post_batch
      - pre_generation
      - post_generation
      title: BuildStage
    CategorySamplerParams:
      properties:
        values:
          items:
            anyOf:
            - type: string
            - type: integer
            - type: number
          type: array
          minItems: 1
          title: Values
          description: List of possible categorical values that can be sampled from.
        weights:
          type: array
          items:
            type: number
          title: Weights
          description: List of unnormalized probability weights to assign to each
            value, in order. Values with larger weights are sampled with higher probability.
        sampler_type:
          type: string
          const: category
          title: Sampler Type
          default: category
      additionalProperties: false
      type: object
      required:
      - values
      title: CategorySamplerParams
      description: "Parameters for categorical sampling with optional probability\
        \ weighting.\n\nSamples values from a discrete set of categories. When weights\
        \ are provided, values are\nsampled according to their assigned probabilities.\
        \ Without weights, uniform sampling is used.\n\nAttributes:\n    values: List\
        \ of possible categorical values to sample from. Can contain strings, integers,\n\
        \        or floats. Must contain at least one value.\n    weights: Optional\
        \ unnormalized probability weights for each value. If provided, must be\n\
        \        the same length as `values`. Weights are automatically normalized\
        \ to sum to 1.0.\n        Larger weights result in higher sampling probability\
        \ for the corresponding value."
    CodeLang:
      type: string
      enum:
      - go
      - javascript
      - java
      - kotlin
      - python
      - ruby
      - rust
      - scala
      - swift
      - typescript
      - sql:sqlite
      - sql:tsql
      - sql:bigquery
      - sql:mysql
      - sql:postgres
      - sql:ansi
      title: CodeLang
    CodeValidatorParams:
      properties:
        code_lang:
          allOf:
          - $ref: '#/components/schemas/CodeLang'
          description: The language of the code to validate
      additionalProperties: false
      type: object
      required:
      - code_lang
      title: CodeValidatorParams
      description: "Configuration for code validation. Supports Python and SQL code\
        \ validation.\n\nAttributes:\n    code_lang: The language of the code to validate.\
        \ Supported values include: `python`,\n        `sql:sqlite`, `sql:postgres`,\
        \ `sql:mysql`, `sql:tsql`, `sql:bigquery`, `sql:ansi`."
    ColumnInequalityConstraint:
      properties:
        target_column:
          type: string
          title: Target Column
        rhs:
          type: string
          title: Rhs
        operator:
          $ref: '#/components/schemas/InequalityOperator'
      additionalProperties: false
      type: object
      required:
      - target_column
      - rhs
      - operator
      title: ColumnInequalityConstraint
    DataDesignerConfig:
      properties:
        columns:
          items:
            oneOf:
            - $ref: '#/components/schemas/ExpressionColumnConfig'
            - $ref: '#/components/schemas/LLMCodeColumnConfig'
            - $ref: '#/components/schemas/LLMJudgeColumnConfig'
            - $ref: '#/components/schemas/LLMStructuredColumnConfig'
            - $ref: '#/components/schemas/LLMTextColumnConfig'
            - $ref: '#/components/schemas/SamplerColumnConfig'
            - $ref: '#/components/schemas/SeedDatasetColumnConfig'
            - $ref: '#/components/schemas/ValidationColumnConfig'
            discriminator:
              propertyName: column_type
              mapping:
                expression: '#/components/schemas/ExpressionColumnConfig'
                llm-code: '#/components/schemas/LLMCodeColumnConfig'
                llm-judge: '#/components/schemas/LLMJudgeColumnConfig'
                llm-structured: '#/components/schemas/LLMStructuredColumnConfig'
                llm-text: '#/components/schemas/LLMTextColumnConfig'
                sampler: '#/components/schemas/SamplerColumnConfig'
                seed-dataset: '#/components/schemas/SeedDatasetColumnConfig'
                validation: '#/components/schemas/ValidationColumnConfig'
          type: array
          minItems: 1
          title: Columns
        model_configs:
          type: array
          items:
            $ref: '#/components/schemas/ModelConfigInput'
          title: Model Configs
        seed_config:
          $ref: '#/components/schemas/SeedConfig'
        constraints:
          type: array
          items:
            anyOf:
            - $ref: '#/components/schemas/ScalarInequalityConstraint'
            - $ref: '#/components/schemas/ColumnInequalityConstraint'
          title: Constraints
        profilers:
          type: array
          items:
            $ref: '#/components/schemas/JudgeScoreProfilerConfig'
          title: Profilers
        processors:
          type: array
          items:
            $ref: '#/components/schemas/ProcessorConfig'
          title: Processors
      additionalProperties: false
      type: object
      required:
      - columns
      title: DataDesignerConfig
      description: "Configuration for NeMo Data Designer.\n\nThis class defines the\
        \ main configuration structure for NeMo Data Designer,\nwhich orchestrates\
        \ the generation of synthetic data.\n\nAttributes:\n    columns: Required\
        \ list of column configurations defining how each column\n        should be\
        \ generated. Must contain at least one column.\n    model_configs: Optional\
        \ list of model configurations for LLM-based generation.\n        Each model\
        \ config defines the model, provider, and inference parameters.\n    seed_config:\
        \ Optional seed dataset settings to use for generation.\n    constraints:\
        \ Optional list of column constraints.\n    profilers: Optional list of column\
        \ profilers for analyzing generated data characteristics."
    DataDesignerJob:
      properties:
        id:
          type: string
          title: Id
        name:
          type: string
          title: Name
        description:
          type: string
          title: Description
        project:
          type: string
          title: Project
        namespace:
          type: string
          title: Namespace
        created_at:
          type: string
          title: Created At
        updated_at:
          type: string
          title: Updated At
        spec:
          $ref: '#/components/schemas/DataDesignerJobConfig'
        status:
          $ref: '#/components/schemas/PlatformJobStatus'
        status_details:
          type: object
          additionalProperties: true
          title: Status Details
        error_details:
          type: object
          additionalProperties: true
          title: Error Details
        ownership:
          type: object
          additionalProperties: true
          title: Ownership
        custom_fields:
          type: object
          additionalProperties: true
          title: Custom Fields
      type: object
      required:
      - name
      - spec
      title: DataDesignerJob
    DataDesignerJobConfig:
      properties:
        num_records:
          type: integer
          title: Num Records
        config:
          $ref: '#/components/schemas/DataDesignerConfig'
      type: object
      required:
      - num_records
      - config
      title: DataDesignerJobConfig
    DataDesignerJobRequest:
      properties:
        name:
          type: string
          title: Name
        description:
          type: string
          title: Description
        namespace:
          type: string
          title: Namespace
        project:
          type: string
          title: Project
        spec:
          $ref: '#/components/schemas/DataDesignerJobConfig'
        ownership:
          type: object
          additionalProperties: true
          title: Ownership
        custom_fields:
          type: object
          additionalProperties: true
          title: Custom Fields
      type: object
      required:
      - spec
      title: DataDesignerJobRequest
    DataDesignerJobsListFilter:
      properties:
        created_at:
          allOf:
          - $ref: '#/components/schemas/DatetimeFilter'
          description: Jobs created at 'gte' datetime or 'lte' datetime.
        name:
          type: string
          title: Name
          description: Name of the job.
        namespace:
          type: string
          title: Namespace
          description: Namespace of the job.
        project:
          type: string
          title: Project
          description: Project containing the job.
        status:
          allOf:
          - $ref: '#/components/schemas/PlatformJobStatus'
          description: The current status.
        updated_at:
          allOf:
          - $ref: '#/components/schemas/DatetimeFilter'
          description: Jobs updated at 'gte' datetime or 'lte' datetime.
      additionalProperties: false
      type: object
      title: DataDesignerJobsListFilter
    DataDesignerJobsPage:
      properties:
        object:
          type: string
          title: Object
          description: The type of object being returned.
          default: list
        data:
          items:
            $ref: '#/components/schemas/DataDesignerJob'
          type: array
          title: Data
        pagination:
          allOf:
          - $ref: '#/components/schemas/PaginationData'
          description: Pagination information.
        sort:
          type: string
          title: Sort
          description: The field on which the results are sorted.
        filter:
          allOf:
          - $ref: '#/components/schemas/DataDesignerJobsListFilter'
          description: Filtering information.
        search:
          allOf:
          - $ref: '#/components/schemas/DataDesignerJobsSearch'
          description: Search information.
      type: object
      required:
      - data
      title: DataDesignerJobsPage
    DataDesignerJobsSearch:
      properties:
        name:
          type: array
          items:
            type: string
          title: Name
          description: Search jobs where name contains any of these strings.
        project:
          type: array
          items:
            type: string
          title: Project
          description: Search jobs where project contains any of these strings.
      type: object
      title: DataDesignerJobsSearch
    DataDesignerJobsSortField:
      type: string
      enum:
      - created_at
      - -created_at
      - updated_at
      - -updated_at
      title: DataDesignerJobsSortField
    DatetimeFilter:
      properties:
        gte:
          type: string
          title: Gte
          description: Filter for results greater than or equal to this datetime.
        lte:
          type: string
          title: Lte
          description: Filter for results less than or equal to this datetime.
      additionalProperties: false
      type: object
      title: DatetimeFilter
    DatetimeSamplerParams:
      properties:
        start:
          type: string
          title: Start
          description: Earliest possible datetime for sampling range, inclusive.
        end:
          type: string
          title: End
          description: Latest possible datetime for sampling range, inclusive.
        unit:
          type: string
          enum:
          - Y
          - M
          - D
          - h
          - m
          - s
          title: Unit
          description: Sampling unit, i.e. the smallest possible time interval between
            samples.
          default: D
        sampler_type:
          type: string
          const: datetime
          title: Sampler Type
          default: datetime
      additionalProperties: false
      type: object
      required:
      - start
      - end
      title: DatetimeSamplerParams
      description: "Parameters for uniform datetime sampling within a specified range.\n\
        \nSamples datetime values uniformly between a start and end date with a specified\
        \ granularity.\nThe sampling unit determines the smallest possible time interval\
        \ between consecutive samples.\n\nAttributes:\n    start: Earliest possible\
        \ datetime for the sampling range (inclusive). Must be a valid\n        datetime\
        \ string parseable by pandas.to_datetime().\n    end: Latest possible datetime\
        \ for the sampling range (inclusive). Must be a valid\n        datetime string\
        \ parseable by pandas.to_datetime().\n    unit: Time unit for sampling granularity.\
        \ Options:\n        - \"Y\": Years\n        - \"M\": Months\n        - \"\
        D\": Days (default)\n        - \"h\": Hours\n        - \"m\": Minutes\n  \
        \      - \"s\": Seconds"
    DisplayModelProvider:
      properties:
        name:
          type: string
          title: Name
        provider_type:
          type: string
          title: Provider Type
          default: openai
        extra_body:
          type: object
          additionalProperties: true
          title: Extra Body
        allowed_models:
          type: array
          items:
            type: string
          title: Allowed Models
      additionalProperties: false
      type: object
      required:
      - name
      title: DisplayModelProvider
    DistributionType:
      type: string
      enum:
      - uniform
      - manual
      title: DistributionType
    ExpressionColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: expression
          title: Column Type
          default: expression
        expr:
          type: string
          title: Expr
        dtype:
          type: string
          enum:
          - int
          - float
          - str
          - bool
          title: Dtype
          default: str
      additionalProperties: false
      type: object
      required:
      - name
      - expr
      title: ExpressionColumnConfig
      description: "Configuration for derived columns using Jinja2 expressions.\n\n\
        Expression columns compute values by evaluating Jinja2 templates that reference\
        \ other\ncolumns. Useful for transformations, concatenations, conditional\
        \ logic, and derived\nfeatures without requiring LLM generation. The expression\
        \ is evaluated row-by-row.\n\nAttributes:\n    expr: Jinja2 expression to\
        \ evaluate. Can reference other column values using\n        {{ column_name\
        \ }} syntax. Supports filters, conditionals, and arithmetic.\n        Must\
        \ be a valid, non-empty Jinja2 template.\n    dtype: Data type to cast the\
        \ result to. Must be one of \"int\", \"float\", \"str\", or \"bool\".\n  \
        \      Defaults to \"str\". Type conversion is applied after expression evaluation.\n\
        \    column_type: Discriminator field, always \"expression\" for this configuration\
        \ type."
    FileStorageType:
      type: string
      enum:
      - nds
      title: FileStorageType
    GaussianSamplerParams:
      properties:
        mean:
          type: number
          title: Mean
          description: Mean of the Gaussian distribution
        stddev:
          type: number
          title: Stddev
          description: Standard deviation of the Gaussian distribution
        decimal_places:
          type: integer
          title: Decimal Places
          description: Number of decimal places to round the sampled values to.
        sampler_type:
          type: string
          const: gaussian
          title: Sampler Type
          default: gaussian
      additionalProperties: false
      type: object
      required:
      - mean
      - stddev
      title: GaussianSamplerParams
      description: "Parameters for sampling from a Gaussian (Normal) distribution.\n\
        \nSamples continuous values from a normal distribution characterized by its\
        \ mean and standard\ndeviation. The Gaussian distribution is one of the most\
        \ commonly used probability distributions,\nappearing naturally in many real-world\
        \ phenomena due to the Central Limit Theorem.\n\nAttributes:\n    mean: Mean\
        \ (center) of the Gaussian distribution. This is the expected value and the\n\
        \        location of the distribution's peak.\n    stddev: Standard deviation\
        \ of the Gaussian distribution. Controls the spread or width\n        of the\
        \ distribution. Must be positive.\n    decimal_places: Optional number of\
        \ decimal places to round sampled values to. If None,\n        values are\
        \ not rounded."
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    ImageContext:
      properties:
        modality:
          allOf:
          - $ref: '#/components/schemas/Modality'
          default: image
        column_name:
          type: string
          title: Column Name
        data_type:
          $ref: '#/components/schemas/ModalityDataType'
        image_format:
          $ref: '#/components/schemas/ImageFormat'
      type: object
      required:
      - column_name
      - data_type
      title: ImageContext
    ImageFormat:
      type: string
      enum:
      - png
      - jpg
      - jpeg
      - gif
      - webp
      title: ImageFormat
    IndexRange:
      properties:
        start:
          type: integer
          minimum: 0.0
          title: Start
          description: The start index of the index range (inclusive)
        end:
          type: integer
          minimum: 0.0
          title: End
          description: The end index of the index range (inclusive)
      additionalProperties: false
      type: object
      required:
      - start
      - end
      title: IndexRange
    InequalityOperator:
      type: string
      enum:
      - lt
      - le
      - gt
      - ge
      title: InequalityOperator
    InferenceParametersInput:
      properties:
        temperature:
          anyOf:
          - type: number
          - $ref: '#/components/schemas/UniformDistribution'
          - $ref: '#/components/schemas/ManualDistribution'
          - type: 'null'
          title: Temperature
        top_p:
          anyOf:
          - type: number
          - $ref: '#/components/schemas/UniformDistribution'
          - $ref: '#/components/schemas/ManualDistribution'
          - type: 'null'
          title: Top P
        max_tokens:
          type: integer
          title: Max Tokens
        max_parallel_requests:
          type: integer
          minimum: 1.0
          title: Max Parallel Requests
          default: 4
        timeout:
          type: integer
          title: Timeout
        extra_body:
          type: object
          additionalProperties: true
          title: Extra Body
      additionalProperties: false
      type: object
      title: InferenceParametersInput
    InferenceParametersOutput:
      properties:
        temperature:
          anyOf:
          - type: number
          - $ref: '#/components/schemas/UniformDistribution'
          - $ref: '#/components/schemas/ManualDistribution'
          - type: 'null'
          title: Temperature
        top_p:
          anyOf:
          - type: number
          - $ref: '#/components/schemas/UniformDistribution'
          - $ref: '#/components/schemas/ManualDistribution'
          - type: 'null'
          title: Top P
        max_tokens:
          type: integer
          title: Max Tokens
        max_parallel_requests:
          type: integer
          minimum: 1.0
          title: Max Parallel Requests
          default: 4
        timeout:
          type: integer
          title: Timeout
        extra_body:
          type: object
          additionalProperties: true
          title: Extra Body
      additionalProperties: false
      type: object
      title: InferenceParametersOutput
    JudgeScoreProfilerConfig:
      properties:
        model_alias:
          type: string
          title: Model Alias
        summary_score_sample_size:
          type: integer
          title: Summary Score Sample Size
          default: 20
      additionalProperties: false
      type: object
      required:
      - model_alias
      title: JudgeScoreProfilerConfig
    LLMCodeColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: llm-code
          title: Column Type
          default: llm-code
        prompt:
          type: string
          title: Prompt
        model_alias:
          type: string
          title: Model Alias
        system_prompt:
          type: string
          title: System Prompt
        multi_modal_context:
          type: array
          items:
            $ref: '#/components/schemas/ImageContext'
          title: Multi Modal Context
        code_lang:
          $ref: '#/components/schemas/CodeLang'
      additionalProperties: false
      type: object
      required:
      - name
      - prompt
      - model_alias
      - code_lang
      title: LLMCodeColumnConfig
      description: "Configuration for code generation columns using Large Language\
        \ Models.\n\nExtends LLMTextColumnConfig to generate code snippets in specific\
        \ programming languages\nor SQL dialects. The generated code is automatically\
        \ extracted from markdown code blocks\nfor the specified language. Inherits\
        \ all prompt templating capabilities.\n\nAttributes:\n    code_lang: Programming\
        \ language or SQL dialect for code generation. Supported\n        values include:\
        \ \"python\", \"javascript\", \"typescript\", \"java\", \"kotlin\", \"go\"\
        ,\n        \"rust\", \"ruby\", \"scala\", \"swift\", \"sql:sqlite\", \"sql:postgres\"\
        , \"sql:mysql\",\n        \"sql:tsql\", \"sql:bigquery\", \"sql:ansi\". See\
        \ CodeLang enum for complete list.\n    column_type: Discriminator field,\
        \ always \"llm-code\" for this configuration type."
    LLMJudgeColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: llm-judge
          title: Column Type
          default: llm-judge
        prompt:
          type: string
          title: Prompt
        model_alias:
          type: string
          title: Model Alias
        system_prompt:
          type: string
          title: System Prompt
        multi_modal_context:
          type: array
          items:
            $ref: '#/components/schemas/ImageContext'
          title: Multi Modal Context
        scores:
          items:
            $ref: '#/components/schemas/Score'
          type: array
          minItems: 1
          title: Scores
      additionalProperties: false
      type: object
      required:
      - name
      - prompt
      - model_alias
      - scores
      title: LLMJudgeColumnConfig
      description: "Configuration for LLM-as-a-judge quality assessment and scoring\
        \ columns.\n\nExtends LLMTextColumnConfig to create judge columns that evaluate\
        \ and score other\ngenerated content based on the defined criteria. Useful\
        \ for quality assessment, preference\nranking, and multi-dimensional evaluation\
        \ of generated data.\n\nAttributes:\n    scores: List of Score objects defining\
        \ the evaluation dimensions. Each score\n        represents a different aspect\
        \ to evaluate (e.g., accuracy, relevance, fluency).\n        Must contain\
        \ at least one score.\n    column_type: Discriminator field, always \"llm-judge\"\
        \ for this configuration type."
    LLMStructuredColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: llm-structured
          title: Column Type
          default: llm-structured
        prompt:
          type: string
          title: Prompt
        model_alias:
          type: string
          title: Model Alias
        system_prompt:
          type: string
          title: System Prompt
        multi_modal_context:
          type: array
          items:
            $ref: '#/components/schemas/ImageContext'
          title: Multi Modal Context
        output_format:
          anyOf:
          - additionalProperties: true
            type: object
          - {}
          title: Output Format
      additionalProperties: false
      type: object
      required:
      - name
      - prompt
      - model_alias
      - output_format
      title: LLMStructuredColumnConfig
      description: "Configuration for structured JSON generation columns using Large\
        \ Language Models.\n\nExtends LLMTextColumnConfig to generate structured data\
        \ conforming to a specified schema.\nUses JSON schema or Pydantic models to\
        \ define the expected output structure, enabling\ntype-safe and validated\
        \ structured output generation. Inherits prompt templating capabilities.\n\
        \nAttributes:\n    output_format: The schema defining the expected output\
        \ structure. Can be either:\n        - A Pydantic BaseModel class (recommended)\n\
        \        - A JSON schema dictionary\n    column_type: Discriminator field,\
        \ always \"llm-structured\" for this configuration type."
    LLMTextColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: llm-text
          title: Column Type
          default: llm-text
        prompt:
          type: string
          title: Prompt
        model_alias:
          type: string
          title: Model Alias
        system_prompt:
          type: string
          title: System Prompt
        multi_modal_context:
          type: array
          items:
            $ref: '#/components/schemas/ImageContext'
          title: Multi Modal Context
      additionalProperties: false
      type: object
      required:
      - name
      - prompt
      - model_alias
      title: LLMTextColumnConfig
      description: "Configuration for text generation columns using Large Language\
        \ Models.\n\nLLM text columns generate free-form text content using language\
        \ models via LiteLLM.\nPrompts support Jinja2 templating to reference values\
        \ from other columns, enabling\ncontext-aware generation. The generated text\
        \ can optionally include reasoning traces\nwhen models support extended thinking.\n\
        \nAttributes:\n    prompt: Prompt template for text generation. Supports Jinja2\
        \ syntax to\n        reference other columns (e.g., \"Write a story about\
        \ {{ character_name }}\").\n        Must be a valid Jinja2 template.\n   \
        \ model_alias: Alias of the model configuration to use for generation.\n \
        \       Must match a model alias defined when initializing the DataDesignerConfigBuilder.\n\
        \    system_prompt: Optional system prompt to set model behavior and constraints.\n\
        \        Also supports Jinja2 templating. If provided, must be a valid Jinja2\
        \ template.\n        Do not put any output parsing instructions in the system\
        \ prompt. Instead,\n        use the appropriate column type for the output\
        \ you want to generate - e.g.,\n        `LLMStructuredColumnConfig` for structured\
        \ output, `LLMCodeColumnConfig` for code.\n    multi_modal_context: Optional\
        \ list of image contexts for multi-modal generation.\n        Enables vision-capable\
        \ models to generate text based on image inputs.\n    column_type: Discriminator\
        \ field, always \"llm-text\" for this configuration type."
    LocalCallableValidatorParams:
      properties:
        validation_function:
          title: Validation Function
          description: Function (Callable[[pd.DataFrame], pd.DataFrame]) to validate
            the data
        output_schema:
          type: object
          additionalProperties: true
          title: Output Schema
          description: Expected schema for local callable validator's output
      additionalProperties: false
      type: object
      required:
      - validation_function
      title: LocalCallableValidatorParams
      description: "Configuration for local callable validation. Expects a function\
        \ to be passed that validates the data.\n\nAttributes:\n    validation_function:\
        \ Function (`Callable[[pd.DataFrame], pd.DataFrame]`) to validate the\n  \
        \      data. Output must contain a column `is_valid` of type `bool`.\n   \
        \ output_schema: The JSON schema for the local callable validator's output.\
        \ If not provided,\n        the output will not be validated."
    ManualDistribution:
      properties:
        distribution_type:
          allOf:
          - $ref: '#/components/schemas/DistributionType'
          default: manual
        params:
          $ref: '#/components/schemas/ManualDistributionParams'
      additionalProperties: false
      type: object
      required:
      - params
      title: ManualDistribution
    ManualDistributionParams:
      properties:
        values:
          items:
            type: number
          type: array
          minItems: 1
          title: Values
        weights:
          type: array
          items:
            type: number
          title: Weights
      additionalProperties: false
      type: object
      required:
      - values
      title: ManualDistributionParams
    MessageType:
      type: string
      enum:
      - analysis
      - dataset
      - heartbeat
      - log
      title: MessageType
    Modality:
      type: string
      enum:
      - image
      title: Modality
    ModalityDataType:
      type: string
      enum:
      - url
      - base64
      title: ModalityDataType
    ModelConfigInput:
      properties:
        alias:
          type: string
          title: Alias
        model:
          type: string
          title: Model
        inference_parameters:
          $ref: '#/components/schemas/InferenceParametersInput'
        provider:
          type: string
          title: Provider
      additionalProperties: false
      type: object
      required:
      - alias
      - model
      title: ModelConfigInput
    ModelConfigOutput:
      properties:
        alias:
          type: string
          title: Alias
        model:
          type: string
          title: Model
        inference_parameters:
          $ref: '#/components/schemas/InferenceParametersOutput'
        provider:
          type: string
          title: Provider
      additionalProperties: false
      type: object
      required:
      - alias
      - model
      title: ModelConfigOutput
    PaginationData:
      properties:
        page:
          type: integer
          title: Page
          description: The current page number.
        page_size:
          type: integer
          title: Page Size
          description: The page size used for the query.
        current_page_size:
          type: integer
          title: Current Page Size
          description: The size for the current page.
        total_pages:
          type: integer
          title: Total Pages
          description: The total number of pages.
        total_results:
          type: integer
          title: Total Results
          description: The total number of results.
      type: object
      required:
      - page
      - page_size
      - current_page_size
      - total_pages
      - total_results
      title: PaginationData
    PartitionBlock:
      properties:
        index:
          type: integer
          minimum: 0.0
          title: Index
          description: The index of the partition to sample from
          default: 0
        num_partitions:
          type: integer
          minimum: 1.0
          title: Num Partitions
          description: The total number of partitions in the dataset
          default: 1
      additionalProperties: false
      type: object
      title: PartitionBlock
    PersonFromFakerSamplerParams:
      properties:
        locale:
          type: string
          title: Locale
          description: Locale string that determines the language and geographic locale
            that a synthetic person will be sampled from, e.g., en_US, en_GB, fr_FR,
            ...
          default: en_US
        sex:
          type: string
          title: Sex
          description: If specified, then only synthetic people of the specified sex
            will be sampled.
        city:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          title: City
          description: If specified, then only synthetic people from these cities
            will be sampled.
        age_range:
          items:
            type: integer
          type: array
          maxItems: 2
          minItems: 2
          title: Age Range
          description: If specified, then only synthetic people within this age range
            will be sampled.
          default:
          - 18
          - 114
        sampler_type:
          type: string
          const: person_from_faker
          title: Sampler Type
          default: person_from_faker
      additionalProperties: false
      type: object
      title: PersonFromFakerSamplerParams
    PersonSamplerParams:
      properties:
        locale:
          type: string
          title: Locale
          description: 'Locale that determines the language and geographic location
            that a synthetic person will be sampled from. Must be a locale supported
            by a managed Nemotron Personas dataset. Managed datasets exist for the
            following locales: en_US, ja_JP, en_IN, hi_IN.'
          default: en_US
        sex:
          type: string
          title: Sex
          description: If specified, then only synthetic people of the specified sex
            will be sampled.
        city:
          anyOf:
          - type: string
          - items:
              type: string
            type: array
          title: City
          description: If specified, then only synthetic people from these cities
            will be sampled.
        age_range:
          items:
            type: integer
          type: array
          maxItems: 2
          minItems: 2
          title: Age Range
          description: If specified, then only synthetic people within this age range
            will be sampled.
          default:
          - 18
          - 114
        select_field_values:
          type: object
          additionalProperties:
            items:
              type: string
            type: array
          title: Select Field Values
          description: Sample synthetic people with the specified field values. This
            is meant to be a flexible argument for selecting a subset of the population
            from the managed dataset. Note that this sampler does not support rare
            combinations of field values and will likely fail if your desired subset
            is not well-represented in the managed Nemotron Personas dataset. We generally
            recommend using the `sex`, `city`, and `age_range` arguments to filter
            the population when possible.
          examples:
          - education_level:
            - high_school
            - some_college
            - bachelors
            state:
            - NY
            - CA
            - OH
            - TX
            - NV
        with_synthetic_personas:
          type: boolean
          title: With Synthetic Personas
          description: If True, then append synthetic persona columns to each generated
            person.
          default: false
        sampler_type:
          type: string
          const: person
          title: Sampler Type
          default: person
      additionalProperties: false
      type: object
      title: PersonSamplerParams
      description: "Parameters for sampling synthetic person data with demographic\
        \ attributes.\n\nGenerates realistic synthetic person data including names,\
        \ addresses, phone numbers, and other\ndemographic information. Data can be\
        \ sampled from managed datasets (when available) or generated\nusing Faker.\
        \ The sampler supports filtering by locale, sex, age, geographic location,\
        \ and can\noptionally include synthetic persona descriptions.\n\nAttributes:\n\
        \    locale: Locale string determining the language and geographic region\
        \ for synthetic people.\n        Format: language_COUNTRY (e.g., \"en_US\"\
        , \"en_GB\", \"fr_FR\", \"de_DE\", \"es_ES\", \"ja_JP\").\n        Defaults\
        \ to \"en_US\".\n    sex: If specified, filters to only sample people of the\
        \ specified sex. Options: \"Male\" or\n        \"Female\". If None, samples\
        \ both sexes.\n    city: If specified, filters to only sample people from\
        \ the specified city or cities. Can be\n        a single city name (string)\
        \ or a list of city names.\n    age_range: Two-element list [min_age, max_age]\
        \ specifying the age range to sample from\n        (inclusive). Defaults to\
        \ a standard age range. Both values must be between minimum and\n        maximum\
        \ allowed ages.\n    with_synthetic_personas: If True, appends additional\
        \ synthetic persona columns including\n        personality traits, interests,\
        \ and background descriptions. Only supported for certain\n        locales\
        \ with managed datasets.\n    select_field_values: Optional mapping from field\
        \ name to a list of allowed values, used to\n        select a subset of the\
        \ managed dataset's population. Rare combinations of field values\n        are\
        \ not supported and will likely fail."
    PlatformJobListResultResponse:
      properties:
        object:
          type: string
          title: Object
          description: The type of object being returned.
          default: list
        data:
          items:
            $ref: '#/components/schemas/PlatformJobResultResponse'
          type: array
          title: Data
      type: object
      required:
      - data
      title: PlatformJobListResultResponse
    PlatformJobLog:
      properties:
        timestamp:
          type: string
          format: date-time
          title: Timestamp
        job_id:
          type: string
          title: Job Id
        job_step:
          type: string
          title: Job Step
        job_task:
          type: string
          title: Job Task
        message:
          type: string
          title: Message
      type: object
      required:
      - timestamp
      - job_id
      - job_step
      - job_task
      - message
      title: PlatformJobLog
    PlatformJobLogPage:
      properties:
        object:
          type: string
          title: Object
          description: The type of object being returned.
          default: list
        data:
          items:
            $ref: '#/components/schemas/PlatformJobLog'
          type: array
          title: Data
        total:
          type: integer
          title: Total
        next_page:
          type: string
          title: Next Page
        prev_page:
          type: string
          title: Prev Page
      type: object
      required:
      - data
      - total
      - next_page
      - prev_page
      title: PlatformJobLogPage
    PlatformJobResultResponse:
      properties:
        result_name:
          type: string
          title: Result Name
        job_id:
          type: string
          title: Job Id
        namespace:
          type: string
          title: Namespace
        project:
          type: string
          title: Project
        created_at:
          type: string
          format: date-time
          title: Created At
        updated_at:
          type: string
          format: date-time
          title: Updated At
        artifact_url:
          type: string
          title: Artifact Url
        artifact_storage_type:
          $ref: '#/components/schemas/FileStorageType'
      type: object
      required:
      - result_name
      - job_id
      - namespace
      - artifact_url
      - artifact_storage_type
      title: PlatformJobResultResponse
    PlatformJobStatus:
      type: string
      enum:
      - created
      - pending
      - active
      - cancelled
      - cancelling
      - error
      - completed
      - paused
      - pausing
      - resuming
      title: PlatformJobStatus
      description: 'Enumeration of possible job statuses.


        This enum represents the various states a job can be in during its lifecycle,

        from creation to a terminal state.'
    PlatformJobStatusResponse:
      properties:
        job_id:
          type: string
          title: Job Id
        status:
          $ref: '#/components/schemas/PlatformJobStatus'
        status_details:
          additionalProperties: true
          type: object
          title: Status Details
        error_details:
          type: object
          additionalProperties: true
          title: Error Details
        steps:
          items:
            $ref: '#/components/schemas/PlatformJobStepStatusResponse'
          type: array
          title: Steps
      type: object
      required:
      - job_id
      - status
      - status_details
      - error_details
      - steps
      title: PlatformJobStatusResponse
    PlatformJobStepStatusResponse:
      properties:
        name:
          type: string
          title: Name
        status:
          $ref: '#/components/schemas/PlatformJobStatus'
        status_details:
          additionalProperties: true
          type: object
          title: Status Details
        error_details:
          type: object
          additionalProperties: true
          title: Error Details
        tasks:
          items:
            $ref: '#/components/schemas/PlatformJobTaskStatusResponse'
          type: array
          title: Tasks
      type: object
      required:
      - name
      - status
      - status_details
      - error_details
      - tasks
      title: PlatformJobStepStatusResponse
    PlatformJobTaskStatusResponse:
      properties:
        id:
          type: string
          title: Id
        status:
          $ref: '#/components/schemas/PlatformJobStatus'
        status_details:
          additionalProperties: true
          type: object
          title: Status Details
        error_details:
          type: object
          additionalProperties: true
          title: Error Details
        error_stack:
          type: string
          title: Error Stack
      type: object
      required:
      - id
      - status
      - status_details
      - error_details
      - error_stack
      title: PlatformJobTaskStatusResponse
    PoissonSamplerParams:
      properties:
        mean:
          type: number
          title: Mean
          description: Mean number of events in a fixed interval.
        sampler_type:
          type: string
          const: poisson
          title: Sampler Type
          default: poisson
      additionalProperties: false
      type: object
      required:
      - mean
      title: PoissonSamplerParams
      description: "Parameters for sampling from a Poisson distribution.\n\nSamples\
        \ non-negative integer values representing the number of events occurring\
        \ in a fixed\ninterval of time or space. The Poisson distribution is commonly\
        \ used to model count data\nlike the number of arrivals, occurrences, or events\
        \ per time period.\n\nThe distribution is characterized by a single parameter\
        \ (mean/rate), and both the mean and\nvariance equal this parameter value.\n\
        \nAttributes:\n    mean: Mean number of events in the fixed interval (also\
        \ called rate parameter \u03BB).\n        Must be positive. This represents\
        \ both the expected value and the variance of the\n        distribution."
    PreviewMessage:
      properties:
        message:
          type: string
          title: Message
        message_type:
          $ref: '#/components/schemas/MessageType'
        extra:
          type: object
          additionalProperties:
            type: string
          title: Extra
      additionalProperties: false
      type: object
      required:
      - message
      - message_type
      title: PreviewMessage
    PreviewRequest:
      properties:
        config:
          $ref: '#/components/schemas/DataDesignerConfig'
        num_records:
          type: integer
          title: Num Records
      type: object
      required:
      - config
      title: PreviewRequest
    ProcessorConfig:
      properties:
        build_stage:
          allOf:
          - $ref: '#/components/schemas/BuildStage'
          description: 'The stage at which the processor will run. Supported stages:
            post_batch'
      additionalProperties: false
      type: object
      required:
      - build_stage
      title: ProcessorConfig
    RemoteValidatorParams:
      properties:
        endpoint_url:
          type: string
          title: Endpoint Url
          description: URL of the remote endpoint
        output_schema:
          type: object
          additionalProperties: true
          title: Output Schema
          description: Expected schema for remote validator's output
        timeout:
          type: number
          exclusiveMinimum: 0.0
          title: Timeout
          description: The timeout for the HTTP request
          default: 30.0
        max_retries:
          type: integer
          minimum: 0.0
          title: Max Retries
          description: The maximum number of retry attempts
          default: 3
        retry_backoff:
          type: number
          exclusiveMinimum: 1.0
          title: Retry Backoff
          description: The backoff factor for the retry delay
          default: 2.0
        max_parallel_requests:
          type: integer
          minimum: 1.0
          title: Max Parallel Requests
          description: The maximum number of parallel requests to make
          default: 4
      additionalProperties: false
      type: object
      required:
      - endpoint_url
      title: RemoteValidatorParams
      description: "Configuration for remote validation. Sends data to a remote endpoint\
        \ for validation.\n\nAttributes:\n    endpoint_url: The URL of the remote\
        \ endpoint.\n    output_schema: The JSON schema for the remote validator's\
        \ output. If not provided,\n        the output will not be validated.\n  \
        \  timeout: The timeout for the HTTP request in seconds. Defaults to 30.0.\n\
        \    max_retries: The maximum number of retry attempts. Defaults to 3.\n \
        \   retry_backoff: The backoff factor for the retry delay in seconds. Defaults\
        \ to 2.0.\n    max_parallel_requests: The maximum number of parallel requests\
        \ to make. Defaults to 4."
    SamplerColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: sampler
          title: Column Type
          default: sampler
        sampler_type:
          $ref: '#/components/schemas/SamplerType'
        params:
          oneOf:
          - $ref: '#/components/schemas/SubcategorySamplerParams'
          - $ref: '#/components/schemas/CategorySamplerParams'
          - $ref: '#/components/schemas/DatetimeSamplerParams'
          - $ref: '#/components/schemas/PersonSamplerParams'
          - $ref: '#/components/schemas/PersonFromFakerSamplerParams'
          - $ref: '#/components/schemas/TimeDeltaSamplerParams'
          - $ref: '#/components/schemas/UUIDSamplerParams'
          - $ref: '#/components/schemas/BernoulliSamplerParams'
          - $ref: '#/components/schemas/BernoulliMixtureSamplerParams'
          - $ref: '#/components/schemas/BinomialSamplerParams'
          - $ref: '#/components/schemas/GaussianSamplerParams'
          - $ref: '#/components/schemas/PoissonSamplerParams'
          - $ref: '#/components/schemas/UniformSamplerParams'
          - $ref: '#/components/schemas/ScipySamplerParams'
          title: Params
          discriminator:
            propertyName: sampler_type
            mapping:
              bernoulli: '#/components/schemas/BernoulliSamplerParams'
              bernoulli_mixture: '#/components/schemas/BernoulliMixtureSamplerParams'
              binomial: '#/components/schemas/BinomialSamplerParams'
              category: '#/components/schemas/CategorySamplerParams'
              datetime: '#/components/schemas/DatetimeSamplerParams'
              gaussian: '#/components/schemas/GaussianSamplerParams'
              person: '#/components/schemas/PersonSamplerParams'
              person_from_faker: '#/components/schemas/PersonFromFakerSamplerParams'
              poisson: '#/components/schemas/PoissonSamplerParams'
              scipy: '#/components/schemas/ScipySamplerParams'
              subcategory: '#/components/schemas/SubcategorySamplerParams'
              timedelta: '#/components/schemas/TimeDeltaSamplerParams'
              uniform: '#/components/schemas/UniformSamplerParams'
              uuid: '#/components/schemas/UUIDSamplerParams'
        conditional_params:
          additionalProperties:
            oneOf:
            - $ref: '#/components/schemas/SubcategorySamplerParams'
            - $ref: '#/components/schemas/CategorySamplerParams'
            - $ref: '#/components/schemas/DatetimeSamplerParams'
            - $ref: '#/components/schemas/PersonSamplerParams'
            - $ref: '#/components/schemas/PersonFromFakerSamplerParams'
            - $ref: '#/components/schemas/TimeDeltaSamplerParams'
            - $ref: '#/components/schemas/UUIDSamplerParams'
            - $ref: '#/components/schemas/BernoulliSamplerParams'
            - $ref: '#/components/schemas/BernoulliMixtureSamplerParams'
            - $ref: '#/components/schemas/BinomialSamplerParams'
            - $ref: '#/components/schemas/GaussianSamplerParams'
            - $ref: '#/components/schemas/PoissonSamplerParams'
            - $ref: '#/components/schemas/UniformSamplerParams'
            - $ref: '#/components/schemas/ScipySamplerParams'
            discriminator:
              propertyName: sampler_type
              mapping:
                bernoulli: '#/components/schemas/BernoulliSamplerParams'
                bernoulli_mixture: '#/components/schemas/BernoulliMixtureSamplerParams'
                binomial: '#/components/schemas/BinomialSamplerParams'
                category: '#/components/schemas/CategorySamplerParams'
                datetime: '#/components/schemas/DatetimeSamplerParams'
                gaussian: '#/components/schemas/GaussianSamplerParams'
                person: '#/components/schemas/PersonSamplerParams'
                person_from_faker: '#/components/schemas/PersonFromFakerSamplerParams'
                poisson: '#/components/schemas/PoissonSamplerParams'
                scipy: '#/components/schemas/ScipySamplerParams'
                subcategory: '#/components/schemas/SubcategorySamplerParams'
                timedelta: '#/components/schemas/TimeDeltaSamplerParams'
                uniform: '#/components/schemas/UniformSamplerParams'
                uuid: '#/components/schemas/UUIDSamplerParams'
          type: object
          title: Conditional Params
          default: {}
        convert_to:
          type: string
          title: Convert To
      additionalProperties: false
      type: object
      required:
      - name
      - sampler_type
      - params
      title: SamplerColumnConfig
      description: "Configuration for columns generated using numerical samplers.\n\
        \nSampler columns provide efficient data generation using numerical samplers\
        \ for\ncommon data types and distributions. Supported samplers include UUID\
        \ generation,\ndatetime/timedelta sampling, person generation, category /\
        \ subcategory sampling,\nand various statistical distributions (uniform, gaussian,\
        \ binomial, poisson, scipy).\n\nAttributes:\n    sampler_type: Type of sampler\
        \ to use. Available types include:\n        \"uuid\", \"category\", \"subcategory\"\
        , \"uniform\", \"gaussian\", \"bernoulli\",\n        \"bernoulli_mixture\"\
        , \"binomial\", \"poisson\", \"scipy\", \"person\", \"person_from_faker\",\
        \ \"datetime\", \"timedelta\"\
        .\n    params: Parameters specific to the chosen sampler type. Type varies\
        \ based on the `sampler_type`\n        (e.g., `CategorySamplerParams`, `UniformSamplerParams`,\
        \ `PersonSamplerParams`).\n    conditional_params: Optional dictionary for\
        \ conditional parameters. The dict keys\n        are the conditions that must\
        \ be met (e.g., \"age > 21\") for the conditional parameters\n        to be\
        \ used. The values of dict are the parameters to use when the condition is\
        \ met.\n    convert_to: Optional type conversion to apply after sampling.\
        \ Must be one of \"float\", \"int\", or \"str\".\n        Useful for converting\
        \ numerical samples to strings or other types.\n    column_type: Discriminator\
        \ field, always \"sampler\" for this configuration type.\n\n!!! tip \"Displaying\
        \ available samplers and their parameters\"\n    The config builder has an\
        \ `info` attribute that can be used to display the\n    available samplers\
        \ and their parameters:\n    ```python\n    config_builder.info.display(\"\
        samplers\")\n    ```"
    SamplerType:
      type: string
      enum:
      - bernoulli
      - bernoulli_mixture
      - binomial
      - category
      - datetime
      - gaussian
      - person
      - person_from_faker
      - poisson
      - scipy
      - subcategory
      - timedelta
      - uniform
      - uuid
      title: SamplerType
    SamplingStrategy:
      type: string
      enum:
      - ordered
      - shuffle
      title: SamplingStrategy
    ScalarInequalityConstraint:
      properties:
        target_column:
          type: string
          title: Target Column
        rhs:
          type: number
          title: Rhs
        operator:
          $ref: '#/components/schemas/InequalityOperator'
      additionalProperties: false
      type: object
      required:
      - target_column
      - rhs
      - operator
      title: ScalarInequalityConstraint
    ScipySamplerParams:
      properties:
        dist_name:
          type: string
          title: Dist Name
          description: Name of a scipy.stats distribution.
        dist_params:
          additionalProperties: true
          type: object
          title: Dist Params
          description: Parameters of the scipy.stats distribution given in `dist_name`.
        decimal_places:
          type: integer
          title: Decimal Places
          description: Number of decimal places to round the sampled values to.
        sampler_type:
          type: string
          const: scipy
          title: Sampler Type
          default: scipy
      additionalProperties: false
      type: object
      required:
      - dist_name
      - dist_params
      title: ScipySamplerParams
      description: "Parameters for sampling from any scipy.stats continuous or discrete\
        \ distribution.\n\nProvides a flexible interface to sample from the wide range\
        \ of probability distributions\navailable in scipy.stats. This enables advanced\
        \ statistical sampling beyond the built-in\ndistribution types (Gaussian,\
        \ Uniform, etc.).\n\nSee: [scipy.stats documentation](https://docs.scipy.org/doc/scipy/reference/stats.html)\n\
        \nAttributes:\n    dist_name: Name of the scipy.stats distribution to sample\
        \ from (e.g., \"beta\", \"gamma\",\n        \"lognorm\", \"expon\"). Must\
        \ be a valid distribution name from scipy.stats.\n    dist_params: Dictionary\
        \ of parameters for the specified distribution. Parameter names\n        and\
        \ values must match the scipy.stats distribution specification (e.g., {\"\
        a\": 2, \"b\": 5}\n        for beta distribution, {\"scale\": 1.5} for exponential).\n\
        \    decimal_places: Optional number of decimal places to round sampled values\
        \ to. If None,\n        values are not rounded."
    Score:
      properties:
        name:
          type: string
          title: Name
          description: A clear name for this score.
        description:
          type: string
          title: Description
          description: An informative and detailed assessment guide for using this
            score.
        options:
          additionalProperties:
            type: string
          type: object
          title: Options
          description: 'Score options in the format of {score: description}.'
      additionalProperties: false
      type: object
      required:
      - name
      - description
      - options
      title: Score
      description: "Configuration for a \"score\" in an LLM judge evaluation.\n\n\
        Defines a single scoring criterion with its possible values and descriptions.\
        \ Multiple\nScore objects can be combined in an LLMJudgeColumnConfig to create\
        \ multi-dimensional\nquality assessments.\n\nAttributes:\n    name: A clear,\
        \ concise name for this scoring dimension (e.g., \"Relevance\", \"Fluency\"\
        ).\n    description: An informative and detailed assessment guide explaining\
        \ how to evaluate\n        this dimension. Should provide clear criteria for\
        \ scoring.\n    options: Dictionary mapping score values to their descriptions.\
        \ Keys can be integers\n        (e.g., 1-5 scale) or strings (e.g., \"Poor\"\
        , \"Good\", \"Excellent\"). Values are\n        descriptions explaining what\
        \ each score level means."
    SeedConfig:
      properties:
        dataset:
          type: string
          title: Dataset
        sampling_strategy:
          allOf:
          - $ref: '#/components/schemas/SamplingStrategy'
          default: ordered
        selection_strategy:
          anyOf:
          - $ref: '#/components/schemas/IndexRange'
          - $ref: '#/components/schemas/PartitionBlock'
          title: Selection Strategy
      additionalProperties: false
      type: object
      required:
      - dataset
      title: SeedConfig
      description: "Configuration for sampling data from a seed dataset.\n\nArgs:\n\
        \    dataset: Path or identifier for the seed dataset.\n    sampling_strategy:\
        \ Strategy for how to sample rows from the dataset.\n        - ORDERED: Read\
        \ rows sequentially in their original order.\n        - SHUFFLE: Randomly\
        \ shuffle rows before sampling. When used with\n          selection_strategy,\
        \ shuffling occurs within the selected range/partition.\n    selection_strategy:\
        \ Optional strategy to select a subset of the dataset.\n        - IndexRange:\
        \ Select a specific range of indices (e.g., rows 100-200).\n        - PartitionBlock:\
        \ Select a partition by splitting the dataset into N equal parts.\n      \
        \    Partition indices are zero-based (index=0 is the first partition, index=1\
        \ is\n          the second, etc.).\n\nExamples:\n    Read rows sequentially\
        \ from start to end:\n        SeedConfig(dataset=\"my_data.parquet\", sampling_strategy=SamplingStrategy.ORDERED)\n\
        \n    Read rows in random order:\n        SeedConfig(dataset=\"my_data.parquet\"\
        , sampling_strategy=SamplingStrategy.SHUFFLE)\n\n    Read specific index range\
        \ (rows 100-199):\n        SeedConfig(\n            dataset=\"my_data.parquet\"\
        ,\n            sampling_strategy=SamplingStrategy.ORDERED,\n            selection_strategy=IndexRange(start=100,\
        \ end=199)\n        )\n\n    Read random rows from a specific index range\
        \ (shuffles within rows 100-199):\n        SeedConfig(\n            dataset=\"\
        my_data.parquet\",\n            sampling_strategy=SamplingStrategy.SHUFFLE,\n\
        \            selection_strategy=IndexRange(start=100, end=199)\n        )\n\
        \n    Read from partition 2 (3rd partition, zero-based) of 5 partitions (20%\
        \ of dataset):\n        SeedConfig(\n            dataset=\"my_data.parquet\"\
        ,\n            sampling_strategy=SamplingStrategy.ORDERED,\n            selection_strategy=PartitionBlock(index=2,\
        \ num_partitions=5)\n        )\n\n    Read shuffled rows from partition 0\
        \ of 10 partitions (shuffles within the partition):\n        SeedConfig(\n\
        \            dataset=\"my_data.parquet\",\n            sampling_strategy=SamplingStrategy.SHUFFLE,\n\
        \            selection_strategy=PartitionBlock(index=0, num_partitions=10)\n\
        \        )"
    SeedDatasetColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: seed-dataset
          title: Column Type
          default: seed-dataset
      additionalProperties: false
      type: object
      required:
      - name
      title: SeedDatasetColumnConfig
      description: "Configuration for columns sourced from seed datasets.\n\nThis\
        \ config marks columns that come from seed data. It is typically created\n\
        automatically when calling `with_seed_dataset()` on the builder, rather than\n\
        being instantiated directly by users.\n\nAttributes:\n    column_type: Discriminator\
        \ field, always \"seed-dataset\" for this configuration type."
    SettingsDefaults:
      properties:
        model_configs:
          items:
            $ref: '#/components/schemas/ModelConfigOutput'
          type: array
          title: Model Configs
        model_provider:
          type: string
          title: Model Provider
      type: object
      required:
      - model_configs
      - model_provider
      title: SettingsDefaults
    SettingsResponse:
      properties:
        defaults:
          $ref: '#/components/schemas/SettingsDefaults'
        model_providers:
          items:
            $ref: '#/components/schemas/DisplayModelProvider'
          type: array
          title: Model Providers
      type: object
      required:
      - defaults
      - model_providers
      title: SettingsResponse
    SubcategorySamplerParams:
      properties:
        category:
          type: string
          title: Category
          description: Name of parent category to this subcategory.
        values:
          additionalProperties:
            items:
              anyOf:
              - type: string
              - type: integer
              - type: number
            type: array
          type: object
          title: Values
          description: Mapping from each value of parent category to a list of subcategory
            values.
        sampler_type:
          type: string
          const: subcategory
          title: Sampler Type
          default: subcategory
      additionalProperties: false
      type: object
      required:
      - category
      - values
      title: SubcategorySamplerParams
      description: "Parameters for subcategory sampling conditioned on a parent category\
        \ column.\n\nSamples subcategory values based on the value of a parent category\
        \ column. Each parent\ncategory value maps to its own list of possible subcategory\
        \ values, enabling hierarchical\nor conditional sampling patterns.\n\nAttributes:\n\
        \    category: Name of the parent category column that this subcategory depends\
        \ on.\n        The parent column must be generated before this subcategory\
        \ column.\n    values: Mapping from each parent category value to a list of\
        \ possible subcategory values.\n        Each key must correspond to a value\
        \ that appears in the parent category column."
    TimeDeltaSamplerParams:
      properties:
        dt_min:
          type: integer
          minimum: 0.0
          title: Dt Min
          description: Minimum possible time-delta for sampling range, inclusive.
            Must be less than `dt_max`.
        dt_max:
          type: integer
          exclusiveMinimum: 0.0
          title: Dt Max
          description: Maximum possible time-delta for sampling range, exclusive.
            Must be greater than `dt_min`.
        reference_column_name:
          type: string
          title: Reference Column Name
          description: Name of an existing datetime column to condition time-delta
            sampling on.
        unit:
          type: string
          enum:
          - D
          - h
          - m
          - s
          title: Unit
          description: Sampling units, e.g. the smallest possible time interval between
            samples.
          default: D
        sampler_type:
          type: string
          const: timedelta
          title: Sampler Type
          default: timedelta
      additionalProperties: false
      type: object
      required:
      - dt_min
      - dt_max
      - reference_column_name
      title: TimeDeltaSamplerParams
      description: "Parameters for sampling time deltas relative to a reference datetime\
        \ column.\n\nSamples time offsets within a specified range and adds them to\
        \ values from a reference\ndatetime column. This is useful for generating\
        \ related datetime columns like order dates\nand delivery dates, or event\
        \ start times and end times.\n\nNote:\n    Years and months are not supported\
        \ as timedelta units because they have variable lengths.\n    See: [pandas\
        \ timedelta documentation](https://pandas.pydata.org/docs/user_guide/timedeltas.html)\n\
        \nAttributes:\n    dt_min: Minimum time-delta value (inclusive). Must be non-negative\
        \ and less than `dt_max`.\n        Specified in units defined by the `unit`\
        \ parameter.\n    dt_max: Maximum time-delta value (exclusive). Must be positive\
        \ and greater than `dt_min`.\n        Specified in units defined by the `unit`\
        \ parameter.\n    reference_column_name: Name of an existing datetime column\
        \ to add the time-delta to.\n        This column must be generated before\
        \ the timedelta column.\n    unit: Time unit for the delta values. Options:\n\
        \        - \"D\": Days (default)\n        - \"h\": Hours\n        - \"m\"\
        : Minutes\n        - \"s\": Seconds"
    UUIDSamplerParams:
      properties:
        prefix:
          type: string
          title: Prefix
          description: String prepended to the front of the UUID.
        short_form:
          type: boolean
          title: Short Form
          description: If true, all UUIDs sampled will be truncated at 8 characters.
          default: false
        uppercase:
          type: boolean
          title: Uppercase
          description: If true, all letters in the UUID will be capitalized.
          default: false
        sampler_type:
          type: string
          const: uuid
          title: Sampler Type
          default: uuid
      additionalProperties: false
      type: object
      title: UUIDSamplerParams
      description: "Parameters for generating UUID (Universally Unique Identifier)\
        \ values.\n\nGenerates UUID4 (random) identifiers with optional formatting\
        \ options. UUIDs are useful\nfor creating unique identifiers for records,\
        \ entities, or transactions.\n\nAttributes:\n    prefix: Optional string to\
        \ prepend to each UUID. Useful for creating namespaced or\n        typed identifiers\
        \ (e.g., \"user-\", \"order-\", \"txn-\").\n    short_form: If True, truncates\
        \ UUIDs to 8 characters (first segment only). Default is False\n        for\
        \ full 32-character UUIDs (excluding hyphens).\n    uppercase: If True, converts\
        \ all hexadecimal letters to uppercase. Default is False for\n        lowercase\
        \ UUIDs."
    UniformDistribution:
      properties:
        distribution_type:
          allOf:
          - $ref: '#/components/schemas/DistributionType'
          default: uniform
        params:
          $ref: '#/components/schemas/UniformDistributionParams'
      additionalProperties: false
      type: object
      required:
      - params
      title: UniformDistribution
    UniformDistributionParams:
      properties:
        low:
          type: number
          title: Low
        high:
          type: number
          title: High
      additionalProperties: false
      type: object
      required:
      - low
      - high
      title: UniformDistributionParams
    UniformSamplerParams:
      properties:
        low:
          type: number
          title: Low
          description: Lower bound of the uniform distribution, inclusive.
        high:
          type: number
          title: High
          description: Upper bound of the uniform distribution, inclusive.
        decimal_places:
          type: integer
          title: Decimal Places
          description: Number of decimal places to round the sampled values to.
        sampler_type:
          type: string
          const: uniform
          title: Sampler Type
          default: uniform
      additionalProperties: false
      type: object
      required:
      - low
      - high
      title: UniformSamplerParams
      description: "Parameters for sampling from a continuous Uniform distribution.\n\
        \nSamples continuous values uniformly from a specified range, where every\
        \ value in the range\nhas equal probability of being sampled. This is useful\
        \ when all values within a range are\nequally likely, such as random percentages,\
        \ proportions, or unbiased measurements.\n\nAttributes:\n    low: Lower bound\
        \ of the uniform distribution (inclusive). Can be any real number.\n    high:\
        \ Upper bound of the uniform distribution (inclusive). Must be greater than\
        \ `low`.\n    decimal_places: Optional number of decimal places to round sampled\
        \ values to. If None,\n        values are not rounded and may have many decimal\
        \ places."
    ValidationColumnConfig:
      properties:
        name:
          type: string
          title: Name
        drop:
          type: boolean
          title: Drop
          default: false
        column_type:
          type: string
          const: validation
          title: Column Type
          default: validation
        target_columns:
          items:
            type: string
          type: array
          title: Target Columns
        validator_type:
          $ref: '#/components/schemas/ValidatorType'
        validator_params:
          anyOf:
          - $ref: '#/components/schemas/CodeValidatorParams'
          - $ref: '#/components/schemas/LocalCallableValidatorParams'
          - $ref: '#/components/schemas/RemoteValidatorParams'
          title: Validator Params
        batch_size:
          type: integer
          minimum: 1.0
          title: Batch Size
          description: Number of records to process in each batch
          default: 10
      additionalProperties: false
      type: object
      required:
      - name
      - target_columns
      - validator_type
      - validator_params
      title: ValidationColumnConfig
      description: "Configuration for validation columns that validate existing columns.\n\
        \nValidation columns execute validation logic against specified target columns\
        \ and return\nstructured results indicating pass/fail status with validation\
        \ details. Supports multiple\nvalidation strategies: code execution (Python/SQL),\
        \ local callable functions (library only),\nand remote HTTP endpoints.\n\n\
        Attributes:\n    target_columns: List of column names to validate. These columns\
        \ are passed to the\n        validator for validation. All target columns\
        \ must exist in the dataset\n        before validation runs.\n    validator_type:\
        \ The type of validator to use. Options:\n        - \"code\": Execute code\
        \ (Python or SQL) for validation. The code receives a\n          DataFrame\
        \ with target columns and must return a DataFrame with validation results.\n\
        \        - \"local_callable\": Call a local Python function with the data.\
        \ Only supported\n          when running DataDesigner locally.\n        -\
        \ \"remote\": Send data to a remote HTTP endpoint for validation. Useful for\n\
        \          delegating validation to an external service.\n\
        \    validator_params: Parameters specific to the validator type. Type varies\
        \ by validator:\n        - CodeValidatorParams: Specifies code language (python\
        \ or SQL dialect like\n          \"sql:postgres\", \"sql:mysql\").\n     \
        \   - LocalCallableValidatorParams: Provides validation function (Callable[[pd.DataFrame],\n\
        \          pd.DataFrame]) and optional output schema for validation results.\n\
        \        - RemoteValidatorParams: Configures endpoint URL, HTTP timeout, retry\
        \ behavior\n          (max_retries, retry_backoff), and parallel request limits\
        \ (max_parallel_requests).\n    batch_size: Number of records to process in\
        \ each validation batch. Defaults to 10.\n        Larger batches are more\
        \ efficient but use more memory. Adjust based on validator\n        complexity\
        \ and available resources.\n    column_type: Discriminator field, always \"\
        validation\" for this configuration type."
    ValidationError:
      properties:
        loc:
          items:
            anyOf:
            - type: string
            - type: integer
          type: array
          title: Location
        msg:
          type: string
          title: Message
        type:
          type: string
          title: Error Type
      type: object
      required:
      - loc
      - msg
      - type
      title: ValidationError
    ValidatorType:
      type: string
      enum:
      - code
      - local_callable
      - remote
      title: ValidatorType
tags:
- name: Data Designer
  description: Operations related to synthetic data generation.
- name: Health Checks
  description: Operations related to NeMo Microservices platform health.


================================================
FILE: studio/frontend/eslint.config.js
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import js from "@eslint/js";
import reactHooks from "eslint-plugin-react-hooks";
import reactRefresh from "eslint-plugin-react-refresh";
import { defineConfig, globalIgnores } from "eslint/config";
import globals from "globals";
import tseslint from "typescript-eslint";

// Import paths that are rejected by `no-restricted-imports`, each with the
// message shown to the developer.
const restrictedImportPatterns = [
  // Alias imports that reach below a feature's top-level entry point.
  {
    group: ["@/features/*/*"],
    message: "Import from feature index only: @/features/[name]",
  },
  // Relative imports that reach into a feature directory.
  {
    group: ["../features/*/**"],
    message: "Use absolute imports: @/features/[name]",
  },
];

// Rule overrides layered on top of the recommended presets.
const typescriptRules = {
  // Allow shadcn ui components to export variants alongside components.
  "react-refresh/only-export-components": [
    "warn",
    { allowConstantExport: true },
  ],
  // Import restrictions for architecture enforcement.
  "no-restricted-imports": ["error", { patterns: restrictedImportPatterns }],
};

// Configuration applied to every TypeScript / TSX source file.
const typescriptSourceConfig = {
  files: ["**/*.{ts,tsx}"],
  extends: [
    js.configs.recommended,
    tseslint.configs.recommended,
    reactHooks.configs.flat.recommended,
    reactRefresh.configs.vite,
  ],
  languageOptions: {
    ecmaVersion: 2020,
    globals: globals.browser,
  },
  rules: typescriptRules,
};

export default defineConfig([
  globalIgnores(["dist", "**/._*"]),
  typescriptSourceConfig,
]);


================================================
FILE: studio/frontend/index.html
================================================
<!doctype html>
<!-- SPDX-License-Identifier: AGPL-3.0-only -->
<!-- Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0 -->

<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/png" href="/favicon.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Unsloth Studio</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>


================================================
FILE: studio/frontend/package.json
================================================
{
  "name": "unsloth-theme",
  "private": true,
  "version": "0.0.0",
  "type": "module",
  "scripts": {
    "dev": "vite",
    "build": "tsc -b && vite build",
    "lint": "eslint .",
    "preview": "vite preview",
    "typecheck": "tsc -b --pretty false",
    "biome:check": "biome check .",
    "biome:fix": "biome check . --write"
  },
  "dependencies": {
    "@assistant-ui/react": "^0.12.19",
    "@assistant-ui/react-markdown": "^0.12.3",
    "@assistant-ui/react-streamdown": "^0.1.2",
    "@base-ui/react": "^1.2.0",
    "@dagrejs/dagre": "^2.0.4",
    "@dagrejs/graphlib": "^3.0.4",
    "@fontsource-variable/figtree": "^5.2.10",
    "@fontsource-variable/inter": "^5.2.8",
    "@fontsource-variable/space-grotesk": "^5.2.10",
    "@hugeicons/core-free-icons": "^3.1.1",
    "@hugeicons/react": "^1.1.5",
    "@huggingface/hub": "^2.9.0",
    "@langchain/core": "^1.1.27",
    "@radix-ui/react-checkbox": "^1.3.3",
    "@radix-ui/react-label": "^2.1.8",
    "@radix-ui/react-select": "^2.2.6",
    "@radix-ui/react-separator": "^1.1.8",
    "@radix-ui/react-slot": "^1.2.4",
    "@streamdown/cjk": "1.0.2",
    "@streamdown/code": "1.0.2",
    "@streamdown/math": "1.0.2",
    "@streamdown/mermaid": "1.0.2",
    "@tailwindcss/vite": "^4.1.18",
    "@tanstack/react-router": "^1.159.10",
    "@tanstack/react-table": "^8.21.3",
    "@toolwind/corner-shape": "^0.0.8-3",
    "@types/canvas-confetti": "^1.9.0",
    "@xyflow/react": "^12.10.0",
    "assistant-stream": "^0.3.2",
    "canvas-confetti": "^1.9.4",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "cmdk": "^1.1.1",
    "date-fns": "^4.1.0",
    "dexie": "^4.3.0",
    "framer-motion": "^11.18.2",
    "js-yaml": "^4.1.1",
    "katex": "^0.16.28",
    "lucide-react": "^0.577.0",
    "mammoth": "^1.11.0",
    "motion": "^12.34.0",
    "next": "^16.1.6",
    "next-themes": "^0.4.6",
    "radix-ui": "^1.4.3",
    "react": "^19.2.4",
    "react-day-picker": "^9.13.2",
    "react-dom": "^19.2.4",
    "react-resizable-panels": "^4.6.4",
    "recharts": "3.7.0",
    "remark-gfm": "^4.0.1",
    "shadcn": "^3.8.4",
    "sonner": "^2.0.7",
    "streamdown": "2.3.0",
    "tailwind-merge": "^3.4.0",
    "tailwindcss": "^4.1.18",
    "tw-animate-css": "^1.4.0",
    "tw-shimmer": "^0.4.6",
    "unpdf": "^1.4.0",
    "zustand": "^5.0.11"
  },
  "devDependencies": {
    "@biomejs/biome": "^1.9.4",
    "@eslint/js": "^9.39.1",
    "@types/js-yaml": "^4.0.9",
    "@types/node": "^24.10.1",
    "@types/react": "^19.2.5",
    "@types/react-dom": "^19.2.3",
    "@vitejs/plugin-react": "^5.1.1",
    "eslint": "^9.39.1",
    "eslint-plugin-react-hooks": "^7.0.1",
    "eslint-plugin-react-refresh": "^0.4.26",
    "globals": "^16.5.0",
    "typescript": "~5.9.3",
    "typescript-eslint": "^8.55.0",
    "vite": "^7.3.1"
  }
}


================================================
FILE: studio/frontend/src/app/app.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { RouterProvider } from "@tanstack/react-router";
import { router } from "./router";

/** Application root component: renders the router provider bound to the app's `router`. */
export function App() {
  return (
    <RouterProvider
      router={router}
    />
  );
}


================================================
FILE: studio/frontend/src/app/auth-guards.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { redirect } from "@tanstack/react-router";
import {
  getPostAuthRoute,
  hasAuthToken,
  hasRefreshToken,
  mustChangePassword,
  refreshSession,
} from "@/features/auth";

/**
 * Reports whether the user currently has a usable session.
 *
 * An existing auth token is accepted as-is. Without one, a session refresh is
 * attempted only when a refresh token is present; otherwise there is no session.
 */
async function hasActiveSession(): Promise<boolean> {
  if (!hasAuthToken()) {
    return hasRefreshToken() ? refreshSession() : false;
  }
  return true;
}

/**
 * Asks `/api/auth/status` whether authentication has been initialized.
 *
 * Any failure (rejected fetch, non-OK response, unreadable body) resolves to
 * `true` so that callers fall back to the login page.
 */
async function checkAuthInitialized(): Promise<boolean> {
  try {
    const response = await fetch("/api/auth/status");
    if (response.ok) {
      const { initialized } = (await response.json()) as {
        initialized: boolean;
      };
      return initialized;
    }
    return true; // fallback to login on error
  } catch {
    return true; // fallback to login on error
  }
}

/**
 * Determines whether the user must change their password before continuing.
 *
 * The server's `requires_password_change` flag from `/api/auth/status` wins
 * when it is truthy; otherwise (including on any request failure) the result
 * of the local `mustChangePassword()` check is returned.
 */
async function checkPasswordChangeRequired(): Promise<boolean> {
  try {
    const response = await fetch("/api/auth/status");
    if (response.ok) {
      const { requires_password_change: serverRequiresChange } =
        (await response.json()) as { requires_password_change: boolean };
      return serverRequiresChange || mustChangePassword();
    }
    return mustChangePassword();
  } catch {
    return mustChangePassword();
  }
}

/**
 * Route guard for pages that need an authenticated user.
 *
 * Resolves only when a session is active and no password change is pending.
 * Otherwise it throws a redirect: to `/change-password` when a password change
 * is required or auth is not yet initialized, and to `/login` in every other
 * unauthenticated case.
 */
export async function requireAuth(): Promise<void> {
  const sessionActive = await hasActiveSession();

  // A pending password change takes priority whether or not a session exists.
  if (await checkPasswordChangeRequired()) {
    throw redirect({ to: "/change-password" });
  }

  if (sessionActive) {
    return;
  }

  const initialized = await checkAuthInitialized();
  throw redirect({ to: initialized ? "/login" : "/change-password" });
}

/**
 * Route guard for guest-only pages: a user who already has an active session
 * is redirected to the post-auth route; everyone else may proceed.
 */
export async function requireGuest(): Promise<void> {
  const sessionActive = await hasActiveSession();
  if (sessionActive) {
    throw redirect({ to: getPostAuthRoute() });
  }
}

/**
 * Route guard for the `/change-password` page.
 *
 * Outcomes, in order:
 *  - a password change is required        -> allow the page to load;
 *  - the user already has a session       -> redirect to the post-auth route;
 *  - auth has not been initialized yet    -> allow the page to load;
 *  - otherwise                            -> redirect to `/login`.
 *
 * The uninitialized case returns instead of redirecting to `/change-password`:
 * that is the route this guard protects, so redirecting there would re-run the
 * guard and could redirect to itself indefinitely.
 */
export async function requirePasswordChangeFlow(): Promise<void> {
  if (await checkPasswordChangeRequired()) return;

  if (await hasActiveSession()) {
    throw redirect({ to: getPostAuthRoute() });
  }

  const initialized = await checkAuthInitialized();
  // Uninitialized auth means the user belongs on this page already.
  if (!initialized) return;

  throw redirect({ to: "/login" });
}


================================================
FILE: studio/frontend/src/app/provider.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Toaster } from "@/components/ui/sonner";
import { ThemeProvider } from "next-themes";
import type { ReactNode } from "react";

// Props accepted by AppProvider.
interface AppProviderProps {
  // Subtree rendered inside the theme provider.
  children: ReactNode;
}

/**
 * Wraps the application in the theme provider (class-based theming, light by
 * default) and mounts the toast container next to the rendered children.
 */
export function AppProvider(props: AppProviderProps) {
  const { children } = props;

  return (
    <ThemeProvider attribute="class" defaultTheme="light">
      {children}
      <Toaster position="top-right" visibleToasts={2} expand />
    </ThemeProvider>
  );
}


================================================
FILE: studio/frontend/src/app/router.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRouter } from "@tanstack/react-router";
import { Route as rootRoute } from "./routes/__root";
import { Route as dataRecipesRoute } from "./routes/data-recipes";
import { Route as dataRecipeRoute } from "./routes/data-recipes.$recipeId";
import { Route as chatRoute } from "./routes/chat";
import { Route as exportRoute } from "./routes/export";
import { Route as gridTestRoute } from "./routes/grid-test";
import { Route as indexRoute } from "./routes/index";
import { Route as loginRoute } from "./routes/login";
import { Route as onboardingRoute } from "./routes/onboarding";
import { Route as changePasswordRoute } from "./routes/change-password";
import { Route as studioRoute } from "./routes/studio";

// Full route tree: every page route is attached directly beneath the root route.
const routeTree = rootRoute.addChildren([
  indexRoute,
  onboardingRoute,
  loginRoute,
  changePasswordRoute,
  gridTestRoute,
  studioRoute,
  chatRoute,
  exportRoute,
  dataRecipesRoute,
  dataRecipeRoute,
]);

// The single router instance used by the application.
export const router = createRouter({ routeTree });

// Module augmentation: exposes this router's concrete type through the
// library's `Register` interface.
declare module "@tanstack/react-router" {
  interface Register {
    router: typeof router;
  }
}


================================================
FILE: studio/frontend/src/app/routes/__root.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Navbar } from "@/components/navbar";
import { usePlatformStore } from "@/config/env";
import {
  Outlet,
  createRootRoute,
  redirect,
  useRouterState,
} from "@tanstack/react-router";
import { AnimatePresence, motion } from "motion/react";
import { Suspense } from "react";
import { AppProvider } from "../provider";

// Exact pathnames that stay reachable when the platform runs in chat-only mode.
const CHAT_ONLY_ALLOWED = new Set(["/", "/chat", "/login", "/signup", "/change-password"]);

/**
 * Returns true when `pathname` may be visited in chat-only mode: either an
 * exact match in CHAT_ONLY_ALLOWED, or the data-recipes page and anything
 * nested under it.
 */
function isChatOnlyAllowed(pathname: string): boolean {
  return (
    CHAT_ONLY_ALLOWED.has(pathname) ||
    pathname === "/data-recipes" ||
    pathname.startsWith("/data-recipes/")
  );
}

// Root route: renders the shared layout and, in chat-only mode, sends any
// disallowed pathname to /chat before the route loads.
export const Route = createRootRoute({
  component: RootLayout,
  beforeLoad: ({ location }) => {
    if (!usePlatformStore.getState().isChatOnly()) return;
    if (isChatOnlyAllowed(location.pathname)) return;
    throw redirect({ to: "/chat" });
  },
});

// Pathnames on which the navbar is not rendered.
const HIDDEN_NAVBAR_ROUTES = ["/onboarding", "/login", "/change-password"];

/**
 * Shared page shell: app providers, the navbar (unless the current pathname
 * is in HIDDEN_NAVBAR_ROUTES), and the active route's content faded in on
 * each pathname change.
 */
function RootLayout() {
  const pathname = useRouterState({
    select: (state) => state.location.pathname,
  });
  const showNavbar = !HIDDEN_NAVBAR_ROUTES.includes(pathname);

  return (
    <AppProvider>
      {showNavbar && <Navbar />}
      <AnimatePresence initial={false}>
        {/* Keyed by pathname so each navigation mounts a fresh container. */}
        <motion.div
          key={pathname}
          className="flex-1"
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          transition={{ duration: 0.15 }}
        >
          <Suspense fallback={null}>
            <Outlet />
          </Suspense>
        </motion.div>
      </AnimatePresence>
    </AppProvider>
  );
}


================================================
FILE: studio/frontend/src/app/routes/change-password.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requirePasswordChangeFlow } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Lazily loaded page component, adapted from the feature module's named export.
const ChangePasswordPage = lazy(async () => {
  const authFeature = await import("@/features/auth");
  return { default: authFeature.ChangePasswordPage };
});

// /change-password: guarded by the password-change flow check.
export const Route = createRoute({
  path: "/change-password",
  getParentRoute: () => rootRoute,
  component: ChangePasswordPage,
  beforeLoad: () => requirePasswordChangeFlow(),
});


================================================
FILE: studio/frontend/src/app/routes/chat.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Lazily loaded page component, adapted from the module's named export.
const ChatPage = lazy(async () => {
  const chatModule = await import("@/features/chat/chat-page");
  return { default: chatModule.ChatPage };
});

// /chat: requires an authenticated user.
export const Route = createRoute({
  path: "/chat",
  getParentRoute: () => rootRoute,
  component: ChatPage,
  beforeLoad: () => requireAuth(),
});


================================================
FILE: studio/frontend/src/app/routes/data-recipes.$recipeId.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import type { ReactElement } from "react";
import { lazy } from "react";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Lazily loaded editor page, adapted from the feature module's named export.
const EditRecipePage = lazy(async () => {
  const dataRecipesFeature = await import("@/features/data-recipes");
  return { default: dataRecipesFeature.EditRecipePage };
});

// /data-recipes/$recipeId: requires an authenticated user.
export const Route = createRoute({
  path: "/data-recipes/$recipeId",
  getParentRoute: () => rootRoute,
  component: DataRecipeEditorRoute,
  beforeLoad: () => requireAuth(),
});

/** Reads `recipeId` from the route params and renders the recipe editor for it. */
function DataRecipeEditorRoute(): ReactElement {
  const params = Route.useParams();
  return <EditRecipePage recipeId={params.recipeId} />;
}


================================================
FILE: studio/frontend/src/app/routes/data-recipes.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Lazily loaded page component, adapted from the feature module's named export.
const DataRecipesPage = lazy(async () => {
  const dataRecipesFeature = await import("@/features/data-recipes");
  return { default: dataRecipesFeature.DataRecipesPage };
});

// /data-recipes: requires an authenticated user.
export const Route = createRoute({
  path: "/data-recipes",
  getParentRoute: () => rootRoute,
  component: DataRecipesPage,
  beforeLoad: () => requireAuth(),
});


================================================
FILE: studio/frontend/src/app/routes/export.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Lazily loaded page component, adapted from the module's named export.
const ExportPage = lazy(async () => {
  const exportModule = await import("@/features/export/export-page");
  return { default: exportModule.ExportPage };
});

// /export: requires an authenticated user.
export const Route = createRoute({
  path: "/export",
  getParentRoute: () => rootRoute,
  component: ExportPage,
  beforeLoad: () => requireAuth(),
});


================================================
FILE: studio/frontend/src/app/routes/grid-test.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { DashboardGrid, DashboardLayout } from "@/components/layout";
import {
  Card,
  CardContent,
  CardDescription,
  CardHeader,
  CardTitle,
} from "@/components/ui/card";
import { createRoute } from "@tanstack/react-router";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// /grid-test: layout test page; requires an authenticated user.
export const Route = createRoute({
  path: "/grid-test",
  getParentRoute: () => rootRoute,
  component: GridTestPage,
  beforeLoad: () => requireAuth(),
});

/**
 * Visual test page for the dashboard grid: one three-column section of
 * default cards followed by one four-column section of small cards.
 */
function GridTestPage() {
  const threeColumnCards = [1, 2, 3];
  const fourColumnCards = [1, 2, 3, 4];

  return (
    <DashboardLayout>
      <div className="space-y-8">
        <div>
          <h1 className="text-2xl font-semibold">Grid Test - 3 Columns</h1>
          <p className="text-muted-foreground">
            max-w-7xl, gap-6, responsive 1→2→3
          </p>
        </div>

        <DashboardGrid cols={3}>
          {threeColumnCards.map((cardNumber) => (
            <Card key={cardNumber}>
              <CardHeader>
                <CardTitle>Card {cardNumber}</CardTitle>
                <CardDescription>~400px at 1280px viewport</CardDescription>
              </CardHeader>
              <CardContent>
                <div className="h-24 rounded-lg bg-muted" />
              </CardContent>
            </Card>
          ))}
        </DashboardGrid>

        <div>
          <h2 className="text-xl font-semibold">4 Columns</h2>
          <p className="text-muted-foreground">~296px per card at 1280px</p>
        </div>

        <DashboardGrid cols={4}>
          {fourColumnCards.map((cardNumber) => (
            <Card key={cardNumber} size="sm">
              <CardHeader>
                <CardTitle>Card {cardNumber}</CardTitle>
                <CardDescription>Smaller cards</CardDescription>
              </CardHeader>
              <CardContent>
                <div className="h-16 rounded-lg bg-muted" />
              </CardContent>
            </Card>
          ))}
        </DashboardGrid>
      </div>
    </DashboardLayout>
  );
}


================================================
FILE: studio/frontend/src/app/routes/index.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute, redirect } from "@tanstack/react-router";
import { getPostAuthRoute } from "@/features/auth";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Root ("/") route. It never shows content of its own: beforeLoad awaits the
// requireAuth() guard and then unconditionally throws a redirect to whatever
// route getPostAuthRoute() returns.
export const Route = createRoute({
  getParentRoute: () => rootRoute,
  path: "/",
  beforeLoad: async () => {
    await requireAuth();
    throw redirect({ to: getPostAuthRoute() });
  },
  // Placeholder only; beforeLoad above always throws before this would render.
  component: () => null,
});


================================================
FILE: studio/frontend/src/app/routes/login.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requireGuest } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Code-split login page: loads the auth feature module on demand and maps its
// named `LoginPage` export onto the `default` key that React.lazy expects.
const LoginPage = lazy(async () => {
  const authModule = await import("@/features/auth");
  return { default: authModule.LoginPage };
});

// "/login" route. beforeLoad delegates to the requireGuest() guard and the
// rendered component is the lazily loaded LoginPage.
export const Route = createRoute({
  getParentRoute: () => rootRoute,
  path: "/login",
  beforeLoad: () => requireGuest(),
  component: LoginPage,
});


================================================
FILE: studio/frontend/src/app/routes/onboarding.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Code-split onboarding wizard: loads the wizard-layout module on demand and
// maps its named `WizardLayout` export onto the `default` key React.lazy expects.
const WizardLayout = lazy(async () => {
  const wizardModule = await import(
    "@/features/onboarding/components/wizard-layout"
  );
  return { default: wizardModule.WizardLayout };
});

// "/onboarding" route. beforeLoad delegates to the requireAuth() guard and the
// rendered component is the lazily loaded WizardLayout.
export const Route = createRoute({
  getParentRoute: () => rootRoute,
  path: "/onboarding",
  beforeLoad: () => requireAuth(),
  component: WizardLayout,
});


================================================
FILE: studio/frontend/src/app/routes/studio.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createRoute } from "@tanstack/react-router";
import { lazy } from "react";
import { requireAuth } from "../auth-guards";
import { Route as rootRoute } from "./__root";

// Code-split studio page: loads the studio-page module on demand and maps its
// named `StudioPage` export onto the `default` key React.lazy expects.
const StudioPage = lazy(async () => {
  const studioModule = await import("@/features/studio/studio-page");
  return { default: studioModule.StudioPage };
});

// "/studio" route. beforeLoad delegates to the requireAuth() guard and the
// rendered component is the lazily loaded StudioPage.
export const Route = createRoute({
  getParentRoute: () => rootRoute,
  path: "/studio",
  beforeLoad: () => requireAuth(),
  component: StudioPage,
});


================================================
FILE: studio/frontend/src/components/assistant-ui/attachment.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

// Avatar removed — caused circular crop on image thumbnails
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import {
  Dialog,
  DialogContent,
  DialogTitle,
  DialogTrigger,
} from "@/components/ui/dialog";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
import {
  AttachmentPrimitive,
  ComposerPrimitive,
  MessagePrimitive,
  useAui,
  useAuiState,
} from "@assistant-ui/react";
import { FileText, PlusIcon, XIcon } from "lucide-react";
import {
  type FC,
  type PropsWithChildren,
  useEffect,
  useState,
} from "react";
import { useShallow } from "zustand/shallow";

/**
 * Returns an object URL for `file`, or `undefined` while no file is given.
 *
 * The URL is created inside an effect keyed on `file` and revoked by that
 * effect's cleanup, i.e. when `file` changes or the component unmounts.
 */
const useFileSrc = (file: File | undefined): string | undefined => {
  const [objectUrl, setObjectUrl] = useState<string | undefined>(undefined);

  useEffect(() => {
    if (file) {
      const blobUrl = URL.createObjectURL(file);
      setObjectUrl(blobUrl);
      return () => {
        URL.revokeObjectURL(blobUrl);
      };
    }

    // No file: clear any previously exposed URL; nothing to clean up.
    setObjectUrl(undefined);
    return undefined;
  }, [file]);

  return objectUrl;
};

/**
 * Resolves a displayable image source for the current attachment.
 *
 * Non-image attachments yield `undefined`. For image attachments a local
 * `file` is preferred (turned into an object URL via useFileSrc); otherwise
 * the `image` of the first content part with type "image" is used.
 */
const useAttachmentSrc = (): string | undefined => {
  const { file, src } = useAuiState(
    useShallow(({ attachment }): { file?: File; src?: string } => {
      if (attachment.type !== "image") {
        return {};
      }
      if (attachment.file) {
        return { file: attachment.file };
      }
      const firstImagePart = attachment.content?.find(
        (part) => part.type === "image",
      );
      const imageSrc = firstImagePart?.image;
      return imageSrc ? { src: imageSrc } : {};
    }),
  );

  const fileSrc = useFileSrc(file);
  return fileSrc ?? src;
};

// Props for AttachmentPreview.
type AttachmentPreviewProps = {
  // Passed straight to the <img> element's `src` attribute.
  src: string;
};

// Image shown inside the preview dialog. The <img> carries the `invisible`
// (loading) classes until its load event fires, then switches to the
// "loaded" class.
const AttachmentPreview: FC<AttachmentPreviewProps> = ({ src }) => {
  // Flipped to true by the <img> onLoad handler; never reset in this component.
  const [isLoaded, setIsLoaded] = useState(false);
  return (
    <img
      src={src}
      alt="Preview"
      className={cn(
        "block h-auto max-h-[80vh] w-auto max-w-full object-contain",
        isLoaded
          ? "aui-attachment-preview-image-loaded"
          : "aui-attachment-preview-image-loading invisible",
      )}
      onLoad={() => setIsLoaded(true)}
    />
  );
};

// Wraps `children` in a dialog trigger that opens a large image preview.
// When useAttachmentSrc() yields no source (e.g. the attachment is not an
// image), the children are returned unwrapped and no dialog is rendered.
const AttachmentPreviewDialog: FC<PropsWithChildren> = ({ children }) => {
  const src = useAttachmentSrc();

  // Nothing to preview: render the children as-is.
  if (!src) {
    return children;
  }

  return (
    <Dialog>
      <DialogTrigger
        className="aui-attachment-preview-trigger cursor-pointer transition-colors hover:bg-accent/50"
        asChild={true}
      >
        {children}
      </DialogTrigger>
      <DialogContent className="aui-attachment-preview-dialog-content p-2 sm:max-w-3xl [&>button]:rounded-full [&>button]:bg-foreground/60 [&>button]:p-1 [&>button]:opacity-100 [&>button]:ring-0! [&_svg]:text-background [&>button]:hover:[&_svg]:text-destructive">
        <DialogTitle className="aui-sr-only sr-only">
          Image Attachment Preview
        </DialogTitle>
        <div className="aui-attachment-preview relative mx-auto flex max-h-[80dvh] w-full items-center justify-center overflow-hidden bg-background">
          <AttachmentPreview src={src} />
        </div>
      </DialogContent>
    </Dialog>
  );
};

// Tile content for an attachment: the image itself when an image source is
// available, otherwise a generic file icon.
const AttachmentThumb: FC = () => {
  const src = useAttachmentSrc();

  if (!src) {
    return (
      <div className="flex h-full w-full items-center justify-center">
        <FileText className="size-6 text-muted-foreground" />
      </div>
    );
  }

  return (
    <img
      src={src}
      alt="Attachment preview"
      className="h-full w-full object-cover"
    />
  );
};

// Renders one attachment as a clickable tile with a tooltip showing its name.
// The tile is wrapped in AttachmentPreviewDialog, and a remove button is added
// only when the attachment's source is "composer".
//
// The button's id `attachment-tile` is what the root's
// `only:[&>#attachment-tile]:size-16` class selector targets, so the two must
// stay in sync.
const AttachmentUI: FC = () => {
  const aui = useAui();
  const isComposer = aui.attachment.source === "composer";

  const isImage = useAuiState(({ attachment }) => attachment.type === "image");
  // Human-readable type name, used only for the tile's aria-label below.
  const typeLabel = useAuiState(({ attachment }) => {
    const type = attachment.type;
    switch (type) {
      case "image":
        return "Image";
      case "document":
        return "Document";
      case "file":
        return "File";
      default:
        // Thrown from inside the state selector for any type not handled above.
        throw new Error(`Unknown attachment type: ${type as string}`);
    }
  });

  return (
    <Tooltip>
      <AttachmentPrimitive.Root
        className={cn(
          "aui-attachment-root relative",
          isImage &&
            "aui-attachment-root-composer only:[&>#attachment-tile]:size-16",
        )}
      >
        <AttachmentPreviewDialog>
          <TooltipTrigger asChild={true}>
            <button
              className={cn(
                "aui-attachment-tile size-14 cursor-pointer overflow-hidden rounded-[14px] border bg-muted transition-opacity hover:opacity-75",
                isComposer &&
                  "aui-attachment-tile-composer border-foreground/20",
              )}
              id="attachment-tile"
              aria-label={`${typeLabel} attachment`}
              type="button"
            >
              <AttachmentThumb />
            </button>
          </TooltipTrigger>
        </AttachmentPreviewDialog>
        {isComposer && <AttachmentRemove />}
      </AttachmentPrimitive.Root>
      <TooltipContent side="top">
        <AttachmentPrimitive.Name />
      </TooltipContent>
    </Tooltip>
  );
};

// Small round "remove" button overlaid on the top-right corner of an
// attachment tile; the click behaviour comes from AttachmentPrimitive.Remove.
const AttachmentRemove: FC = () => (
  <AttachmentPrimitive.Remove asChild={true}>
    <TooltipIconButton
      tooltip="Remove file"
      className="aui-attachment-tile-remove absolute top-1.5 right-1.5 size-3.5 rounded-full bg-white text-muted-foreground opacity-100 shadow-sm hover:bg-white! [&_svg]:text-black hover:[&_svg]:text-destructive"
      side="top"
    >
      <XIcon className="aui-attachment-remove-icon size-3 dark:stroke-[2.5px]" />
    </TooltipIconButton>
  </AttachmentPrimitive.Remove>
);

// Right-aligned row of attachment tiles for a user message; each attachment
// is rendered with AttachmentUI.
export const UserMessageAttachments: FC = () => (
  <div className="aui-user-message-attachments-end col-span-full col-start-1 row-start-1 flex w-full flex-row justify-end gap-2">
    <MessagePrimitive.Attachments components={{ Attachment: AttachmentUI }} />
  </div>
);

// Horizontally scrollable row of the composer's pending attachments; the
// wrapper carries `empty:hidden` so it collapses when it has no children.
export const ComposerAttachments: FC = () => (
  <div className="aui-composer-attachments mb-2 flex w-full flex-row items-center gap-2 overflow-x-auto px-1.5 pt-0.5 pb-1 empty:hidden">
    <ComposerPrimitive.Attachments
      components={{ Attachment: AttachmentUI }}
    />
  </div>
);

// Round "+" icon button for the composer; the add-attachment behaviour comes
// from the wrapping ComposerPrimitive.AddAttachment.
export const ComposerAddAttachment: FC = () => (
  <ComposerPrimitive.AddAttachment asChild={true}>
    <TooltipIconButton
      tooltip="Add Attachment"
      side="bottom"
      variant="ghost"
      size="icon"
      className="aui-composer-add-attachment size-8.5 rounded-full p-1 font-semibold text-xs hover:bg-muted-foreground/15 dark:border-muted-foreground/15 dark:hover:bg-muted-foreground/30"
      aria-label="Add Attachment"
    >
      <PlusIcon className="aui-attachment-add-icon size-5 stroke-[1.5px]" />
    </TooltipIconButton>
  </ComposerPrimitive.AddAttachment>
);


================================================
FILE: studio/frontend/src/components/assistant-ui/audio-player.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Button } from "@/components/ui/button";
import { DownloadIcon, PauseIcon, PlayIcon } from "lucide-react";
import { type FC, useRef, useState } from "react";

// Props for AudioPlayer.
interface AudioPlayerProps {
  // Used both as the <audio> element's source and as the download link's href.
  src: string;
}

/**
 * Compact inline audio player with a play/pause button, a seek slider,
 * elapsed/total time labels and a download button.
 *
 * `isPlaying` mirrors the <audio> element through its play/pause/ended events
 * rather than being toggled blindly, so the icon stays correct when playback
 * fails to start or is paused by something other than the button.
 *
 * @param src - URL of the audio; also used as the download link target.
 */
export const AudioPlayer: FC<AudioPlayerProps> = ({ src }) => {
  const audioRef = useRef<HTMLAudioElement>(null);
  const [isPlaying, setIsPlaying] = useState(false);
  const [progress, setProgress] = useState(0);
  const [duration, setDuration] = useState(0);

  const togglePlay = () => {
    const audio = audioRef.current;
    if (!audio) return;
    if (audio.paused) {
      // play() returns a promise that rejects when playback cannot start
      // (autoplay policy, missing/unsupported source). Swallow the rejection
      // and make sure the button does not get stuck on the pause icon.
      audio.play().catch(() => setIsPlaying(false));
    } else {
      audio.pause();
    }
  };

  const handleTimeUpdate = () => {
    const audio = audioRef.current;
    if (!audio) return;
    setProgress(audio.currentTime);
  };

  const handleLoadedMetadata = () => {
    const audio = audioRef.current;
    if (!audio) return;
    // duration is NaN before metadata is known and Infinity for unbounded
    // streams; neither is usable as a slider max or a time label.
    setDuration(Number.isFinite(audio.duration) ? audio.duration : 0);
  };

  const handleEnded = () => {
    setIsPlaying(false);
    setProgress(0);
  };

  const handleSeek = (e: React.ChangeEvent<HTMLInputElement>) => {
    const audio = audioRef.current;
    if (!audio) return;
    const time = parseFloat(e.target.value);
    // Assigning a non-finite currentTime throws a TypeError.
    if (!Number.isFinite(time)) return;
    audio.currentTime = time;
    setProgress(time);
  };

  const handleDownload = () => {
    const link = document.createElement("a");
    link.href = src;
    link.download = "generated-audio.wav";
    link.click();
  };

  // Formats seconds as m:ss; non-finite or negative input renders as 0:00.
  const formatTime = (t: number) => {
    const safe = Number.isFinite(t) && t > 0 ? t : 0;
    const mins = Math.floor(safe / 60);
    const secs = Math.floor(safe % 60);
    return `${mins}:${secs.toString().padStart(2, "0")}`;
  };

  return (
    <div className="my-2 flex max-w-md items-center gap-3 rounded-xl border bg-muted/50 px-4 py-3">
      <audio
        ref={audioRef}
        src={src}
        onPlay={() => setIsPlaying(true)}
        onPause={() => setIsPlaying(false)}
        onTimeUpdate={handleTimeUpdate}
        onLoadedMetadata={handleLoadedMetadata}
        onEnded={handleEnded}
        preload="metadata"
      />
      <Button
        variant="ghost"
        size="icon"
        className="size-8 shrink-0 rounded-full"
        onClick={togglePlay}
        aria-label={isPlaying ? "Pause" : "Play"}
      >
        {isPlaying ? (
          <PauseIcon className="size-4" />
        ) : (
          <PlayIcon className="size-4" />
        )}
      </Button>
      <div className="flex flex-1 flex-col gap-1">
        <input
          type="range"
          min={0}
          max={duration}
          step={0.01}
          value={progress}
          onChange={handleSeek}
          aria-label="Seek"
          className="h-1.5 w-full cursor-pointer accent-primary"
        />
        <div className="flex justify-between text-[10px] text-muted-foreground">
          <span>{formatTime(progress)}</span>
          <span>{formatTime(duration)}</span>
        </div>
      </div>
      <Button
        variant="ghost"
        size="icon"
        className="size-7 shrink-0 text-muted-foreground"
        onClick={handleDownload}
        title="Download audio"
        aria-label="Download audio"
      >
        <DownloadIcon className="size-3.5" />
      </Button>
    </div>
  );
};


================================================
FILE: studio/frontend/src/components/assistant-ui/badge.tsx
================================================
"use client";

import type { ComponentProps } from "react";
import { Slot } from "radix-ui";
import { cva, type VariantProps } from "class-variance-authority";
import { cn } from "@/lib/utils";

// Class-name generator for Badge, built with cva(): a shared base class
// string plus one class string per `variant` and per `size`. When neither is
// supplied the defaults are variant "outline" and size "default".
const badgeVariants = cva(
  "inline-flex items-center justify-center gap-1 rounded-md font-medium text-xs transition-colors [&_svg]:size-3 [&_svg]:shrink-0",
  {
    variants: {
      variant: {
        outline:
          "border border-input bg-transparent text-muted-foreground hover:bg-accent hover:text-accent-foreground",
        secondary:
          "bg-secondary text-secondary-foreground hover:bg-secondary/80",
        muted:
          "bg-muted text-muted-foreground hover:bg-muted/80 hover:text-foreground",
        ghost:
          "bg-transparent text-muted-foreground hover:bg-accent hover:text-accent-foreground",
        info: "bg-blue-100 text-blue-700 hover:bg-blue-100/80 dark:bg-blue-900/50 dark:text-blue-300",
        warning:
          "bg-amber-100 text-amber-700 hover:bg-amber-100/80 dark:bg-amber-900/50 dark:text-amber-300",
        success:
          "bg-emerald-100 text-emerald-700 hover:bg-emerald-100/80 dark:bg-emerald-900/50 dark:text-emerald-300",
        destructive:
          "bg-red-100 text-red-700 hover:bg-red-100/80 dark:bg-red-900/50 dark:text-red-300",
      },
      size: {
        sm: "px-1.5 py-0.5",
        default: "px-2 py-1",
        lg: "px-2.5 py-1.5 text-sm",
      },
    },
    defaultVariants: {
      variant: "outline",
      size: "default",
    },
  },
);

// Props accepted by Badge: every <span> prop, the `variant` / `size` options
// declared in badgeVariants, and an optional `asChild` flag.
export type BadgeProps = ComponentProps<"span"> &
  VariantProps<typeof badgeVariants> & {
    // When true, Badge renders Slot.Root instead of a <span>.
    asChild?: boolean;
  };

/**
 * Small label element styled through badgeVariants.
 *
 * Renders a <span> by default, or Slot.Root when `asChild` is true. The
 * chosen `variant` and `size` are also exposed as data attributes, and any
 * remaining props are spread onto the rendered element.
 */
function Badge(props: BadgeProps) {
  const { className, variant, size, asChild = false, ...rest } = props;
  const Comp = asChild ? Slot.Root : "span";
  const classes = cn(badgeVariants({ variant, size }), className);

  return (
    <Comp
      data-slot="badge"
      data-variant={variant}
      data-size={size}
      className={classes}
      {...rest}
    />
  );
}

export { Badge, badgeVariants };


================================================
FILE: studio/frontend/src/components/assistant-ui/markdown-text.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { copyToClipboard } from "@/lib/copy-to-clipboard";
import { INTERNAL, useMessagePartText } from "@assistant-ui/react";
import { Copy02Icon, Tick02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { code } from "@streamdown/code";
import { math } from "@streamdown/math";
import { mermaid } from "@streamdown/mermaid";
import { DownloadIcon, Maximize2Icon, Minimize2Icon } from "lucide-react";
import { useEffect, useRef, useState } from "react";
import { Block, type BlockProps, Streamdown } from "streamdown";
import "katex/dist/katex.min.css";
import { AudioPlayer } from "./audio-player";

// Wrapper taken from assistant-ui's INTERNAL export; applied to the exported
// MarkdownText component at the bottom of this file.
const { withSmoothContextProvider } = INTERNAL;
// How long the "copied" tick stays visible before reverting to the copy icon.
const COPY_RESET_MS = 2000;
// First ```mermaid fence anywhere in a block (case-insensitive); capture
// group 1 is the text between the opening marker and the next ```.
const MERMAID_SOURCE_RE = /```mermaid\s*([\s\S]*?)```/i;
// A block that is exactly one fenced code block: group 1 is the info string
// on the opening line (no backticks or newlines), group 2 is the body.
const CODE_FENCE_RE = /^```([^\r\n`]*)\r?\n([\s\S]*?)\r?\n?```$/;
// Shared classes for the floating copy/download panel on code blocks.
const ACTION_PANEL_CLASS =
  "pointer-events-auto flex shrink-0 items-center gap-2 rounded-md border border-sidebar bg-sidebar/80 px-1.5 py-1 supports-[backdrop-filter]:bg-sidebar/70 supports-[backdrop-filter]:backdrop-blur";
// Shared classes for each button inside that panel.
const ACTION_BUTTON_CLASS =
  "cursor-pointer p-1 text-muted-foreground transition-all hover:text-foreground disabled:cursor-not-allowed disabled:opacity-50";

// Result of parsing a fenced code block with getCodeFence().
type CodeFence = {
  // Trimmed info string from the opening fence line, or null when it is empty.
  language: string | null;
  // Text between the opening and closing fence lines.
  source: string;
};

/**
 * Extracts the trimmed body of the first ```mermaid fence in `blockContent`.
 * Returns null when there is no such fence or its body is empty.
 */
function getMermaidSource(blockContent: string): string | null {
  const fence = MERMAID_SOURCE_RE.exec(blockContent);
  const body = fence?.[1]?.trim();
  return body ? body : null;
}

/**
 * Parses `blockContent` as a single fenced code block.
 *
 * Trailing whitespace is ignored. Returns null unless the whole block matches
 * CODE_FENCE_RE; otherwise returns the trimmed info string (null when empty)
 * and the raw body.
 */
function getCodeFence(blockContent: string): CodeFence | null {
  const parsed = CODE_FENCE_RE.exec(blockContent.trimEnd());
  if (parsed === null) {
    return null;
  }

  const [, infoString, body] = parsed;
  return {
    language: infoString?.trim() || null,
    source: body,
  };
}

/**
 * Builds the download filename (`snippet.<ext>`) for a code block.
 *
 * @param language - Fence info string, or null when the fence had none.
 * @returns `snippet.<ext>` where `<ext>` is the mapped extension for a known
 *   language, a sanitised form of the language name for an unknown one, or
 *   `txt` when no usable language is present.
 */
function getCodeFilename(language: string | null) {
  const extByLanguage: Record<string, string> = {
    bash: "sh",
    "c#": "cs",
    "c++": "cpp",
    javascript: "js",
    js: "js",
    json: "json",
    jsx: "jsx",
    markdown: "md",
    md: "md",
    python: "py",
    py: "py",
    shell: "sh",
    sh: "sh",
    sql: "sql",
    ts: "ts",
    tsx: "tsx",
    typescript: "ts",
    svg: "svg",
    yaml: "yml",
    yml: "yml",
  };

  // Only the first word of an info string names the language; anything after
  // it (e.g. `python title="x.py"`) is metadata and must not leak into the
  // extension.
  const normalized = language?.trim().split(/\s+/)[0]?.toLowerCase();
  if (!normalized) {
    return "snippet.txt";
  }

  // Own-property check: a plain `extByLanguage[normalized]` would resolve
  // names such as "constructor" or "toString" through Object.prototype and
  // stringify a function into the filename.
  const knownExt = Object.prototype.hasOwnProperty.call(
    extByLanguage,
    normalized,
  )
    ? extByLanguage[normalized]
    : undefined;

  // Unknown language: collapse runs of other characters to "-" and drop
  // leading/trailing dashes so the extension never starts or ends with one.
  const fallbackExt = normalized
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");

  return `snippet.${knownExt || fallbackExt || "txt"}`;
}

/**
 * True when the fence holds SVG: either its language is "svg", or it is
 * "xml"/"html" and the body (ignoring leading whitespace) starts with "<svg".
 * The language comparison is case-insensitive.
 */
function isSvgFence(codeFence: CodeFence): boolean {
  const lang = (codeFence.language ?? "").toLowerCase();
  switch (lang) {
    case "svg":
      return true;
    case "xml":
    case "html":
      return codeFence.source.trimStart().startsWith("<svg");
    default:
      return false;
  }
}

/**
 * True when the fence language is "html" (case-insensitive) and the body,
 * ignoring leading whitespace, does not start with "<svg".
 */
function isHtmlFence(codeFence: CodeFence): boolean {
  if (codeFence.language?.toLowerCase() !== "html") {
    return false;
  }
  const startsWithSvg = codeFence.source.trimStart().startsWith("<svg");
  return !startsWithSvg;
}

// Case-insensitive deny-list for SVG source: <script>, <foreignObject>,
// <iframe>, <embed>, <object> tags, "javascript:" and anything shaped like
// `on<word>=`.
// NOTE(review): `on\w+\s*=` has no word boundary, so it also matches inside
// longer names or plain text (e.g. "conditions="); such SVGs simply get no
// preview.
const UNSAFE_SVG_RE = /<script[\s>]|on\w+\s*=|javascript:|<foreignObject[\s>]|<iframe[\s>]|<embed[\s>]|<object[\s>]/i;

// Returns `source` unchanged when it does not match UNSAFE_SVG_RE and null
// when it does. Despite the name, nothing is rewritten or stripped.
function sanitizeSvg(source: string): string | null {
  if (UNSAFE_SVG_RE.test(source)) return null;
  return source;
}

// Renders SVG source as an <img> by URI-encoding it into a
// `data:image/svg+xml` URL, on a light background in both themes.
function SvgPreview(props: { source: string }) {
  const encodedSvg = encodeURIComponent(props.source);
  const dataUri = "data:image/svg+xml;charset=utf-8," + encodedSvg;

  return (
    <div className="mt-2 flex justify-center rounded-lg border border-border bg-white p-4 dark:bg-neutral-100">
      <img
        src={dataUri}
        alt="SVG preview"
        style={{ maxWidth: "100%", maxHeight: 512 }}
      />
    </div>
  );
}

// Inline iframe height (px) used until the iframe reports its own height.
const HTML_PREVIEW_DEFAULT_HEIGHT = 400;
// Upper bound (px) applied to the height reported by the iframe.
const HTML_PREVIEW_MAX_HEIGHT = 800;

/**
 * Renders `source` as a live HTML preview inside an iframe with
 * `sandbox="allow-scripts"`.
 *
 * A small script is appended to the document so the iframe can report its
 * scroll height to the parent, which resizes the inline preview. The preview
 * can be enlarged into a full-viewport overlay that closes on Escape, on the
 * "Exit fullscreen" button, or on a click on the overlay backdrop itself.
 */
function HtmlPreview({ source }: { source: string }) {
  const iframeRef = useRef<HTMLIFrameElement>(null);
  const [height, setHeight] = useState(HTML_PREVIEW_DEFAULT_HEIGHT);
  const [enlarged, setEnlarged] = useState(false);

  // Listen for height reports posted by the script injected below.
  useEffect(() => {
    const handler = (e: MessageEvent) => {
      // Ignore messages that do not come from this component's own iframe.
      if (e.source !== iframeRef.current?.contentWindow) return;
      if (typeof e.data?.htmlPreviewHeight === "number") {
        // Clamp the reported height to [100, HTML_PREVIEW_MAX_HEIGHT].
        setHeight(Math.min(Math.max(e.data.htmlPreviewHeight, 100), HTML_PREVIEW_MAX_HEIGHT));
      }
    };
    window.addEventListener("message", handler);
    return () => window.removeEventListener("message", handler);
  }, []);

  // While enlarged, Escape closes the overlay; the listener is removed again
  // as soon as `enlarged` becomes false.
  useEffect(() => {
    if (!enlarged) return;
    const handler = (e: KeyboardEvent) => {
      if (e.key === "Escape") setEnlarged(false);
    };
    window.addEventListener("keydown", handler);
    return () => window.removeEventListener("keydown", handler);
  }, [enlarged]);

  // Posts the document's scrollHeight to the parent whenever the root element
  // resizes. The target origin is "*".
  // NOTE(review): presumably "*" is needed because the sandbox omits
  // allow-same-origin - confirm.
  const resizeScript = `<script>new ResizeObserver(()=>{
parent.postMessage({htmlPreviewHeight:document.documentElement.scrollHeight},"*");
}).observe(document.documentElement);</script>`;

  // The script is appended after the user's markup, whatever that markup is.
  const srcDoc = source + resizeScript;

  // Enlarged mode: an empty placeholder of the current inline height stays in
  // the flow while the iframe is rendered inside a fixed overlay.
  // NOTE(review): the iframe sits at a different place in the tree here than
  // in the inline branch, so toggling presumably remounts it and reloads
  // srcDoc - verify if preview state must survive the toggle.
  if (enlarged) {
    return (
      <>
        <div className="mt-2 overflow-hidden rounded-lg border border-border" style={{ height }}>
          {/* Placeholder keeps layout stable while overlay is shown */}
        </div>
        <div
          className="fixed inset-0 z-50 flex flex-col bg-background/80 backdrop-blur-sm"
          onClick={(e) => { if (e.target === e.currentTarget) setEnlarged(false); }}
        >
          <div className="flex items-center justify-end gap-2 px-4 py-2">
            <button
              type="button"
              className="flex items-center gap-1.5 rounded-md border border-border bg-background px-3 py-1.5 text-sm text-muted-foreground transition-colors hover:bg-muted hover:text-foreground"
              onClick={() => setEnlarged(false)}
              title="Exit fullscreen (Esc)"
            >
              <Minimize2Icon className="size-4" />
              Exit fullscreen
            </button>
          </div>
          <div className="mx-4 mb-4 flex-1 overflow-hidden rounded-lg border border-border bg-background">
            <iframe
              ref={iframeRef}
              srcDoc={srcDoc}
              sandbox="allow-scripts"
              style={{ width: "100%", height: "100%", border: "none", display: "block" }}
              title="HTML preview"
            />
          </div>
        </div>
      </>
    );
  }

  // Inline mode: iframe at the tracked height with a hover-revealed enlarge button.
  return (
    <div className="group/html-preview relative mt-2 overflow-hidden rounded-lg border border-border">
      <button
        type="button"
        className="absolute top-2 right-2 z-10 rounded-md border border-border bg-background/80 p-1.5 text-muted-foreground opacity-0 transition-all hover:bg-muted hover:text-foreground group-hover/html-preview:opacity-100 supports-[backdrop-filter]:backdrop-blur"
        onClick={() => setEnlarged(true)}
        title="Enlarge preview"
      >
        <Maximize2Icon className="size-4" />
      </button>
      <iframe
        ref={iframeRef}
        srcDoc={srcDoc}
        sandbox="allow-scripts"
        style={{ width: "100%", height, border: "none", display: "block" }}
        title="HTML preview"
      />
    </div>
  );
}

/**
 * Triggers a browser download of `text` as a UTF-8 plain-text file named
 * `filename`, by clicking a temporary anchor that points at a Blob URL.
 * The URL is revoked on the next timer tick after the click.
 */
function downloadTextFile(filename: string, text: string): void {
  const objectUrl = URL.createObjectURL(
    new Blob([text], { type: "text/plain;charset=utf-8" }),
  );

  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = filename;

  // The anchor is attached only for the duration of the click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  window.setTimeout(() => {
    URL.revokeObjectURL(objectUrl);
  }, 0);
}

/**
 * Tracks a transient "copied" flag.
 *
 * `showCopied()` sets the flag and schedules it to clear after COPY_RESET_MS,
 * replacing any reset that is already pending. A pending reset is cancelled
 * when the component unmounts.
 */
function useCopiedState() {
  const [copied, setCopied] = useState(false);
  const resetTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  useEffect(
    () => () => {
      const pending = resetTimeoutRef.current;
      if (pending) {
        clearTimeout(pending);
      }
    },
    [],
  );

  const showCopied = () => {
    setCopied(true);

    const pending = resetTimeoutRef.current;
    if (pending) {
      clearTimeout(pending);
    }

    resetTimeoutRef.current = setTimeout(() => {
      setCopied(false);
      resetTimeoutRef.current = null;
    }, COPY_RESET_MS);
  };

  return { copied, showCopied };
}

// Copy button overlaid on a Mermaid diagram; copies the diagram source and
// shows a tick icon while the "copied" flag is set.
function MermaidCopyButton({ source }: { source: string }) {
  const { copied, showCopied } = useCopiedState();

  const handleCopy = () => {
    // Only show the tick when copyToClipboard returns a truthy result.
    if (copyToClipboard(source)) {
      showCopied();
    }
  };

  const icon = copied ? Tick02Icon : Copy02Icon;

  return (
    <button
      type="button"
      className="absolute top-3.5 right-20 z-20 cursor-pointer text-muted-foreground transition-all hover:text-foreground"
      title="Copy Mermaid source"
      onClick={handleCopy}
    >
      <HugeiconsIcon icon={icon} className="size-5" />
    </button>
  );
}

/**
 * Floating action panel for a code block with "copy" and "download" buttons.
 *
 * @param disabled - Disables both buttons.
 * @param language - Fence language, used to pick the download filename.
 * @param source - Code text that is copied or downloaded.
 */
function CodeBlockActions(props: {
  disabled: boolean;
  language: string | null;
  source: string;
}) {
  const { disabled, language, source } = props;
  const { copied, showCopied } = useCopiedState();

  const handleCopy = () => {
    // Only show the tick when copyToClipboard returns a truthy result.
    if (copyToClipboard(source)) {
      showCopied();
    }
  };

  const handleDownload = () => {
    downloadTextFile(getCodeFilename(language), source);
  };

  const copyIcon = copied ? Tick02Icon : Copy02Icon;

  return (
    <div className="pointer-events-none absolute top-3.5 right-3 z-20 flex items-center justify-end">
      <div className={ACTION_PANEL_CLASS}>
        <button
          type="button"
          className={ACTION_BUTTON_CLASS}
          title="Copy code"
          disabled={disabled}
          onClick={handleCopy}
        >
          <HugeiconsIcon icon={copyIcon} className="size-3.5" />
        </button>
        <button
          type="button"
          className={ACTION_BUTTON_CLASS}
          title="Download file"
          disabled={disabled}
          onClick={handleDownload}
        >
          <DownloadIcon className="size-3.5" />
        </button>
      </div>
    </div>
  );
}

/**
 * Custom block renderer passed to Streamdown as `BlockComponent`.
 *
 * While a block is incomplete it shows placeholders for mermaid, SVG and HTML
 * fences. Otherwise it renders the default Block and adds a copy button for
 * mermaid, or copy/download actions plus an optional SVG or HTML live preview
 * for other fenced code. The branch order below is significant: the first
 * matching case wins.
 */
function StreamdownBlock(props: BlockProps) {
  // Plain, case-sensitive substring check; works even before the fence is closed.
  const hasMermaidFence = props.content.includes("```mermaid");
  const mermaidSource = getMermaidSource(props.content);
  const codeFence = getCodeFence(props.content);

  // Incomplete mermaid fence: pulsing placeholder instead of the diagram.
  if (props.isIncomplete && hasMermaidFence) {
    return (
      <div className="my-4 flex h-48 items-center justify-center rounded-xl border border-border bg-muted/30 text-sm text-muted-foreground animate-pulse">
        Loading diagram...
      </div>
    );
  }

  // Incomplete SVG fence: show the raw source as plain text, no preview.
  if (props.isIncomplete && codeFence && isSvgFence(codeFence)) {
    return (
      <div className="relative isolate">
        <div className="my-4 rounded-xl border border-border bg-muted/30 p-4">
          <div className="mb-2 text-xs font-medium text-muted-foreground">svg</div>
          <pre className="overflow-x-auto text-xs text-muted-foreground whitespace-pre-wrap break-all">
            <code>{codeFence.source}</code>
          </pre>
        </div>
      </div>
    );
  }

  // Incomplete HTML fence: pulsing placeholder instead of the code/preview.
  if (props.isIncomplete && codeFence && isHtmlFence(codeFence)) {
    return (
      <div className="my-4 flex h-48 items-center justify-center rounded-xl border border-border bg-muted/30 text-sm text-muted-foreground animate-pulse">
        Loading preview...
      </div>
    );
  }

  // Mermaid block with a non-empty body: default rendering plus a copy button.
  if (mermaidSource) {
    return (
      <div className="relative isolate">
        <Block {...props} />
        <MermaidCopyButton source={mermaidSource} />
      </div>
    );
  }

  // Any other fenced code block: default rendering plus copy/download actions.
  if (codeFence) {
    // Previews are only built for complete blocks. sanitizeSvg returns null
    // for SVG that matches its deny-list, which suppresses the SVG preview.
    const svgSource = !props.isIncomplete && isSvgFence(codeFence) ? sanitizeSvg(codeFence.source) : null;
    const htmlSource = !props.isIncomplete && isHtmlFence(codeFence) ? codeFence.source : null;
    return (
      <>
        <div className="relative isolate">
          <Block {...props} />
          <CodeBlockActions
            disabled={props.isIncomplete}
            language={codeFence.language}
            source={codeFence.source}
          />
        </div>
        {svgSource && <SvgPreview source={svgSource} />}
        {htmlSource && <HtmlPreview source={htmlSource} />}
      </>
    );
  }

  // Everything else is rendered by the default Block unchanged.
  return <Block {...props} />;
}
// Matches a self-closing `<audio-player src="..." />` tag anywhere in the
// message text; capture group 1 is the src value.
const AUDIO_PLAYER_RE = /<audio-player\s+src="([^"]+)"\s*\/>/;

// Renders the current message text part. If the text contains an
// <audio-player> tag, the whole part is replaced by an AudioPlayer for that
// src; otherwise the text is rendered as streaming markdown via Streamdown.
const MarkdownTextImpl = () => {
  const { text, status } = useMessagePartText();

  // Any text surrounding the tag is not rendered in this case.
  const audioMatch = text.match(AUDIO_PLAYER_RE);
  if (audioMatch) {
    return <AudioPlayer src={audioMatch[1]} />;
  }

  return (
    <div data-status={status.type}>
      <Streamdown
        mode="streaming"
        isAnimating={status.type === "running"}
        plugins={{ code, math, mermaid }}
        controls={{
          code: false,
          mermaid: {
            fullscreen: true,
            download: true,
            copy: false,
            panZoom: true,
          },
        }}
        shikiTheme={["github-light", "github-dark"]}
        BlockComponent={StreamdownBlock}
      >
        {text}
      </Streamdown>
    </div>
  );
};

// Public component: MarkdownTextImpl wrapped with assistant-ui's
// INTERNAL.withSmoothContextProvider.
export const MarkdownText = withSmoothContextProvider(MarkdownTextImpl);


================================================
FILE: studio/frontend/src/components/assistant-ui/message-timing.tsx
================================================
"use client";

import { useMessageTiming } from "@assistant-ui/react";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
import type { FC } from "react";

/**
 * Formats a duration in milliseconds for display.
 *
 * @param ms - Duration in milliseconds, or undefined when unknown.
 * @returns "—" for undefined or non-finite input, whole milliseconds
 *   ("250ms") when the rounded value is below one second, otherwise seconds
 *   with two decimals ("1.50s").
 */
const formatTimingMs = (ms: number | undefined): string => {
  if (ms === undefined || !Number.isFinite(ms)) return "—";
  // Compare the rounded value so 999.5-999.99 shows as "1.00s", not "1000ms".
  const roundedMs = Math.round(ms);
  if (roundedMs < 1000) return `${roundedMs}ms`;
  return `${(ms / 1000).toFixed(2)}s`;
};

/**
 * Shows streaming stats (TTFT, total time, chunks) as a badge with a
 * hover/focus tooltip. Renders nothing until the stream completes.
 *
 * The badge itself shows the total stream time; the tooltip lists the first
 * token time (only when it is defined), the total time and the chunk count.
 *
 * Place it inside `ActionBarPrimitive.Root` in your `thread.tsx` so it
 * inherits the action bar's autohide behaviour:
 *
 * ```tsx
 * import { MessageTiming } from "@/components/assistant-ui/message-timing";
 *
 * <ActionBarPrimitive.Root >
 *   <ActionBarPrimitive.Copy />
 *   <ActionBarPrimitive.Reload />
 *   <MessageTiming />  // <-- add this
 * </ActionBarPrimitive.Root>
 * ```
 *
 * @param className - Extra classes merged onto the badge trigger button.
 * @param side - Side of the tooltip relative to the badge trigger. Defaults to `"right"`.
 */
export const MessageTiming: FC<{
  className?: string;
  side?: "top" | "right" | "bottom" | "left";
}> = ({ className, side = "right" }) => {
  const timing = useMessageTiming();
  // No timing data, or no total stream time yet: render nothing.
  if (timing?.totalStreamTime === undefined) return null;

  return (
    <Tooltip>
      <TooltipTrigger asChild>
        <button
          type="button"
          data-slot="message-timing-trigger"
          aria-label="Message timing"
          className={cn(
            "flex items-center rounded-md p-1 font-mono text-muted-foreground text-xs tabular-nums transition-colors hover:bg-accent hover:text-accent-foreground",
            className,
          )}
        >
          {formatTimingMs(timing.totalStreamTime)}
        </button>
      </TooltipTrigger>
      <TooltipContent
        side={side}
        sideOffset={8}
        data-slot="message-timing-popover"
        className="[&_span>svg]:hidden! rounded-lg border bg-popover px-3 py-2 text-popover-foreground shadow-md"
      >
        <div className="grid min-w-35 gap-1.5 text-xs">
          {timing.firstTokenTime !== undefined && (
            <div className="flex items-center justify-between gap-4">
              <span className="text-muted-foreground">First token</span>
              <span className="font-mono tabular-nums">
                {formatTimingMs(timing.firstTokenTime)}
              </span>
            </div>
          )}
          <div className="flex items-center justify-between gap-4">
            <span className="text-muted-foreground">Total</span>
            <span className="font-mono tabular-nums">
              {formatTimingMs(timing.totalStreamTime)}
            </span>
          </div>
          <div className="flex items-center justify-between gap-4">
            <span className="text-muted-foreground">Chunks</span>
            <span className="font-mono tabular-nums">{timing.totalChunks}</span>
          </div>
        </div>
      </TooltipContent>
    </Tooltip>
  );
};


================================================
FILE: studio/frontend/src/components/assistant-ui/model-selector/pickers.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { Input } from "@/components/ui/input";
import { Spinner } from "@/components/ui/spinner";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { deleteCachedModel, listCachedGguf, listCachedModels, listGgufVariants } from "@/features/chat/api/chat-api";
import type { CachedGgufRepo, CachedModelRepo } from "@/features/chat/api/chat-api";
import type { GgufVariantDetail } from "@/features/chat/types/api";
import { usePlatformStore } from "@/config/env";
import {
  useDebouncedValue,
  useGpuInfo,
  useHfModelSearch,
  useInfiniteScroll,
  useRecommendedModelVram,
} from "@/hooks";
import { cn, formatCompact } from "@/lib/utils";
import type { VramFitStatus } from "@/lib/vram";
import { checkVramFit, estimateLoadingVram } from "@/lib/vram";
import { Search01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { Trash2Icon } from "lucide-react";
import { useCallback, useEffect, useMemo, useRef, useState, type ReactNode } from "react";
import { toast } from "sonner";
import type {
  LoraModelOption,
  ModelOption,
  ModelSelectorChangeMeta,
} from "./types";

/** Drop falsy entries and repeats, keeping the first occurrence of each value in order. */
function dedupe(values: string[]): string[] {
  const seen = new Set<string>();
  for (const entry of values) {
    if (entry) seen.add(entry);
  }
  return Array.from(seen);
}

/** Small uppercase heading that titles one section of a picker list. */
function ListLabel(props: { children: ReactNode }) {
  const { children } = props;
  const headingClasses =
    "px-2.5 py-1.5 text-[10px] font-semibold uppercase tracking-wider text-muted-foreground";
  return <div className={headingClasses}>{children}</div>;
}

/**
 * Format a byte count as a human-readable size string (binary units, 1024-based).
 *
 * Values below 10 in the chosen unit keep one decimal ("1.5 KB"); larger
 * values are rounded to an integer ("512 B"). Non-finite, zero, or negative
 * input is reported as "0 B". Sizes beyond the largest known unit are
 * expressed in that unit (e.g. "1024 TB") instead of indexing past the table.
 */
function formatBytes(bytes: number): string {
  if (!Number.isFinite(bytes) || bytes <= 0) return "0 B";
  const units = ["B", "KB", "MB", "GB", "TB"];
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(1024));
  // Clamp: fractional byte counts give a negative index, and anything at or
  // above 1024^5 would otherwise read past the end of `units`.
  const i = Math.min(units.length - 1, Math.max(0, rawIndex));
  const value = bytes / 1024 ** i;
  return `${value.toFixed(value < 10 ? 1 : 0)} ${units[i]}`;
}

/**
 * One clickable row in a model list.
 *
 * Shows the label, optional OOM / TIGHT badges derived from `vramStatus`, and
 * an optional right-aligned `meta` string. When a VRAM estimate and GPU size
 * are both positive and a status is known, the row is wrapped in a tooltip
 * describing the estimate; otherwise `tooltipText`, if given, is used as the
 * tooltip body. With neither, the bare button is returned.
 */
function ModelRow({
  label,
  meta,
  selected,
  onClick,
  vramStatus,
  vramEst,
  gpuGb,
  tooltipText,
}: {
  label: string;
  meta?: string;
  selected?: boolean;
  onClick: () => void;
  vramStatus?: VramFitStatus | null;
  vramEst?: number;
  gpuGb?: number;
  tooltipText?: ReactNode;
}) {
  const isOverBudget = vramStatus === "exceeds";
  const isTight = vramStatus === "tight";
  const hasVramNumbers =
    vramEst != null && vramEst > 0 && gpuGb != null && gpuGb > 0;

  // Only describe VRAM when we have both numbers and a fit verdict.
  let vramHint: string | null = null;
  if (hasVramNumbers && vramStatus) {
    if (isOverBudget) {
      vramHint = `Needs ~${vramEst}GB VRAM (GPU: ${gpuGb}GB)`;
    } else if (isTight) {
      vramHint = `~${vramEst}GB VRAM (tight fit on ${gpuGb}GB)`;
    } else {
      vramHint = `~${vramEst}GB VRAM`;
    }
  }

  const rowButton = (
    <button
      type="button"
      onClick={onClick}
      className={cn(
        "flex w-full items-center gap-2 rounded-md px-2.5 py-1.5 text-left text-sm transition-colors hover:bg-accent",
        selected && "bg-accent/60",
        isOverBudget && "opacity-50",
      )}
    >
      <span
        className={cn(
          "block min-w-0 flex-1 truncate",
          isOverBudget && "line-through decoration-muted-foreground/50",
        )}
      >
        {label}
      </span>
      <span className="ml-auto flex items-center gap-1.5 shrink-0">
        {isOverBudget ? (
          <span className="text-[9px] font-medium text-red-400">OOM</span>
        ) : null}
        {isTight ? (
          <span className="text-[9px] font-medium text-amber-400">TIGHT</span>
        ) : null}
        {meta ? (
          <span className="text-[10px] text-muted-foreground">{meta}</span>
        ) : null}
      </span>
    </button>
  );

  // The VRAM hint takes precedence over a caller-supplied tooltip.
  const tooltipBody: ReactNode = vramHint ? (
    <>
      {label}
      <span className="block text-[10px] mt-1">{vramHint}</span>
    </>
  ) : (
    tooltipText
  );

  if (!tooltipBody) {
    return rowButton;
  }

  return (
    <Tooltip>
      <TooltipTrigger asChild>{rowButton}</TooltipTrigger>
      <TooltipContent side="left" className="max-w-xs break-all">
        {tooltipBody}
      </TooltipContent>
    </Tooltip>
  );
}

// ── GGUF Variant Expander ────────────────────────────────────

/**
 * Inline list of the GGUF quantization variants available for one repo.
 *
 * Fetches the variants with `listGgufVariants(repoId)` whenever `repoId`
 * changes, classifies each as fits / tight / OOM against the supplied GPU and
 * system-RAM sizes, sorts them, and renders one button per variant.
 *
 * @param repoId          Repo whose variants are listed.
 * @param onSelect        Called with `repoId` and a "hub" meta object carrying
 *                        the chosen quant, its downloaded flag and size.
 * @param gpuGb           Total GPU memory in GB; when missing or <= 0 every
 *                        variant is treated as fitting.
 * @param systemRamGb     System RAM in GB, used for the "tight" budget.
 * @param onDeleteVariant When provided, downloaded variants get a delete
 *                        button that calls this with the variant's quant.
 */
function GgufVariantExpander({
  repoId,
  onSelect,
  gpuGb,
  systemRamGb,
  onDeleteVariant,
}: {
  repoId: string;
  onSelect: (id: string, meta: ModelSelectorChangeMeta) => void;
  gpuGb?: number;
  systemRamGb?: number;
  onDeleteVariant?: (quant: string) => void;
}) {
  const [variants, setVariants] = useState<GgufVariantDetail[] | null>(null);
  const [defaultVariant, setDefaultVariant] = useState<string | null>(null);
  const [hasVision, setHasVision] = useState(false);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // `canceled` stops a response for a previous repoId (or an unmounted
    // component) from overwriting state.
    let canceled = false;
    setLoading(true);
    setError(null);

    listGgufVariants(repoId)
      .then((res) => {
        if (canceled) return;
        setVariants(res.variants);
        setDefaultVariant(res.default_variant);
        setHasVision(res.has_vision);
      })
      .catch((err) => {
        if (canceled) return;
        setError(err instanceof Error ? err.message : "Failed to load variants");
      })
      .finally(() => {
        if (!canceled) setLoading(false);
      });

    return () => {
      canceled = true;
    };
  }, [repoId]);

  const handleVariantClick = useCallback(
    (quant: string, downloaded?: boolean, sizeBytes?: number) => {
      onSelect(repoId, {
        source: "hub",
        isLora: false,
        ggufVariant: quant,
        isDownloaded: downloaded,
        expectedBytes: sizeBytes,
      });
    },
    [repoId, onSelect],
  );

  // GGUF fit classification matching llama-server's _select_gpus logic:
  //   fits  = model <= 0.7 * total GPU memory
  //   tight = model > 0.7 * GPU but <= 0.7 * GPU + 0.7 * system RAM (--fit uses CPU offload)
  //   oom   = model > 0.7 * GPU + 0.7 * system RAM
  const gpuBudgetGb = (gpuGb ?? 0) * 0.70;
  const totalBudgetGb = gpuBudgetGb + (systemRamGb ?? 0) * 0.70;

  const getGgufFit = useCallback(
    (sizeBytes: number): "fits" | "tight" | "oom" => {
      // Without GPU info there is nothing to compare against.
      if (!gpuGb || gpuGb <= 0) return "fits";
      const gb = sizeBytes / (1024 ** 3);
      if (gb <= 0 || gb <= gpuBudgetGb) return "fits";
      if (gb <= totalBudgetGb) return "tight";
      return "oom";
    },
    [gpuGb, gpuBudgetGb, totalBudgetGb],
  );

  // If the backend-recommended variant is OOM, pick the largest fitting
  // variant instead; if all are OOM, recommend the smallest one.
  const effectiveRecommended = useMemo(() => {
    // The empty-list guard matters: without it the "all OOM" fallback below
    // would read `.quant` off `undefined` and throw during render, instead of
    // letting the component show its "No GGUF variants found." state.
    if (!variants || variants.length === 0 || !gpuGb || gpuGb <= 0) {
      return defaultVariant;
    }
    const defaultV = variants.find((v) => v.quant === defaultVariant);
    if (defaultV && getGgufFit(defaultV.size_bytes) !== "oom") return defaultVariant;
    // Default is OOM -- pick largest non-OOM variant (best quality that fits)
    const fitting = variants.filter((v) => getGgufFit(v.size_bytes) !== "oom");
    if (fitting.length > 0) {
      // `filter` returned a fresh array, so sorting in place is safe.
      fitting.sort((a, b) => b.size_bytes - a.size_bytes);
      return fitting[0].quant;
    }
    // All OOM -- recommend smallest (most likely to partially run)
    const sorted = [...variants].sort((a, b) => a.size_bytes - b.size_bytes);
    return sorted[0].quant;
  }, [variants, defaultVariant, gpuGb, getGgufFit]);

  const sortedVariants = useMemo(() => {
    if (!variants) return variants;
    // Tier: 0 = downloaded+fits, 1 = downloaded+tight, 2 = fits, 3 = tight, 4 = OOM
    const tierOf = (v: GgufVariantDetail) => {
      const f = getGgufFit(v.size_bytes);
      if (f === "oom") return 4;
      const base = f === "fits" ? 0 : 1;
      return v.downloaded ? base : base + 2;
    };
    return [...variants].sort((a, b) => {
      const aTier = tierOf(a);
      const bTier = tierOf(b);
      if (aTier !== bTier) return aTier - bTier;

      // Within the same tier, recommended goes first
      const aIsRec = a.quant === effectiveRecommended;
      const bIsRec = b.quant === effectiveRecommended;
      if (aIsRec !== bIsRec) return aIsRec ? -1 : 1;

      // fits: largest first (best quality that fits in GPU)
      // tight/OOM: smallest first (closest to fitting, fastest to run)
      const fitsInGpu = aTier === 0 || aTier === 2;
      return fitsInGpu ? b.size_bytes - a.size_bytes : a.size_bytes - b.size_bytes;
    });
  }, [variants, effectiveRecommended, getGgufFit]);

  if (loading) {
    return (
      <div className="flex items-center gap-2 px-5 py-2">
        <Spinner className="size-3 text-muted-foreground" />
        <span className="text-xs text-muted-foreground">Loading variants…</span>
      </div>
    );
  }

  if (error) {
    return (
      <div className="px-5 py-2 text-xs text-destructive">{error}</div>
    );
  }

  if (!sortedVariants || sortedVariants.length === 0) {
    return (
      <div className="px-5 py-2 text-xs text-muted-foreground">
        No GGUF variants found.
      </div>
    );
  }

  return (
    <div className="pl-4 border-l-2 border-accent/50 ml-3 my-1">
      <div className="px-2 py-1 flex items-center gap-1.5">
        <span className="text-[10px] font-semibold uppercase tracking-wider text-muted-foreground">
          Quantizations
        </span>
        {hasVision && (
          <span className="text-[9px] font-medium text-blue-400">Vision</span>
        )}
      </div>
      {sortedVariants.map((v) => {
        const fit = getGgufFit(v.size_bytes);
        const oom = fit === "oom";
        const tight = fit === "tight";
        return (
          <div key={v.filename} className="flex items-center gap-0.5">
            <button
              type="button"
              onClick={() => handleVariantClick(v.quant, v.downloaded, v.size_bytes)}
              className={cn(
                "flex min-w-0 flex-1 items-center justify-between gap-2 rounded-md px-2.5 py-1 text-left text-sm transition-colors hover:bg-accent",
              )}
            >
              <span className="min-w-0 flex-1 truncate font-mono text-xs">
                {v.quant}
                {v.downloaded ? (
                  <span className="ml-1.5 text-[9px] font-sans font-medium text-green-400">
                    downloaded
                  </span>
                ) : v.quant === effectiveRecommended ? (
                  <span className="ml-1.5 text-[9px] font-sans font-medium text-primary/70">
                    recommended
                  </span>
                ) : null}
              </span>
              <span className="flex items-center gap-1.5 shrink-0">
                {oom && (
                  <span className="text-[9px] font-medium text-red-400">OOM</span>
                )}
                {tight && (
                  <span className="text-[9px] font-medium text-amber-400">TIGHT</span>
                )}
                <span className="text-[10px] text-muted-foreground">
                  {formatBytes(v.size_bytes)}
                </span>
              </span>
            </button>
            {v.downloaded && onDeleteVariant && (
              <button
                type="button"
                aria-label={`Delete ${v.quant}`}
                onClick={(e) => { e.stopPropagation(); onDeleteVariant(v.quant); }}
                className="shrink-0 rounded-md p-1 text-muted-foreground/60 transition-colors hover:bg-destructive/10 hover:text-destructive"
              >
                <Trash2Icon className="size-3" />
              </button>
            )}
          </div>
        );
      })}
    </div>
  );
}

// ── Detect GGUF repos by naming convention ────────────────────

/** True when the repo id contains "-GGUF", compared case-insensitively via upper-casing. */
function isGgufRepo(id: string): boolean {
  const upperId = id.toUpperCase();
  return upperId.indexOf("-GGUF") !== -1;
}

/**
 * Pull a parameter-count label out of a model id, e.g. "Qwen3-0.6B" -> "0.6B".
 *
 * Only the part after the last "/" is inspected. The number must be delimited
 * by "-" or "_" (or the string edges) and followed by "B"/"b"; the returned
 * label always uses an upper-case "B". Returns undefined when nothing matches.
 */
function extractParamLabel(id: string): string | undefined {
  // Everything after the final slash; the whole id when there is no slash.
  const name = id.slice(id.lastIndexOf("/") + 1);
  const found = /(?:^|[-_])(\d+(?:\.\d+)?)[Bb](?:[-_]|$)/.exec(name);
  if (!found) {
    return undefined;
  }
  return `${found[1]}B`;
}

// Module-level caches so re-mounting the popover shows results instantly.
// HubModelPicker overwrites them each time a cached-list request resolves and
// uses them as the initial value of its `cachedGguf` / `cachedModels` state.
// They live for the lifetime of the module, not of any component instance.
let _cachedGgufCache: CachedGgufRepo[] = [];
let _cachedModelsCache: CachedModelRepo[] = [];

// ── Hub Model Picker ──────────────────────────────────────────

/**
 * Searchable picker for Hugging Face hub models.
 *
 * With an empty search box it shows a "Downloaded" section (cached GGUF repos
 * and, outside chat-only mode, cached regular models) followed by a paged
 * "Recommended" section built from `models` plus the current `value`. Once
 * the debounced query is non-empty, both are replaced by a "Hugging Face"
 * section with the search results.
 *
 * Clicking a GGUF repo toggles an inline variant expander; clicking any other
 * model calls `onSelect` immediately. Cached entries can be deleted through a
 * confirmation dialog.
 *
 * @param models   Recommended model options; only their `id` is read here.
 * @param value    Id of the currently selected model, used to highlight a row.
 * @param onSelect Called with the chosen id and selection metadata.
 */
export function HubModelPicker({
  models,
  value,
  onSelect,
}: {
  models: ModelOption[];
  value?: string;
  onSelect: (id: string, meta: ModelSelectorChangeMeta) => void;
}) {
  const gpu = useGpuInfo();
  const [query, setQuery] = useState("");
  const debouncedQuery = useDebouncedValue(query);
  const { results, isLoading, isLoadingMore, fetchMore } = useHfModelSearch(
    debouncedQuery,
  );

  // Track which GGUF repo is expanded for variant selection
  const [expandedGguf, setExpandedGguf] = useState<string | null>(null);

  // Delete confirmation dialog state
  const [deleteTarget, setDeleteTarget] = useState<string | null>(null);
  const [deleting, setDeleting] = useState(false);

  // Cached (already downloaded) repos -- use module-level cache so
  // re-mounting the popover does not flash an empty "Downloaded" section.
  const [cachedGguf, setCachedGguf] = useState<CachedGgufRepo[]>(_cachedGgufCache);
  const [cachedModels, setCachedModels] = useState<CachedModelRepo[]>(_cachedModelsCache);
  // Recomputed on every render from the module-level caches (not from state).
  const alreadyCached = _cachedGgufCache.length > 0 || _cachedModelsCache.length > 0;
  const [cachedReady, setCachedReady] = useState(alreadyCached);

  // Re-fetch both cached lists (used after a delete). Failures are ignored,
  // leaving the previous lists in place.
  const refreshCachedLists = useCallback(() => {
    listCachedGguf().then((v) => { _cachedGgufCache = v; setCachedGguf(v); }).catch(() => {});
    listCachedModels().then((v) => { _cachedModelsCache = v; setCachedModels(v); }).catch(() => {});
  }, []);

  // Initial load. Skipped entirely when the module-level caches already hold
  // data, so in that case the lists are only refreshed by refreshCachedLists.
  // `check` flips `cachedReady` once both requests have settled, whether they
  // succeeded or failed.
  useEffect(() => {
    if (alreadyCached) return;
    let done = 0;
    const check = () => { if (++done >= 2) setCachedReady(true); };
    listCachedGguf().then((v) => { _cachedGgufCache = v; setCachedGguf(v); }).catch(() => {}).finally(check);
    listCachedModels().then((v) => { _cachedModelsCache = v; setCachedModels(v); }).catch(() => {}).finally(check);
  }, [alreadyCached]);

  // Runs the delete for the current `deleteTarget`, reports the outcome with a
  // toast, and always closes the dialog afterwards.
  const handleDeleteConfirm = useCallback(async () => {
    if (!deleteTarget) return;
    setDeleting(true);
    try {
      // deleteTarget is "repo_id" or "repo_id::variant"
      const sepIdx = deleteTarget.indexOf("::");
      const repoId = sepIdx >= 0 ? deleteTarget.slice(0, sepIdx) : deleteTarget;
      const variant = sepIdx >= 0 ? deleteTarget.slice(sepIdx + 2) : undefined;
      await deleteCachedModel(repoId, variant);
      toast.success(`Deleted ${variant ? `${repoId} ${variant}` : repoId}`);
      refreshCachedLists();
    } catch (err) {
      toast.error(err instanceof Error ? err.message : "Failed to delete model");
    } finally {
      setDeleting(false);
      setDeleteTarget(null);
    }
  }, [deleteTarget, refreshCachedLists]);

  // Deduplicate: don't show downloaded models in the recommended list.
  // Compare case-insensitively since HF cache lowercases repo IDs.
  const downloadedSet = useMemo(() => {
    const s = new Set<string>();
    for (const c of cachedGguf) s.add(c.repo_id.toLowerCase());
    for (const c of cachedModels) s.add(c.repo_id.toLowerCase());
    return s;
  }, [cachedGguf, cachedModels]);

  const chatOnly = usePlatformStore((s) => s.isChatOnly());

  // Recommended pool: `models` ids plus the current value, minus anything
  // already downloaded; in chat-only mode only GGUF repos are kept.
  const recommendedIds = useMemo(() => {
    const all = dedupe([...models.map((model) => model.id), value ?? ""])
      .filter((id) => !downloadedSet.has(id.toLowerCase()))
      .filter((id) => !chatOnly || isGgufRepo(id));
    // Sort: GGUFs first, then hub models
    const gguf: string[] = [];
    const hub: string[] = [];
    for (const id of all) {
      if (isGgufRepo(id)) gguf.push(id);
      else hub.push(id);
    }
    return [...gguf, ...hub];
  }, [models, value, downloadedSet, chatOnly]);

  // Infinite scroll paging for the recommended section
  const [recommendedPage, setRecommendedPage] = useState(1);
  // Reset page when the underlying list changes
  useEffect(() => { setRecommendedPage(1); }, [models, chatOnly]);

  const visibleRecommendedIds = useMemo(() => {
    // recommendedIds is GGUF-first, so the first non-GGUF id marks the split.
    const hubStartIndex = recommendedIds.findIndex((id) => !isGgufRepo(id));
    const allGguf = hubStartIndex === -1 ? recommendedIds : recommendedIds.slice(0, hubStartIndex);
    const allHub = hubStartIndex === -1 ? [] : recommendedIds.slice(hubStartIndex);
    // Interleave in chunks of 4: [4 gguf, 4 hub, 4 gguf, 4 hub, ...]
    const result: string[] = [];
    for (let p = 0; p < recommendedPage; p++) {
      result.push(...allGguf.slice(p * 4, (p + 1) * 4));
      result.push(...allHub.slice(p * 4, (p + 1) * 4));
    }
    return result;
  }, [recommendedIds, recommendedPage]);

  const hasMoreRecommended = visibleRecommendedIds.length < recommendedIds.length;

  // Fetch VRAM info for the full pool once (recommendedIds is stable across
  // page increments) so we don't re-fetch on every scroll.
  const { paramCountById: recommendedParamCountById } =
    useRecommendedModelVram(recommendedIds);

  const showHfSection = debouncedQuery.trim().length > 0;
  const recommendedSet = useMemo(() => new Set(visibleRecommendedIds), [visibleRecommendedIds]);

  // Search-result ids, excluding ids currently visible in the recommended
  // list and, in chat-only mode, anything that is not a GGUF repo.
  const hfIds = useMemo(() => {
    if (!showHfSection) return [];
    return results
      .map((result) => result.id)
      .filter((id) => !recommendedSet.has(id))
      .filter((id) => !chatOnly || isGgufRepo(id));
  }, [recommendedSet, results, showHfSection, chatOnly]);

  // Right-hand label per search result: estimated size when known, otherwise
  // the compact parameter count.
  const metricsById = useMemo(
    () =>
      new Map(
        results
          .filter((result) => result.totalParams || result.estimatedSizeBytes)
          .map((result) => [
            result.id,
            result.estimatedSizeBytes
              ? `~${formatBytes(result.estimatedSizeBytes)}`
              : formatCompact(result.totalParams!),
          ]),
      ),
    [results],
  );

  // VRAM estimate and fit status per search result; status stays null when no
  // GPU is available, and est is 0 when the parameter count is unknown.
  const vramMap = useMemo(() => {
    const map = new Map<
      string,
      { est: number; status: VramFitStatus | null; detail: string | null }
    >();
    for (const r of results) {
      const detail = r.totalParams ? formatCompact(r.totalParams) : null;
      if (r.totalParams) {
        const est = estimateLoadingVram(r.totalParams, "qlora");
        const status = gpu.available
          ? checkVramFit(est, gpu.memoryTotalGb)
          : null;
        map.set(r.id, { est, status, detail });
      } else {
        map.set(r.id, { est: 0, status: null, detail });
      }
    }
    return map;
  }, [results, gpu]);

  // Same as vramMap, for the visible recommended ids; ids without a known
  // parameter count get no entry.
  const recommendedVramMap = useMemo(() => {
    const map = new Map<
      string,
      { est: number; status: VramFitStatus | null; detail: string | null }
    >();
    for (const id of visibleRecommendedIds) {
      const totalParams = recommendedParamCountById.get(id);
      if (totalParams) {
        const est = estimateLoadingVram(totalParams, "qlora");
        const status = gpu.available
          ? checkVramFit(est, gpu.memoryTotalGb)
          : null;
        const detail = formatCompact(totalParams);
        map.set(id, { est, status, detail });
      }
    }
    return map;
  }, [visibleRecommendedIds, recommendedParamCountById, gpu]);

  const { scrollRef, sentinelRef } = useInfiniteScroll(fetchMore, results.length);

  // Sentinel + IntersectionObserver for recommended infinite scroll.
  // We disconnect after each fire so the observer doesn't loop while
  // React re-renders; the effect re-creates it on the next page.
  // Uses a callback ref for the sentinel so we detect mount/unmount reliably.
  const [recommendedSentinel, setRecommendedSentinel] = useState<HTMLDivElement | null>(null);
  const recommendedSentinelRef = useCallback((node: HTMLDivElement | null) => {
    setRecommendedSentinel(node);
  }, []);
  useEffect(() => {
    if (!recommendedSentinel || !hasMoreRecommended) return;
    const root = scrollRef.current;
    if (!root) return;
    const obs = new IntersectionObserver(
      ([e]) => {
        if (e.isIntersecting) {
          obs.disconnect();
          setRecommendedPage((p) => p + 1);
        }
      },
      { threshold: 0, root },
    );
    // Small delay so the browser finishes layout after the previous page render
    const timer = setTimeout(() => obs.observe(recommendedSentinel), 100);
    return () => { clearTimeout(timer); obs.disconnect(); };
  }, [recommendedSentinel, hasMoreRecommended, recommendedPage, scrollRef]);

  /** Handle clicking a model row — GGUF repos expand, others load directly. */
  const handleModelClick = useCallback(
    (id: string) => {
      if (isGgufRepo(id)) {
        // Toggle GGUF variant expander
        setExpandedGguf((prev) => (prev === id ? null : id));
      } else {
        onSelect(id, { source: "hub", isLora: false });
      }
    },
    [onSelect],
  );

  return (
    <div className="space-y-2">
      <div className="relative">
        <HugeiconsIcon
          icon={Search01Icon}
          className="pointer-events-none absolute left-2.5 top-2.5 size-4 text-muted-foreground"
        />
        <Input
          value={query}
          onChange={(event) => setQuery(event.target.value)}
          placeholder="Search Hugging Face models"
          className="h-9 pl-8 pr-8"
        />
        {isLoading && (
          <Spinner className="pointer-events-none absolute right-2.5 top-2.5 size-4 text-muted-foreground" />
        )}
      </div>

      <div ref={scrollRef} className="max-h-64 overflow-y-auto">
        <div className="p-1">
          {!cachedReady && !showHfSection ? (
            <div className="flex items-center gap-2 px-5 py-3">
              <Spinner className="size-3 text-muted-foreground" />
              <span className="text-xs text-muted-foreground">Loading models…</span>
            </div>
          ) : !showHfSection && (cachedGguf.length > 0 || (!chatOnly && cachedModels.length > 0)) ? (
            <>
              <ListLabel>{"\uD83E\uDDA5"} Downloaded</ListLabel>
              {cachedGguf.map((c) => (
                <div key={c.repo_id}>
                  <ModelRow
                    label={c.repo_id}
                    meta={`GGUF · ${formatBytes(c.size_bytes)}`}
                    selected={value === c.repo_id}
                    onClick={() => handleModelClick(c.repo_id)}
                    vramStatus={null}
                  />
                  {expandedGguf === c.repo_id && (
                    <GgufVariantExpander
                      repoId={c.repo_id}
                      onSelect={onSelect}
                      gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
                      systemRamGb={gpu.available ? gpu.systemRamAvailableGb : undefined}
                      onDeleteVariant={(quant) => setDeleteTarget(`${c.repo_id}::${quant}`)}
                    />
                  )}
                </div>
              ))}
              {!chatOnly && cachedModels.map((c) => (
                <div key={c.repo_id} className="flex items-center gap-0.5">
                  <div className="min-w-0 flex-1">
                    <ModelRow
                      label={c.repo_id}
                      meta={formatBytes(c.size_bytes)}
                      selected={value === c.repo_id}
                      onClick={() => onSelect(c.repo_id, { source: "hub", isLora: false, isDownloaded: true })}
                      vramStatus={null}
                    />
                  </div>
                  <button
                    type="button"
                    onClick={(e) => { e.stopPropagation(); setDeleteTarget(c.repo_id); }}
                    className="shrink-0 rounded-md p-1.5 text-muted-foreground/60 transition-colors hover:bg-destructive/10 hover:text-destructive"
                  >
                    <Trash2Icon className="size-3.5" />
                  </button>
                </div>
              ))}
            </>
          ) : null}

          {!showHfSection && cachedReady ? (
            <>
              <ListLabel>{"\uD83E\uDDA5"} Recommended</ListLabel>
              {visibleRecommendedIds.length === 0 ? (
                <div className="px-2.5 py-2 text-xs text-muted-foreground">
                  No default models.
                </div>
              ) : (
                visibleRecommendedIds.map((id) => {
                  const vram = recommendedVramMap.get(id);
                  return (
                    <div key={id}>
                      <ModelRow
                        label={id}
                        meta={
                          isGgufRepo(id)
                            ? "GGUF"
                            : vram?.detail ?? extractParamLabel(id)
                        }
                        selected={value === id}
                        onClick={() => handleModelClick(id)}
                        vramStatus={isGgufRepo(id) ? null : vram?.status ?? null}
                        vramEst={isGgufRepo(id) ? undefined : vram?.est}
                        gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
                      />
                      {expandedGguf === id && (
                        <GgufVariantExpander repoId={id} onSelect={onSelect} gpuGb={gpu.available ? gpu.memoryTotalGb : undefined} systemRamGb={gpu.available ? gpu.systemRamAvailableGb : undefined} />
                      )}
                    </div>
                  );
                })
              )}
              {hasMoreRecommended && (
                <>
                  <div ref={recommendedSentinelRef} className="h-px" />
                  <div className="flex items-center justify-center py-2">
                    <Spinner className="size-3.5 text-muted-foreground" />
                  </div>
                </>
              )}
            </>
          ) : null}

          {showHfSection ? (
            <>
              <ListLabel>Hugging Face</ListLabel>
              {hfIds.length === 0 && !isLoading ? (
                <div className="px-2.5 py-2 text-xs text-muted-foreground">
                  No matching models.
                </div>
              ) : (
                hfIds.map((id) => {
                  const vram = vramMap.get(id);
                  return (
                    <div key={id}>
                      <ModelRow
                        label={id}
                        meta={
                          isGgufRepo(id)
                            ? "GGUF"
                            : metricsById.get(id) ?? extractParamLabel(id)
                        }
                        selected={value === id}
                        onClick={() => handleModelClick(id)}
                        vramStatus={isGgufRepo(id) ? null : vram?.status ?? null}
                        vramEst={isGgufRepo(id) ? undefined : vram?.est}
                        gpuGb={gpu.available ? gpu.memoryTotalGb : undefined}
                      />
                      {expandedGguf === id && (
                        <GgufVariantExpander repoId={id} onSelect={onSelect} gpuGb={gpu.available ? gpu.memoryTotalGb : undefined} systemRamGb={gpu.available ? gpu.systemRamAvailableGb : undefined} />
                      )}
                    </div>
                  );
                })
              )}
              <div ref={sentinelRef} className="h-px" />
              {isLoadingMore ? (
                <div className="flex items-center justify-center py-2">
                  <Spinner className="size-3.5 text-muted-foreground" />
                </div>
              ) : null}
            </>
          ) : null}
        </div>
      </div>

      <AlertDialog open={deleteTarget !== null} onOpenChange={(open) => { if (!open && !deleting) setDeleteTarget(null); }}>
        <AlertDialogContent size="sm">
          <AlertDialogHeader>
            <AlertDialogTitle>Delete cached model?</AlertDialogTitle>
            <AlertDialogDescription>
              This will remove <span className="font-medium text-foreground">{deleteTarget?.includes("::") ? `${deleteTarget.split("::")[0]} (${deleteTarget.split("::")[1]})` : deleteTarget}</span> from disk. You can re-download it later.
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel disabled={deleting}>No</AlertDialogCancel>
            <AlertDialogAction
              variant="destructive"
              disabled={deleting}
              onClick={(e) => { e.preventDefault(); handleDeleteConfirm(); }}
            >
              {deleting ? "Deleting..." : "Yes"}
            </AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </div>
  );
}

/**
 * Searchable picker for local adapters, grouped by base model.
 *
 * Adapters are ordered newest-first (by `updatedAt`), then by base model and
 * name. Groups are ordered by their most recent adapter, then alphabetically.
 * The search box filters on name, base model and id (case-insensitive).
 *
 * @param loraModels Adapters to list.
 * @param value      Id of the currently selected adapter, used for highlighting.
 * @param onSelect   Called with the adapter id and selection metadata.
 */
export function LoraModelPicker({
  loraModels,
  value,
  onSelect,
}: {
  loraModels: LoraModelOption[];
  value?: string;
  onSelect: (id: string, meta: ModelSelectorChangeMeta) => void;
}) {
  const [query, setQuery] = useState("");

  // Fill in a base-model label for every adapter, then sort the flat list.
  const normalized = useMemo(() => {
    const withBase = loraModels.map((model) => ({
      ...model,
      baseModel: model.baseModel || model.description || "Unknown base model",
    }));
    withBase.sort((left, right) => {
      const leftTime = left.updatedAt ?? -1;
      const rightTime = right.updatedAt ?? -1;
      if (leftTime !== rightTime) return rightTime - leftTime;
      return (
        left.baseModel.localeCompare(right.baseModel) ||
        left.name.localeCompare(right.name)
      );
    });
    return withBase;
  }, [loraModels]);

  // Apply the search filter and bucket the survivors by base model.
  const grouped = useMemo(() => {
    const needle = query.trim().toLowerCase();
    const buckets = new Map<string, LoraModelOption[]>();

    for (const model of normalized) {
      const searchText = `${model.name} ${model.baseModel} ${model.id}`.toLowerCase();
      if (!needle || searchText.includes(needle)) {
        const bucketKey = model.baseModel || "Unknown base model";
        const bucket = buckets.get(bucketKey);
        if (bucket) {
          bucket.push(model);
        } else {
          buckets.set(bucketKey, [model]);
        }
      }
    }

    const latestOf = (members: LoraModelOption[]) =>
      Math.max(...members.map((model) => model.updatedAt ?? -1));

    return Array.from(buckets.entries()).sort(
      ([nameA, membersA], [nameB, membersB]) => {
        const latestA = latestOf(membersA);
        const latestB = latestOf(membersB);
        if (latestA !== latestB) return latestB - latestA;
        return nameA.localeCompare(nameB);
      },
    );
  }, [normalized, query]);

  const renderAdapter = (adapter: LoraModelOption) => {
    const isExported = adapter.source === "exported";
    const isMerged = adapter.exportType === "merged";
    const isGguf = adapter.exportType === "gguf";

    let tag = "LoRA";
    if (isGguf) {
      tag = "GGUF";
    } else if (isExported && isMerged) {
      tag = "Merged";
    }
    const meta = isExported ? `${tag} · Exported` : tag;

    const handlePick = () =>
      onSelect(adapter.id, {
        source: isExported ? "exported" : "lora",
        isLora: !isMerged && !isGguf,
      });

    return (
      <ModelRow
        key={adapter.id}
        label={adapter.name}
        meta={meta}
        selected={value === adapter.id}
        onClick={handlePick}
        tooltipText={
          <>
            <span className="block break-words">{adapter.name}</span>
            <span className="block mt-1 text-[10px] text-muted-foreground break-all">
              {adapter.id}
            </span>
          </>
        }
      />
    );
  };

  const listBody =
    grouped.length === 0 ? (
      <div className="px-2.5 py-2 text-xs text-muted-foreground">
        No adapters found.
      </div>
    ) : (
      grouped.map(([groupName, members], position) => (
        <div key={groupName}>
          {position > 0 && <div className="my-1" />}
          <ListLabel>{groupName}</ListLabel>
          {members.map(renderAdapter)}
        </div>
      ))
    );

  return (
    <div className="space-y-2">
      <div className="relative">
        <HugeiconsIcon
          icon={Search01Icon}
          className="pointer-events-none absolute left-2.5 top-2.5 size-4 text-muted-foreground"
        />
        <Input
          value={query}
          onChange={(event) => setQuery(event.target.value)}
          placeholder="Search local adapters"
          className="h-9 pl-8"
        />
      </div>

      <div className="max-h-64 overflow-y-auto">
        <div className="p-1">{listBody}</div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/components/assistant-ui/model-selector/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactNode } from "react";

/** A single selectable entry of the model selector. */
export interface ModelOption {
  /** Identifier used as the selection value and as the lookup key for the option. */
  id: string;
  /** Label rendered for this option. */
  name: string;
  /** Optional secondary text rendered next to the name in the selector trigger. */
  description?: string;
  /** Optional icon node. NOTE(review): not consumed by the selector code visible here — confirm usage in the pickers. */
  icon?: ReactNode;
}

/** A fine-tuned model entry: a ModelOption plus adapter/export metadata. */
export interface LoraModelOption extends ModelOption {
  /** Base model label. Presumably the key the fine-tuned picker groups by — verify against the picker. */
  baseModel?: string;
  /** Presumably a last-modified timestamp. NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm. */
  updatedAt?: number;
  /** Origin of the entry: produced by a training run or by an export. */
  source?: "training" | "exported";
  /** Export format; the picker treats "merged" and "gguf" as non-LoRA selections. */
  exportType?: "lora" | "merged" | "gguf";
}

/** Extra context handed to the change callback together with the selected id. */
export interface ModelSelectorChangeMeta {
  /** Which list the selection came from. */
  source: "hub" | "lora" | "exported";
  /** True when the selection is a LoRA adapter; the fine-tuned picker sets it to false for merged and GGUF exports. */
  isLora: boolean;
  /** Presumably the chosen GGUF variant/quantization for hub GGUF models — TODO confirm against HubModelPicker. */
  ggufVariant?: string;
  /** Presumably whether the model files are already available locally — TODO confirm against HubModelPicker. */
  isDownloaded?: boolean;
  /** Presumably the expected download size in bytes — TODO confirm against HubModelPicker. */
  expectedBytes?: number;
}


================================================
FILE: studio/frontend/src/components/assistant-ui/model-selector.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import {
  Popover,
  PopoverContent,
  PopoverTrigger,
} from "@/components/ui/popover";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { usePlatformStore } from "@/config/env";
import { cn } from "@/lib/utils";
import {
  ArrowDown01Icon,
  Logout01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useMemo, useState } from "react";
import type {
  LoraModelOption,
  ModelOption,
  ModelSelectorChangeMeta,
} from "./model-selector/types";
import { HubModelPicker, LoraModelPicker } from "./model-selector/pickers";

export type { LoraModelOption, ModelOption, ModelSelectorChangeMeta } from "./model-selector/types";

/** Props accepted by the ModelSelector component. */
interface ModelSelectorProps {
  /** Options passed to the hub model picker. */
  models: ModelOption[];
  /** Options passed to the "Fine-tuned" picker; defaults to an empty list. */
  loraModels?: LoraModelOption[];
  /** Controlled selection id; when undefined the component falls back to its internal state. */
  value?: string;
  /** Initial selection id for the internal (uncontrolled) state. */
  defaultValue?: string;
  /** When truthy, the trigger description is replaced by "GGUF · <variant>". */
  activeGgufVariant?: string | null;
  /** Called with the chosen id and metadata; when provided, the internal selection state is NOT updated. */
  onValueChange?: (value: string, meta: ModelSelectorChangeMeta) => void;
  /** Enables the "Eject loaded model" action, which is only shown while something is selected. */
  onEject?: () => void;
  /** Visual style of the trigger button (defaults to "outline"). */
  variant?: "outline" | "ghost" | "muted";
  /** Height/padding preset of the trigger button (defaults to "default"). */
  size?: "sm" | "default" | "lg";
  /** Extra classes for the trigger button. */
  className?: string;
  /** Extra classes for the popover content. */
  contentClassName?: string;
  /** Controlled open state of the popover; when undefined an internal state is used. */
  open?: boolean;
  /** When provided, used in place of the internal open-state setter. */
  onOpenChange?: (open: boolean) => void;
  /** Forwarded as the data-tour attribute of the trigger button. */
  triggerDataTour?: string;
  /** Forwarded as the data-tour attribute of the popover content. */
  contentDataTour?: string;
}

/**
 * Popover trigger button for the model selector.
 *
 * Shows a green dot while a model is loaded, the current model name (or a
 * "Select model..." placeholder), its optional description and a chevron.
 */
function ModelSelectorTrigger({
  currentModel,
  isLoaded,
  variant = "outline",
  size = "default",
  className,
  dataTour,
}: {
  currentModel?: ModelOption;
  isLoaded: boolean;
  variant?: "outline" | "ghost" | "muted";
  size?: "sm" | "default" | "lg";
  className?: string;
  dataTour?: string;
}) {
  // One class string per visual variant / size preset.
  const variantClass = {
    outline: "rounded-full border border-border/60 hover:bg-accent",
    ghost: "rounded-md hover:bg-accent",
    muted: "rounded-md bg-muted hover:bg-muted/80",
  }[variant];
  const sizeClass = {
    sm: "h-8 px-3 text-xs",
    default: "h-9 px-3.5 text-sm",
    lg: "h-10 px-4 text-sm",
  }[size];

  const label = currentModel?.name ?? "Select model...";
  const detail = currentModel?.description;
  const labelTone = isLoaded ? "text-foreground" : "text-muted-foreground";

  return (
    <PopoverTrigger asChild={true}>
      <button
        type="button"
        data-tour={dataTour}
        className={cn(
          "flex items-center gap-2 transition-colors",
          variantClass,
          sizeClass,
          className,
        )}
      >
        {isLoaded ? (
          <span className="size-2 shrink-0 rounded-full bg-emerald-500" />
        ) : null}
        <span className={labelTone}>{label}</span>
        {detail && (
          <span className="text-muted-foreground text-xs">{detail}</span>
        )}
        <HugeiconsIcon
          icon={ArrowDown01Icon}
          className="size-3 shrink-0 text-muted-foreground"
        />
      </button>
    </PopoverTrigger>
  );
}

/**
 * Popover body of the model selector.
 *
 * In chat-only mode only the hub picker is rendered; otherwise the hub and
 * fine-tuned pickers are placed in two tabs. An eject action is appended when
 * something is selected and an `onEject` handler was supplied.
 */
function ModelSelectorContent({
  models,
  loraModels,
  value,
  onSelect,
  onEject,
  className,
  dataTour,
}: {
  models: ModelOption[];
  loraModels: LoraModelOption[];
  value?: string;
  onSelect: (id: string, meta: ModelSelectorChangeMeta) => void;
  onEject?: () => void;
  className?: string;
  dataTour?: string;
}) {
  const chatOnly = usePlatformStore((s) => s.isChatOnly());
  const showEject = Boolean(value) && Boolean(onEject);

  // The hub picker is identical in both layouts, so build it once.
  const hubPicker = (
    <HubModelPicker models={models} value={value} onSelect={onSelect} />
  );

  const pickers = chatOnly ? (
    hubPicker
  ) : (
    <Tabs defaultValue="hub" className="w-full">
      <TabsList className="mb-2 w-full">
        <TabsTrigger value="hub">Hub models</TabsTrigger>
        <TabsTrigger value="lora">Fine-tuned</TabsTrigger>
      </TabsList>

      <TabsContent value="hub" className="m-0">
        {hubPicker}
      </TabsContent>

      <TabsContent value="lora" className="m-0">
        <LoraModelPicker
          loraModels={loraModels}
          value={value}
          onSelect={onSelect}
        />
      </TabsContent>
    </Tabs>
  );

  return (
    <PopoverContent
      align="start"
      data-tour={dataTour}
      className={cn(
        "w-[min(440px,calc(100vw-1rem))] max-w-[calc(100vw-1rem)] min-w-0 gap-0 p-2",
        className,
      )}
    >
      {pickers}

      {showEject && (
        <div className="mt-2 border-t border-border/70 pt-2">
          <button
            type="button"
            onClick={onEject}
            className="flex w-full items-center justify-center gap-1.5 rounded-md px-2 py-1.5 text-xs text-destructive transition-colors hover:bg-destructive/10"
            title="Eject model"
          >
            <HugeiconsIcon icon={Logout01Icon} className="size-3.5" />
            Eject loaded model
          </button>
        </div>
      )}
    </PopoverContent>
  );
}

/**
 * Popover-based model picker combining hub models and fine-tuned models.
 *
 * Selection and open state can each be controlled (`value` / `open`) or left
 * to internal state. When `onValueChange` is supplied the internal selection
 * is not updated; the caller is expected to feed the new `value` back in.
 * The popover is closed after every selection and after ejecting.
 */
export function ModelSelector({
  models,
  loraModels = [],
  value,
  defaultValue,
  activeGgufVariant,
  onValueChange,
  onEject,
  variant = "outline",
  size = "default",
  className,
  contentClassName,
  open: controlledOpen,
  onOpenChange,
  triggerDataTour,
  contentDataTour,
}: ModelSelectorProps) {
  const [uncontrolledOpen, setUncontrolledOpen] = useState(false);
  const open = controlledOpen ?? uncontrolledOpen;
  const setOpen = onOpenChange ?? setUncontrolledOpen;
  const [uncontrolled, setUncontrolled] = useState(defaultValue ?? "");

  const selected = value ?? uncontrolled;
  // Any non-empty selection is presented as a loaded model (green dot).
  const isLoaded = selected !== "";

  // id -> option shown in the trigger. Fine-tuned entries are inserted last,
  // so they win over a hub model that happens to share the same id.
  const optionById = useMemo(() => {
    const all = new Map<string, ModelOption>();
    for (const model of models) {
      all.set(model.id, model);
    }
    for (const lora of loraModels) {
      // Strip "/ suffix" from display name (e.g. "foo_123/foo" → "foo_123")
      const displayName = lora.name.includes("/")
        ? lora.name.split("/")[0].trim()
        : lora.name;
      // Show a type tag instead of the base model name. GGUF exports get
      // their own tag (matching the fine-tuned picker) instead of being
      // mislabelled as "LoRA" when no active GGUF variant overrides it.
      const isExported = lora.source === "exported";
      let tag = "LoRA";
      if (lora.exportType === "gguf") {
        tag = isExported ? "GGUF · Exported" : "GGUF";
      } else if (isExported && lora.exportType === "merged") {
        tag = "Merged · Exported";
      }
      all.set(lora.id, {
        ...lora,
        name: displayName,
        description: tag,
      });
    }
    return all;
  }, [loraModels, models]);

  // Option rendered in the trigger. Unknown ids fall back to showing the raw
  // id; an active GGUF variant always replaces the description.
  const currentModel = useMemo(() => {
    if (!selected) return undefined;
    const found = optionById.get(selected);
    if (activeGgufVariant) {
      const desc = `GGUF · ${activeGgufVariant}`;
      return found ? { ...found, description: desc } : { id: selected, name: selected, description: desc };
    }
    return found ?? { id: selected, name: selected };
  }, [selected, optionById, activeGgufVariant]);

  // Report the selection (or store it internally) and close the popover.
  function handleSelect(id: string, meta: ModelSelectorChangeMeta) {
    if (onValueChange) {
      onValueChange(id, meta);
    } else {
      setUncontrolled(id);
    }
    setOpen(false);
  }

  // Forward the eject request and close the popover.
  function handleEject() {
    onEject?.();
    setOpen(false);
  }

  return (
    <Popover open={open} onOpenChange={setOpen}>
      <ModelSelectorTrigger
        currentModel={currentModel}
        isLoaded={isLoaded}
        variant={variant}
        size={size}
        className={className}
        dataTour={triggerDataTour}
      />
      <ModelSelectorContent
        models={models}
        loraModels={loraModels}
        value={selected}
        onSelect={handleSelect}
        onEject={onEject ? handleEject : undefined}
        className={contentClassName}
        dataTour={contentDataTour}
      />
    </Popover>
  );
}

// Expose the building blocks as static members for custom compositions.
ModelSelector.Trigger = ModelSelectorTrigger;
ModelSelector.Content = ModelSelectorContent;


================================================
FILE: studio/frontend/src/components/assistant-ui/reasoning.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

/* eslint-disable react-refresh/only-export-components */

import { MarkdownText } from "@/components/assistant-ui/markdown-text";
import { AnimatedShinyText } from "@/components/ui/animated-shiny-text";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { cn } from "@/lib/utils";
import {
  type ReasoningGroupComponent,
  type ReasoningMessagePartComponent,
  useAuiState,
  useScrollLock,
} from "@assistant-ui/react";
import { copyToClipboard } from "@/lib/copy-to-clipboard";
import { Idea01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type VariantProps, cva } from "class-variance-authority";
import { ChevronDownIcon, CopyIcon, CheckIcon } from "lucide-react";
import {
  type CSSProperties,
  type ComponentProps,
  memo,
  useCallback,
  useEffect,
  useRef,
  useState,
} from "react";
// Collapsible animation length in milliseconds; exposed to CSS as
// --animation-duration and also passed to useScrollLock.
const ANIMATION_DURATION = 200;

/**
 * Class-name variants for the reasoning root container.
 * "outline" (the default) draws a bordered box, "muted" a tinted box and
 * "ghost" adds no extra styling on top of the base classes.
 */
export const reasoningVariants = cva("aui-reasoning-root mb-4 w-full", {
  variants: {
    variant: {
      outline: "rounded-lg border px-3 py-2",
      ghost: "",
      muted: "rounded-lg bg-muted/50 px-3 py-2",
    },
  },
  defaultVariants: {
    variant: "outline",
  },
});

/**
 * Props of ReasoningRoot: everything Collapsible accepts (with `open` and
 * `onOpenChange` re-declared below), plus the `variant` styling option.
 */
export type ReasoningRootProps = Omit<
  ComponentProps<typeof Collapsible>,
  "open" | "onOpenChange"
> &
  VariantProps<typeof reasoningVariants> & {
    // Controlled open state; when undefined the root manages its own state.
    open?: boolean;
    // Notified on every open/close request, controlled or not.
    onOpenChange?: (open: boolean) => void;
    // Initial state when uncontrolled (ReasoningRoot defaults it to false).
    defaultOpen?: boolean;
  };

/**
 * Collapsible container for a reasoning section.
 *
 * Works controlled (`open` given) or uncontrolled (`defaultOpen`). Closing
 * triggers a scroll lock for the duration of the collapse animation, and
 * `onOpenChange` is always notified.
 */
function ReasoningRoot({
  className,
  variant,
  open: openProp,
  onOpenChange: notifyOpenChange,
  defaultOpen = false,
  children,
  ...props
}: ReasoningRootProps) {
  const rootRef = useRef<HTMLDivElement>(null);
  const [internalOpen, setInternalOpen] = useState(defaultOpen);
  const lockScroll = useScrollLock(rootRef, ANIMATION_DURATION);

  const isControlled = openProp !== undefined;

  const handleOpenChange = useCallback(
    (next: boolean) => {
      // Lock scrolling only while collapsing.
      if (!next) lockScroll();
      if (!isControlled) setInternalOpen(next);
      notifyOpenChange?.(next);
    },
    [lockScroll, isControlled, notifyOpenChange],
  );

  // Make the animation length available to the CSS of all descendants.
  const animationStyle = {
    "--animation-duration": `${ANIMATION_DURATION}ms`,
  } as CSSProperties;
  const rootClassName = cn(
    "group/reasoning-root",
    reasoningVariants({ variant, className }),
  );

  return (
    <Collapsible
      ref={rootRef}
      data-slot="reasoning-root"
      data-variant={variant}
      open={isControlled ? openProp : internalOpen}
      onOpenChange={handleOpenChange}
      className={rootClassName}
      style={animationStyle}
      {...props}
    >
      {children}
    </Collapsible>
  );
}

/** Non-interactive gradient overlay pinned to the bottom edge of the reasoning content. */
function ReasoningFade({ className, ...props }: ComponentProps<"div">) {
  const fadeClassName = cn(
    "aui-reasoning-fade pointer-events-none absolute inset-x-0 bottom-0 z-10 h-8",
    "bg-gradient-to-t from-background to-transparent",
    className,
  );

  return <div data-slot="reasoning-fade" className={fadeClassName} {...props} />;
}

/** Non-interactive gradient overlay pinned to the top edge of the reasoning content. */
function ReasoningFadeTop({ className, ...props }: ComponentProps<"div">) {
  const fadeClassName = cn(
    "aui-reasoning-fade-top pointer-events-none absolute inset-x-0 top-0 z-10 h-8",
    "bg-gradient-to-b from-background to-transparent",
    className,
  );

  return (
    <div data-slot="reasoning-fade-top" className={fadeClassName} {...props} />
  );
}

/**
 * Header row that toggles the reasoning section.
 *
 * While `active` it shows an animated "Thinking..." label; otherwise it shows
 * how long the model thought, using `duration` in seconds (0 when missing).
 * The unit is pluralized so that a one-second duration reads "1 second".
 */
function ReasoningTrigger({
  active,
  duration,
  className,
  ...props
}: ComponentProps<typeof CollapsibleTrigger> & {
  active?: boolean;
  duration?: number;
}) {
  const seconds = duration ?? 0;
  const unit = seconds === 1 ? "second" : "seconds";

  return (
    <CollapsibleTrigger
      data-slot="reasoning-trigger"
      className={cn(
        "aui-reasoning-trigger group/trigger flex max-w-[75%] items-center gap-2 py-1 text-muted-foreground text-sm transition-colors hover:text-foreground",
        className,
      )}
      {...props}
    >
      <HugeiconsIcon
        icon={Idea01Icon}
        className="aui-reasoning-trigger-icon size-4 shrink-0"
      />
      <span
        data-slot="reasoning-trigger-label"
        className="aui-reasoning-trigger-label-wrapper relative inline-block leading-none"
      >
        {active ? (
          <AnimatedShinyText className="text-sm">Thinking...</AnimatedShinyText>
        ) : (
          <span>{`Thought for ${seconds} ${unit}`}</span>
        )}
      </span>
      {/* Chevron points right while closed and down while open. */}
      <ChevronDownIcon
        data-slot="reasoning-trigger-chevron"
        className={cn(
          "aui-reasoning-trigger-chevron mt-0.5 size-4 shrink-0",
          "transition-transform duration-(--animation-duration) ease-out",
          "group-data-[state=closed]/trigger:-rotate-90",
          "group-data-[state=open]/trigger:rotate-0",
        )}
      />
    </CollapsibleTrigger>
  );
}

/**
 * Animated collapsible body of the reasoning section.
 * Always renders the bottom fade; the top fade is added only while streaming.
 */
function ReasoningContent({
  className,
  children,
  streaming,
  ...props
}: ComponentProps<typeof CollapsibleContent> & { streaming?: boolean }) {
  const contentClassName = cn(
    "aui-reasoning-content relative overflow-hidden text-muted-foreground text-sm outline-none",
    "group/collapsible-content ease-out",
    "data-[state=closed]:animate-collapsible-up",
    "data-[state=open]:animate-collapsible-down",
    "data-[state=closed]:fill-mode-forwards",
    "data-[state=closed]:pointer-events-none",
    "data-[state=open]:duration-(--animation-duration)",
    "data-[state=closed]:duration-(--animation-duration)",
    className,
  );

  return (
    <CollapsibleContent
      data-slot="reasoning-content"
      className={contentClassName}
      {...props}
    >
      {streaming ? <ReasoningFadeTop /> : null}
      {children}
      <ReasoningFade />
    </CollapsibleContent>
  );
}

/**
 * Scrollable text area of the reasoning section.
 *
 * While `streaming`, the area is shorter and is kept scrolled to the bottom
 * whenever its DOM subtree changes.
 */
function ReasoningText({
  className,
  streaming,
  children,
  ...props
}: ComponentProps<"div"> & { streaming?: boolean }) {
  const scrollRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const node = scrollRef.current;
    if (!streaming || !node) {
      return;
    }
    const stickToBottom = () => {
      node.scrollTop = node.scrollHeight;
    };
    // Follow every mutation of the streamed content.
    const observer = new MutationObserver(stickToBottom);
    observer.observe(node, {
      childList: true,
      subtree: true,
      characterData: true,
    });
    stickToBottom();
    return () => observer.disconnect();
  }, [streaming]);

  // Enter/exit animations driven by the surrounding collapsible state.
  const transitionClasses = [
    "transform-gpu transition-[transform,opacity]",
    "group-data-[state=open]/collapsible-content:animate-in",
    "group-data-[state=closed]/collapsible-content:animate-out",
    "group-data-[state=open]/collapsible-content:fade-in-0",
    "group-data-[state=closed]/collapsible-content:fade-out-0",
    "group-data-[state=open]/collapsible-content:slide-in-from-top-4",
    "group-data-[state=closed]/collapsible-content:slide-out-to-top-4",
    "group-data-[state=open]/collapsible-content:duration-(--animation-duration)",
    "group-data-[state=closed]/collapsible-content:duration-(--animation-duration)",
  ];
  const heightClass = streaming ? "max-h-32" : "max-h-64";

  return (
    <div
      ref={scrollRef}
      data-slot="reasoning-text"
      className={cn(
        "aui-reasoning-text relative z-0 overflow-y-auto pt-2 pb-2 pl-0 leading-relaxed",
        heightClass,
        transitionClasses,
        className,
      )}
      {...props}
    >
      {children}
    </div>
  );
}

// Renders a single reasoning message part through the shared markdown renderer.
const ReasoningImpl: ReasoningMessagePartComponent = () => <MarkdownText />;

// How long (in milliseconds) the copy button stays in its "Copied" state.
const COPY_RESET_MS = 2000;

/**
 * "Copy" button for one reasoning group.
 *
 * Copies the text of all reasoning parts between `startIndex` and `endIndex`
 * (inclusive), joined by newlines, and shows a "Copied" state for
 * COPY_RESET_MS afterwards. A pending reset timer is cancelled on unmount so
 * it cannot fire after the button is gone (it unmounts when the section is
 * collapsed).
 */
function ReasoningCopyButton({ startIndex, endIndex }: { startIndex: number; endIndex: number }) {
  const [copied, setCopied] = useState(false);
  const resetRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const reasoningText = useAuiState(({ message }) => {
    return message.parts
      .slice(startIndex, endIndex + 1)
      .filter((p) => p.type === "reasoning")
      .map((p) => ("text" in p ? (p as { text: string }).text : ""))
      .join("\n");
  });

  // Reading the ref at cleanup time is intentional: it holds whichever timer
  // is pending when the component unmounts.
  useEffect(() => {
    return () => {
      if (resetRef.current !== null) {
        clearTimeout(resetRef.current);
        resetRef.current = null;
      }
    };
  }, []);

  const handleCopy = useCallback(() => {
    if (copyToClipboard(reasoningText)) {
      setCopied(true);
      // Copying again restarts the "Copied" window.
      if (resetRef.current) clearTimeout(resetRef.current);
      resetRef.current = setTimeout(() => setCopied(false), COPY_RESET_MS);
    }
  }, [reasoningText]);

  return (
    <button
      type="button"
      onClick={handleCopy}
      className="inline-flex items-center gap-1 rounded px-1.5 py-0.5 text-xs text-muted-foreground transition-colors hover:text-foreground hover:bg-muted"
      aria-label="Copy reasoning"
    >
      {copied ? (
        <CheckIcon className="size-3" />
      ) : (
        <CopyIcon className="size-3" />
      )}
      {copied ? "Copied" : "Copy"}
    </button>
  );
}

/**
 * Wraps a run of consecutive reasoning parts in a collapsible section.
 *
 * The section is forced open while its reasoning is streaming and is
 * user-toggleable afterwards. The thinking time is measured locally while
 * streaming; otherwise the duration stored in the message's custom metadata
 * is shown.
 */
const ReasoningGroupImpl: ReasoningGroupComponent = ({
  children,
  startIndex,
  endIndex,
}) => {
  // Streaming means: the message is running and its last part is a reasoning
  // part that belongs to this group.
  const isReasoningStreaming = useAuiState(({ message }) => {
    if (message.status?.type !== "running") {
      return false;
    }
    const tail = message.parts.length - 1;
    if (tail < 0 || message.parts[tail]?.type !== "reasoning") {
      return false;
    }
    return startIndex <= tail && tail <= endIndex;
  });

  const persistedDuration = useAuiState(({ message }) => {
    const custom = message.metadata?.custom as Record<string, unknown>;
    const stored = custom?.reasoningDuration;
    if (typeof stored !== "number") {
      return 0;
    }
    return stored;
  });

  const [manualOpen, setManualOpen] = useState(false);
  const [duration, setDuration] = useState<number>(0);
  const startTimeRef = useRef<number | null>(null);

  useEffect(() => {
    if (isReasoningStreaming) {
      // Remember when streaming began (first observation only).
      if (startTimeRef.current === null) {
        startTimeRef.current = Date.now();
      }
      return;
    }
    const startedAt = startTimeRef.current;
    if (startedAt === null) {
      return;
    }
    // Streaming just ended: record the elapsed whole seconds.
    setDuration(Math.round((Date.now() - startedAt) / 1000));
    startTimeRef.current = null;
  }, [isReasoningStreaming]);

  const isOpen = isReasoningStreaming || manualOpen;
  // Outlined while expanded (streaming or opened by the user), plain otherwise.
  const variant = isOpen ? "outline" : "ghost";

  const handleOpenChange = useCallback(
    (next: boolean) => {
      // Toggling is ignored while streaming keeps the section open.
      if (isReasoningStreaming) {
        return;
      }
      setManualOpen(next);
    },
    [isReasoningStreaming],
  );

  const showCopy = isOpen && !isReasoningStreaming;

  return (
    <ReasoningRoot
      open={isOpen}
      onOpenChange={handleOpenChange}
      variant={variant}
    >
      <div className="flex items-center justify-between">
        <ReasoningTrigger
          active={isReasoningStreaming}
          duration={duration || persistedDuration}
        />
        {showCopy ? (
          <ReasoningCopyButton startIndex={startIndex} endIndex={endIndex} />
        ) : null}
      </div>
      <ReasoningContent
        aria-busy={isReasoningStreaming}
        streaming={isReasoningStreaming}
      >
        <ReasoningText streaming={isReasoningStreaming}>
          {children}
        </ReasoningText>
      </ReasoningContent>
    </ReasoningRoot>
  );
};

// Memoized reasoning part renderer, typed as a compound component so the
// building blocks below can be attached as static members.
const Reasoning = memo(
  ReasoningImpl,
) as unknown as ReasoningMessagePartComponent & {
  Root: typeof ReasoningRoot;
  Trigger: typeof ReasoningTrigger;
  Content: typeof ReasoningContent;
  Text: typeof ReasoningText;
  Fade: typeof ReasoningFade;
  FadeTop: typeof ReasoningFadeTop;
};

// Attach the display name and the sub-components declared in the cast above.
Reasoning.displayName = "Reasoning";
Reasoning.Root = ReasoningRoot;
Reasoning.Trigger = ReasoningTrigger;
Reasoning.Content = ReasoningContent;
Reasoning.Text = ReasoningText;
Reasoning.Fade = ReasoningFade;
Reasoning.FadeTop = ReasoningFadeTop;

// Memoized wrapper for a group of consecutive reasoning parts.
const ReasoningGroup = memo(ReasoningGroupImpl);
ReasoningGroup.displayName = "ReasoningGroup";

export {
  Reasoning,
  ReasoningGroup,
  ReasoningRoot,
  ReasoningTrigger,
  ReasoningContent,
  ReasoningText,
  ReasoningFade,
  ReasoningFadeTop,
};


================================================
FILE: studio/frontend/src/components/assistant-ui/sources.tsx
================================================
"use client";

import { memo, useState, type ComponentProps } from "react";
import type { SourceMessagePartComponent } from "@assistant-ui/react";
import { cn } from "@/lib/utils";
import { Badge, badgeVariants, type BadgeProps } from "./badge";

/**
 * Returns the hostname of `url` without a leading "www.".
 * Input that cannot be parsed as an absolute URL is returned unchanged.
 */
const extractDomain = (url: string): string => {
  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    return url;
  }
  return hostname.replace(/^www\./, "");
};

/** Returns the upper-cased first character of the URL's domain ("" when the domain is empty). */
const getDomainInitial = (url: string): string =>
  extractDomain(url).slice(0, 1).toUpperCase();

/**
 * Favicon for a source URL.
 *
 * Loads the icon from Google's favicon service for the URL's domain. If the
 * image fails to load, a small badge with the domain's initial is rendered
 * instead. The domain is passed through URLSearchParams so that unparseable
 * input (which extractDomain returns verbatim) cannot break the query string.
 */
function SourceIcon({
  url,
  className,
  ...props
}: ComponentProps<"span"> & { url: string }) {
  const [hasError, setHasError] = useState(false);
  const domain = extractDomain(url);

  if (hasError) {
    return (
      <span
        data-slot="source-icon-fallback"
        className={cn(
          "flex size-3 shrink-0 items-center justify-center rounded-sm bg-muted font-medium text-[10px]",
          className,
        )}
        {...props}
      >
        {getDomainInitial(url)}
      </span>
    );
  }

  // Yields "domain=<escaped>&sz=32"; identical to the plain form for ordinary hostnames.
  const query = new URLSearchParams({ domain, sz: "32" }).toString();

  return (
    <img
      data-slot="source-icon"
      src={`https://www.google.com/s2/favicons?${query}`}
      alt=""
      className={cn("size-3 shrink-0 rounded-sm", className)}
      onError={() => setHasError(true)}
      {...(props as ComponentProps<"img">)}
    />
  );
}

/** Width-limited, truncating label for a source badge. */
function SourceTitle({ className, ...props }: ComponentProps<"span">) {
  const titleClassName = cn("max-w-37.5 truncate", className);

  return <span data-slot="source-title" className={titleClassName} {...props} />;
}

/**
 * Props of Source: Badge styling props combined with native anchor props.
 * `asChild` is accepted for API compatibility; Source itself always renders
 * the Badge as its child anchor.
 */
export type SourceProps = Omit<BadgeProps, "asChild"> &
  ComponentProps<"a"> & {
    asChild?: boolean;
  };

/**
 * Badge-styled link to a source.
 * Opens in a new tab with "noopener noreferrer" unless `target` / `rel` are
 * overridden. `asChild` is taken out of the props so it never reaches the
 * anchor element.
 */
function Source({
  className,
  variant,
  size,
  asChild = false,
  target = "_blank",
  rel = "noopener noreferrer",
  ...props
}: SourceProps) {
  const anchorProps = props as ComponentProps<"a">;
  const badgeClassName = cn(
    "cursor-pointer outline-none focus-visible:border-ring focus-visible:ring-[3px] focus-visible:ring-ring/50",
    className,
  );

  return (
    <Badge asChild variant={variant} size={size} className={badgeClassName}>
      <a data-slot="source" target={target} rel={rel} {...anchorProps} />
    </Badge>
  );
}

/**
 * Renders one "url" source message part as a small badge link showing the
 * site's favicon and the title (falling back to the domain).
 *
 * Returns null for non-url sources, for missing URLs, and for absolute URLs
 * whose scheme is not http(s): the URL is externally supplied, so schemes
 * such as `javascript:` or `data:` must never end up in an `href`.
 */
const SourcesImpl: SourceMessagePartComponent = ({
  url,
  title,
  sourceType,
}) => {
  if (sourceType !== "url" || !url) return null;

  try {
    const { protocol } = new URL(url);
    if (protocol !== "http:" && protocol !== "https:") return null;
  } catch {
    // Not an absolute URL (e.g. a relative path): it resolves against the
    // current page, so it is rendered as before.
  }

  const domain = extractDomain(url);
  const displayTitle = title || domain;

  return (
    <span className="mr-1 mt-1 inline-block first:mt-2">
      <Source href={url}>
        <SourceIcon url={url} />
        <SourceTitle>{displayTitle}</SourceTitle>
      </Source>
    </span>
  );
};

// Memoized source part renderer, typed as a compound component so the
// building blocks can be attached as static members.
const Sources = memo(SourcesImpl) as unknown as SourceMessagePartComponent & {
  Root: typeof Source;
  Icon: typeof SourceIcon;
  Title: typeof SourceTitle;
};

// Attach the display name and the sub-components declared in the cast above.
Sources.displayName = "Sources";
Sources.Root = Source;
Sources.Icon = SourceIcon;
Sources.Title = SourceTitle;

export {
  Sources,
  Source,
  SourceIcon,
  SourceTitle,
  badgeVariants as sourceVariants,
};


================================================
FILE: studio/frontend/src/components/assistant-ui/thread.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  ComposerAddAttachment,
  ComposerAttachments,
  UserMessageAttachments,
} from "@/components/assistant-ui/attachment";
import { MessageTiming } from "@/components/assistant-ui/message-timing";
import { MarkdownText } from "@/components/assistant-ui/markdown-text";
import { Reasoning, ReasoningGroup } from "@/components/assistant-ui/reasoning";
import { Sources } from "@/components/assistant-ui/sources";
import { ToolFallback } from "@/components/assistant-ui/tool-fallback";
import { ToolGroup } from "@/components/assistant-ui/tool-group";
import { WebSearchToolUI } from "@/components/assistant-ui/tool-ui-web-search";
import { PythonToolUI } from "@/components/assistant-ui/tool-ui-python";
import { TerminalToolUI } from "@/components/assistant-ui/tool-ui-terminal";
import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { Button } from "@/components/ui/button";
import { sentAudioNames } from "@/features/chat/api/chat-adapter";
import { AUDIO_ACCEPT, MAX_AUDIO_SIZE, fileToBase64 } from "@/lib/audio-utils";
import { copyToClipboard } from "@/lib/copy-to-clipboard";
import { cn } from "@/lib/utils";
import {
  ActionBarMorePrimitive,
  ActionBarPrimitive,
  AuiIf,
  BranchPickerPrimitive,
  ComposerPrimitive,
  ErrorPrimitive,
  MessagePrimitive,
  SuggestionPrimitive,
  ThreadPrimitive,
  useAui,
  useAuiEvent,
  useAuiState,
} from "@assistant-ui/react";
import { motion } from "framer-motion";
import {
  ArrowDownIcon,
  ArrowUpIcon,
  CheckIcon,
  ChevronLeftIcon,
  ChevronRightIcon,
  CopyIcon,
  DownloadIcon,
  GlobeIcon,
  HeadphonesIcon,
  LightbulbIcon,
  LightbulbOffIcon,
  MicIcon,
  MoreHorizontalIcon,
  LoaderIcon,
  PencilIcon,
  RefreshCwIcon,
  SquareIcon,
  TerminalIcon,
  XIcon,
} from "lucide-react";
import { type FC, useCallback, useEffect, useRef, useState } from "react";
import { useChatRuntimeStore } from "@/features/chat/stores/chat-runtime-store";

/**
 * Root of the chat thread: welcome screen (while the thread is empty),
 * message list and a sticky footer with the scroll-to-bottom button, the
 * generating indicator and the composer.
 *
 * `hideWelcome` suppresses the welcome screen; `hideComposer` suppresses the
 * composer both in the welcome screen and in the footer.
 */
export const Thread: FC<{ hideComposer?: boolean; hideWelcome?: boolean }> = (
  props,
) => {
  const { hideComposer, hideWelcome } = props;

  // Maximum content width, consumed by descendants via max-w-(--thread-max-width).
  const rootStyle = {
    ["--thread-max-width" as string]: "44rem",
  };
  const messageComponents = {
    UserMessage,
    EditComposer,
    AssistantMessage,
  };

  return (
    <ThreadPrimitive.Root
      className="aui-root aui-thread-root @container flex h-full flex-col "
      style={rootStyle}
    >
      <ThreadPrimitive.Viewport className="aui-thread-viewport relative flex flex-1 flex-col overflow-x-auto overflow-y-scroll scroll-smooth px-4 pt-4">
        {hideWelcome ? null : (
          <AuiIf condition={({ thread }) => thread.isEmpty}>
            <ThreadWelcome hideComposer={hideComposer} />
          </AuiIf>
        )}

        <ThreadPrimitive.Messages components={messageComponents} />

        <ThreadPrimitive.ViewportFooter className="aui-thread-viewport-footer sticky bottom-0 mt-auto flex w-full flex-col gap-4 overflow-visible bg-background pb-4 md:pb-4">
          <ThreadScrollToBottom />
          <GeneratingSpinner />
          {/* On an empty thread the composer lives inside the welcome screen. */}
          <AuiIf condition={({ thread }) => !thread.isEmpty}>
            {hideComposer ? null : <ComposerAnimated />}
          </AuiIf>
        </ThreadPrimitive.ViewportFooter>
      </ThreadPrimitive.Viewport>
    </ThreadPrimitive.Root>
  );
};

/** Floating round button above the footer that scrolls the thread to the bottom; invisible while disabled. */
const ThreadScrollToBottom: FC = () => (
  <ThreadPrimitive.ScrollToBottom asChild={true}>
    <TooltipIconButton
      tooltip="Scroll to bottom"
      variant="outline"
      className="aui-thread-scroll-to-bottom absolute -top-12 z-10 self-center rounded-full p-4 disabled:invisible dark:bg-background dark:hover:bg-accent"
    >
      <ArrowDownIcon />
    </TooltipIconButton>
  </ThreadPrimitive.ScrollToBottom>
);

/**
 * One suggestion chip on the welcome screen.
 *
 * Clicking sends the suggestion's prompt immediately and clears the composer.
 * If the thread is disabled or already running, the prompt is only placed
 * into the composer instead.
 */
const SuggestionItem: FC = () => {
  const aui = useAui();
  const prompt = useAuiState(({ suggestion }) => suggestion.prompt);
  const isDisabled = useAuiState(({ thread }) => thread.isDisabled);
  const isRunning = useAuiState(({ thread }) => thread.isRunning);

  const handleClick = () => {
    if (isDisabled || isRunning) {
      // Cannot send right now: just prefill the composer.
      aui.composer().setText(prompt);
      return;
    }
    aui.thread().append(prompt);
    aui.composer().setText("");
  };

  return (
    <button
      type="button"
      onClick={handleClick}
      className="fade-in slide-in-from-bottom-1 animate-in cursor-pointer corner-squircle rounded-xl border bg-background px-4 py-2.5 text-left text-sm text-foreground shadow-sm transition-colors duration-150 hover:bg-accent"
    >
      <SuggestionPrimitive.Title />
    </button>
  );
};

/**
 * Welcome screen shown for an empty thread: mascot, headline, tagline, a
 * two-column grid of suggestions, the generating indicator and (unless
 * `hideComposer` is set) the composer.
 */
const ThreadWelcome: FC<{ hideComposer?: boolean }> = (props) => {
  const { hideComposer } = props;

  const heading = (
    <div className="flex flex-col items-center gap-2 text-center">
      <img
        src="/Sloth emojis/sloth pc square.png"
        alt="Sloth mascot"
        className="size-20"
      />
      <h1 className="aui-thread-welcome-message-inner fade-in slide-in-from-bottom-1 animate-in font-semibold text-2xl duration-200">
        Chat with your model
      </h1>
      <p className="aui-thread-welcome-message-inner fade-in slide-in-from-bottom-1 animate-in text-muted-foreground text-base delay-75 duration-200">
        Run GGUFs, safetensors, vision and audio models!
      </p>
    </div>
  );

  return (
    <div className="aui-thread-welcome-root mx-auto my-auto flex w-full max-w-(--thread-max-width) grow flex-col">
      <div className="aui-thread-welcome-center flex w-full grow flex-col items-center justify-center">
        <div className="aui-thread-welcome-message flex w-full flex-col justify-center gap-6 px-4">
          {heading}
          <div className="grid grid-cols-2 gap-2">
            <ThreadPrimitive.Suggestions
              components={{ Suggestion: SuggestionItem }}
            />
          </div>
          <GeneratingSpinner />
          {hideComposer ? null : <ComposerAnimated />}
        </div>
      </div>
    </div>
  );
};

/** Centered "Generating" indicator, rendered only while the chat runtime store has a truthy `generatingStatus`. */
const GeneratingSpinner: FC = () => {
  const status = useChatRuntimeStore((s) => s.generatingStatus);

  return status ? (
    <div className="mx-auto flex w-full max-w-(--thread-max-width) items-center justify-center py-2">
      <div className="flex items-center gap-2 text-xs text-muted-foreground">
        <LoaderIcon className="size-3.5 animate-spin" />
        <span>Generating</span>
      </div>
    </div>
  ) : null;
};

/**
 * Composer wrapped in a framer-motion layout container. The shared
 * `layoutId` lets the composer animate between the places it is mounted.
 */
const ComposerAnimated: FC = () => {
  const springTransition = { type: "spring", bounce: 0.15, duration: 0.5 };

  return (
    <motion.div
      layout
      layoutId="composer"
      transition={springTransition}
      className="mx-auto w-full max-w-(--thread-max-width)"
    >
      <Composer />
    </motion.div>
  );
};

/**
 * Chip showing the name of the audio file queued for the next message, with
 * a button to discard it. Renders nothing when no audio is pending.
 */
const PendingAudioChip: FC = () => {
  const audioName = useChatRuntimeStore((s) => s.pendingAudioName);
  const clearPendingAudio = useChatRuntimeStore((s) => s.clearPendingAudio);

  if (!audioName) {
    return null;
  }

  const removeButton = (
    <button
      type="button"
      onClick={clearPendingAudio}
      className="flex size-4 items-center justify-center rounded-full hover:bg-destructive hover:text-destructive-foreground"
      aria-label="Remove audio"
    >
      <XIcon className="size-3" />
    </button>
  );

  return (
    <div className="mb-2 flex w-full flex-row items-center gap-2 px-1.5 pt-0.5 pb-1">
      <div className="flex items-center gap-2 rounded-lg border border-foreground/20 bg-muted px-3 py-1.5 text-xs">
        <HeadphonesIcon className="size-3.5 text-muted-foreground" />
        <span className="max-w-48 truncate">{audioName}</span>
        {removeButton}
      </div>
    </div>
  );
};

/**
 * Main message composer.
 *
 * Inside an attachment dropzone it stacks, top to bottom: attachments, the
 * pending-audio chip, the tool status line, the text input and the action row.
 */
const Composer: FC = () => (
  <ComposerPrimitive.Root className="aui-composer-root relative flex w-full flex-col">
    <ComposerPrimitive.AttachmentDropzone className="aui-composer-attachment-dropzone shadow-border ring-1 ring-border flex w-full flex-col rounded-2xl bg-background px-1 pt-2 outline-none transition-shadow data-[dragging=true]:ring-ring data-[dragging=true]:bg-accent/50">
      <ComposerAttachments />
      <PendingAudioChip />
      <ToolStatusDisplay />
      <ComposerPrimitive.Input
        aria-label="Message input"
        autoFocus={true}
        rows={1}
        placeholder="Send a message..."
        className="aui-composer-input mb-1 max-h-32 min-h-12 w-full resize-none bg-transparent px-4 pt-2 pb-3 text-sm outline-none placeholder:text-muted-foreground focus-visible:ring-0"
      />
      <ComposerAction />
    </ComposerPrimitive.AttachmentDropzone>
  </ComposerPrimitive.Root>
);

/**
 * Audio upload button for the composer.
 *
 * Only rendered when the model matching the current checkpoint reports
 * `hasAudioInput`. Picking a file base64-encodes it and stores it (with its
 * name) as the pending audio; files larger than `MAX_AUDIO_SIZE` and encoding
 * failures are silently ignored.
 */
const ComposerAudioUpload: FC = () => {
  const fileInputRef = useRef<HTMLInputElement>(null);
  const setPendingAudio = useChatRuntimeStore((state) => state.setPendingAudio);
  const activeModel = useChatRuntimeStore((state) =>
    state.models.find((model) => model.id === state.params.checkpoint),
  );

  const handleAudioFile = useCallback(
    async (file: File) => {
      if (file.size > MAX_AUDIO_SIZE) {
        return;
      }
      try {
        setPendingAudio(await fileToBase64(file), file.name);
      } catch {
        // Best effort: a file that cannot be read is simply skipped.
      }
    },
    [setPendingAudio],
  );

  // All hooks run above this point; bail out only afterwards.
  if (!activeModel?.hasAudioInput) {
    return null;
  }

  const openFilePicker = () => fileInputRef.current?.click();

  return (
    <>
      <input
        ref={fileInputRef}
        type="file"
        accept={AUDIO_ACCEPT}
        className="hidden"
        onChange={(event) => {
          const selected = event.target.files?.[0];
          if (selected) handleAudioFile(selected);
          // Reset so picking the same file again still fires a change event.
          event.target.value = "";
        }}
      />
      <TooltipIconButton
        tooltip="Upload audio"
        aria-label="Upload audio"
        side="bottom"
        variant="ghost"
        size="icon"
        className="size-8.5 rounded-full p-1 text-muted-foreground hover:bg-muted-foreground/15"
        onClick={openFilePicker}
      >
        <HeadphonesIcon className="size-4.5 stroke-[1.5px]" />
      </TooltipIconButton>
    </>
  );
};

/**
 * Apply sampling presets for Qwen3-family checkpoints when thinking is toggled.
 *
 * Reads the active checkpoint from the chat runtime store and does nothing
 * unless its lower-cased name contains "qwen3" (which also matches "qwen3.5").
 * Otherwise the preset for the requested mode is merged over the current
 * params and written back with `setParams`.
 *
 * @param thinkingOn - Whether thinking mode is being switched on.
 */
function applyQwenThinkingParams(thinkingOn: boolean): void {
  const store = useChatRuntimeStore.getState();
  const checkpoint = store.params.checkpoint?.toLowerCase() ?? "";
  if (!checkpoint.includes("qwen3")) return;
  // Presets applied below (same for every checkpoint that matches "qwen3"):
  //   Thinking ON:  temp=0.6, top_p=0.95, top_k=20, min_p=0.0
  //   Thinking OFF: temp=0.7, top_p=0.8,  top_k=20, min_p=0.0
  // NOTE(review): an earlier version of this comment listed temp=1.0 for
  // thinking ON while the code applies 0.6 — confirm the intended value.
  const params = thinkingOn
    ? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
    : { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
  store.setParams({ ...store.params, ...params });
}

/**
 * "Think" pill that flips reasoning mode on and off.
 *
 * Hidden when the store reports no reasoning support. Each click writes the
 * new flag to the store and then applies the Qwen thinking sampling presets
 * for that state.
 */
const ReasoningToggle: FC = () => {
  const supportsReasoning = useChatRuntimeStore(
    (state) => state.supportsReasoning,
  );
  const isThinkingOn = useChatRuntimeStore((state) => state.reasoningEnabled);
  const setReasoningEnabled = useChatRuntimeStore(
    (state) => state.setReasoningEnabled,
  );

  if (!supportsReasoning) {
    return null;
  }

  const handleToggle = () => {
    const nextEnabled = !isThinkingOn;
    setReasoningEnabled(nextEnabled);
    applyQwenThinkingParams(nextEnabled);
  };

  const BulbIcon = isThinkingOn ? LightbulbIcon : LightbulbOffIcon;
  const stateClassName = isThinkingOn
    ? "bg-primary/10 text-primary hover:bg-primary/20"
    : "bg-muted text-muted-foreground hover:bg-muted-foreground/15";

  return (
    <button
      type="button"
      aria-label={isThinkingOn ? "Disable thinking" : "Enable thinking"}
      className={cn(
        "flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-colors",
        stateClassName,
      )}
      onClick={handleToggle}
    >
      <BulbIcon className="size-3.5" />
      <span>Think</span>
    </button>
  );
};

/**
 * "Search" pill that toggles the store's `toolsEnabled` flag.
 *
 * Hidden when the store reports no tool support.
 */
const WebSearchToggle: FC = () => {
  const supportsTools = useChatRuntimeStore((state) => state.supportsTools);
  const isSearchOn = useChatRuntimeStore((state) => state.toolsEnabled);
  const setToolsEnabled = useChatRuntimeStore((state) => state.setToolsEnabled);

  if (!supportsTools) {
    return null;
  }

  const stateClassName = isSearchOn
    ? "bg-primary/10 text-primary hover:bg-primary/20"
    : "bg-muted text-muted-foreground hover:bg-muted-foreground/15";

  return (
    <button
      type="button"
      aria-label={isSearchOn ? "Disable web search" : "Enable web search"}
      className={cn(
        "flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-colors",
        stateClassName,
      )}
      onClick={() => setToolsEnabled(!isSearchOn)}
    >
      <GlobeIcon className="size-3.5" />
      <span>Search</span>
    </button>
  );
};

/**
 * "Code" pill that toggles the store's `codeToolsEnabled` flag.
 *
 * Hidden when the store reports no tool support.
 */
const CodeToolsToggle: FC = () => {
  const supportsTools = useChatRuntimeStore((state) => state.supportsTools);
  const isCodeOn = useChatRuntimeStore((state) => state.codeToolsEnabled);
  const setCodeToolsEnabled = useChatRuntimeStore(
    (state) => state.setCodeToolsEnabled,
  );

  if (!supportsTools) {
    return null;
  }

  const stateClassName = isCodeOn
    ? "bg-primary/10 text-primary hover:bg-primary/20"
    : "bg-muted text-muted-foreground hover:bg-muted-foreground/15";

  return (
    <button
      type="button"
      aria-label={isCodeOn ? "Disable code execution" : "Enable code execution"}
      className={cn(
        "flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-colors",
        stateClassName,
      )}
      onClick={() => setCodeToolsEnabled(!isCodeOn)}
    >
      <TerminalIcon className="size-3.5" />
      <span>Code</span>
    </button>
  );
};

/**
 * Pulsing status pill for the tool currently in progress, with a seconds
 * counter.
 *
 * The counter resets to 0 whenever `toolStatus` changes and ticks once per
 * second while a status is present. A status beginning with "Running" gets the
 * terminal icon; any other status gets the globe icon. Renders nothing when
 * there is no status.
 */
const ToolStatusDisplay: FC = () => {
  const toolStatus = useChatRuntimeStore((state) => state.toolStatus);
  const [elapsed, setElapsed] = useState(0);

  useEffect(() => {
    // Every status change (including clearing) restarts the counter.
    setElapsed(0);
    if (!toolStatus) {
      return;
    }
    const timerId = setInterval(() => {
      setElapsed((seconds) => seconds + 1);
    }, 1000);
    return () => clearInterval(timerId);
  }, [toolStatus]);

  if (!toolStatus) {
    return null;
  }

  const StatusIcon = toolStatus.startsWith("Running")
    ? TerminalIcon
    : GlobeIcon;

  return (
    <div className="mb-2 flex w-full flex-row items-center gap-2 px-1.5 pt-0.5 pb-1">
      <div className="flex animate-pulse items-center gap-2 rounded-full border border-primary/20 bg-primary/5 px-3 py-1.5 text-xs text-primary">
        <StatusIcon className="size-3.5" />
        <span>{toolStatus}</span>
        <span className="tabular-nums opacity-60">{elapsed}s</span>
      </div>
    </div>
  );
};

/**
 * Bottom action row of the composer.
 *
 * Left group: attachment, audio upload and the Think / Search / Code toggles.
 * Right group: dictation start/stop, plus either the send button (thread idle)
 * or the stop button (thread running).
 */
const ComposerAction: FC = () => (
  <div className="aui-composer-action-wrapper relative mx-2 mb-2 flex items-center justify-between">
    {/* Left: inputs and feature toggles */}
    <div className="flex items-center gap-1">
      <ComposerAddAttachment />
      <ComposerAudioUpload />
      <ReasoningToggle />
      <WebSearchToggle />
      <CodeToolsToggle />
    </div>

    {/* Right: dictation and send / cancel */}
    <div className="flex items-center gap-1">
      <ComposerPrimitive.If dictation={false}>
        <ComposerPrimitive.Dictate asChild={true}>
          <TooltipIconButton
            tooltip="Dictate"
            variant="ghost"
            className="size-8 rounded-full text-muted-foreground"
          >
            <MicIcon className="size-4" />
          </TooltipIconButton>
        </ComposerPrimitive.Dictate>
      </ComposerPrimitive.If>
      <ComposerPrimitive.If dictation={true}>
        <ComposerPrimitive.StopDictation asChild={true}>
          <TooltipIconButton
            tooltip="Stop dictation"
            variant="ghost"
            className="size-8 rounded-full text-destructive"
          >
            <SquareIcon className="size-3 animate-pulse fill-current" />
          </TooltipIconButton>
        </ComposerPrimitive.StopDictation>
      </ComposerPrimitive.If>
      <AuiIf condition={({ thread }) => !thread.isRunning}>
        <ComposerPrimitive.Send asChild={true}>
          <TooltipIconButton
            tooltip="Send message"
            aria-label="Send message"
            side="bottom"
            type="submit"
            variant="default"
            size="icon"
            className="aui-composer-send size-8 rounded-full"
          >
            <ArrowUpIcon className="aui-composer-send-icon size-4" />
          </TooltipIconButton>
        </ComposerPrimitive.Send>
      </AuiIf>
      <AuiIf condition={({ thread }) => thread.isRunning}>
        <ComposerPrimitive.Cancel asChild={true}>
          <Button
            type="button"
            aria-label="Stop generating"
            variant="default"
            size="icon"
            className="aui-composer-cancel size-8 rounded-full"
          >
            <SquareIcon className="aui-composer-cancel-icon size-3 fill-current" />
          </Button>
        </ComposerPrimitive.Cancel>
      </AuiIf>
    </div>
  </div>
);

/** Bordered error box for a message's error, with the text clamped to two lines. */
const MessageError: FC = () => (
  <MessagePrimitive.Error>
    <ErrorPrimitive.Root className="aui-message-error-root mt-2 rounded-md border border-destructive bg-destructive/10 p-3 text-destructive text-sm dark:bg-destructive/5 dark:text-red-200">
      <ErrorPrimitive.Message className="aui-message-error-message line-clamp-2" />
    </ErrorPrimitive.Root>
  </MessagePrimitive.Error>
);

/**
 * Layout for a single assistant message.
 *
 * Message parts are rendered with markdown text, reasoning, sources and tool
 * groups; `web_search`, `python` and `terminal` tool calls get dedicated UIs
 * and every other tool uses `ToolFallback`. An error box follows the parts,
 * and the footer holds the branch picker and the action bar.
 */
const AssistantMessage: FC = () => (
  <MessagePrimitive.Root
    data-role="assistant"
    className="aui-assistant-message-root fade-in slide-in-from-bottom-1 relative mx-auto w-full max-w-(--thread-max-width) animate-in py-3 duration-150"
  >
    <div className="aui-assistant-message-content wrap-break-word px-2 text-foreground leading-relaxed">
      <MessagePrimitive.Parts
        components={{
          Text: MarkdownText,
          Reasoning,
          ReasoningGroup,
          Source: Sources,
          ToolGroup,
          tools: {
            by_name: {
              web_search: WebSearchToolUI,
              python: PythonToolUI,
              terminal: TerminalToolUI,
            },
            Fallback: ToolFallback,
          },
        }}
      />
      <MessageError />
    </div>

    <div className="aui-assistant-message-footer mt-1 ml-2 flex">
      <BranchPicker />
      <AssistantActionBar />
    </div>
  </MessagePrimitive.Root>
);

/** How long the "copied" checkmark stays visible, in milliseconds. */
const COPY_RESET_MS = 2000;

/**
 * Copies the current message's text to the clipboard.
 *
 * After a successful copy the icon switches to a checkmark for
 * `COPY_RESET_MS`, then reverts. Copying again inside that window restarts
 * the timer. Any pending timer is cancelled on unmount so the reset callback
 * never runs against an unmounted component.
 */
const CopyButton: FC = () => {
  const aui = useAui();
  const [copied, setCopied] = useState(false);
  const resetTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Unmount cleanup: drop a still-pending reset timer.
  useEffect(() => {
    return () => {
      if (resetTimeoutRef.current) {
        clearTimeout(resetTimeoutRef.current);
        resetTimeoutRef.current = null;
      }
    };
  }, []);

  const handleCopy = () => {
    const text = aui.message().getCopyText();
    if (!copyToClipboard(text)) {
      // Copy failed: leave the icon unchanged.
      return;
    }
    setCopied(true);
    // Restart the window if the user copies again before it expires.
    if (resetTimeoutRef.current) clearTimeout(resetTimeoutRef.current);
    resetTimeoutRef.current = setTimeout(() => {
      setCopied(false);
      resetTimeoutRef.current = null;
    }, COPY_RESET_MS);
  };

  return (
    <TooltipIconButton tooltip="Copy" onClick={handleCopy}>
      {copied ? <CheckIcon /> : <CopyIcon />}
    </TooltipIconButton>
  );
};

/**
 * Action bar under an assistant message: copy, reload, message timing and a
 * "More" menu whose single entry exports the message as Markdown.
 */
const AssistantActionBar: FC = () => (
  <ActionBarPrimitive.Root
    hideWhenRunning={true}
    autohide="not-last"
    autohideFloat="single-branch"
    className="aui-assistant-action-bar-root col-start-3 row-start-2 -ml-1 flex gap-1 text-muted-foreground data-floating:absolute data-floating:rounded-md data-floating:border data-floating:bg-background data-floating:p-1 data-floating:shadow-sm"
  >
    <CopyButton />
    <ActionBarPrimitive.Reload asChild={true}>
      <TooltipIconButton tooltip="Refresh">
        <RefreshCwIcon />
      </TooltipIconButton>
    </ActionBarPrimitive.Reload>
    <MessageTiming side="top" />

    {/* Overflow menu */}
    <ActionBarMorePrimitive.Root>
      <ActionBarMorePrimitive.Trigger asChild={true}>
        <TooltipIconButton
          tooltip="More"
          className="data-[state=open]:bg-accent"
        >
          <MoreHorizontalIcon />
        </TooltipIconButton>
      </ActionBarMorePrimitive.Trigger>
      <ActionBarMorePrimitive.Content
        side="bottom"
        align="start"
        className="aui-action-bar-more-content z-50 min-w-32 overflow-hidden rounded-md border bg-popover p-1 text-popover-foreground shadow-md"
      >
        <ActionBarPrimitive.ExportMarkdown asChild={true}>
          <ActionBarMorePrimitive.Item className="aui-action-bar-more-item flex cursor-pointer select-none items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-none hover:bg-accent hover:text-accent-foreground focus:bg-accent focus:text-accent-foreground">
            <DownloadIcon className="size-4" />
            Export as Markdown
          </ActionBarMorePrimitive.Item>
        </ActionBarPrimitive.ExportMarkdown>
      </ActionBarMorePrimitive.Content>
    </ActionBarMorePrimitive.Root>
  </ActionBarPrimitive.Root>
);

/**
 * Right-aligned chip naming the audio file sent with a user message.
 *
 * The name is looked up in `sentAudioNames` by message id; nothing is rendered
 * when there is no entry.
 */
const UserMessageAudio: FC = () => {
  const sentName = useAuiState(({ message }) =>
    sentAudioNames.get(message.id),
  );

  if (!sentName) {
    return null;
  }

  return (
    <div className="col-start-2 flex justify-end">
      <div className="flex items-center gap-2 rounded-lg border border-foreground/20 bg-muted px-3 py-1.5 text-xs">
        <HeadphonesIcon className="size-3.5 text-muted-foreground" />
        <span className="max-w-48 truncate">{sentName}</span>
      </div>
    </div>
  );
};

/**
 * Layout for a single user message: attachments, audio chip, the message
 * bubble with its action bar positioned to the left, and a branch picker.
 */
const UserMessage: FC = () => (
  <MessagePrimitive.Root
    data-role="user"
    className="aui-user-message-root  fade-in slide-in-from-bottom-1 mx-auto grid w-full max-w-(--thread-max-width) animate-in auto-rows-auto grid-cols-[minmax(72px,1fr)_auto] content-start gap-y-2 px-2 py-3 duration-150 [&:where(>*)]:col-start-2"
  >
    <UserMessageAttachments />
    <UserMessageAudio />

    <div className="aui-user-message-content-wrapper relative col-start-2 min-w-0">
      <div className="aui-user-message-content wrap-break-word rounded-2xl bg-muted  px-4 py-2.5 text-foreground">
        <MessagePrimitive.Parts />
      </div>
      {/* Action bar sits just outside the bubble's left edge, vertically centered. */}
      <div className="aui-user-action-bar-wrapper absolute top-1/2 left-0 -translate-x-full -translate-y-1/2 pr-2">
        <UserActionBar />
      </div>
    </div>

    <BranchPicker className="aui-user-branch-picker col-span-full col-start-1 row-start-3 -mr-1 justify-end" />
  </MessagePrimitive.Root>
);

/** Action bar for a user message: copy and edit. */
const UserActionBar: FC = () => (
  <ActionBarPrimitive.Root
    autohide="not-last"
    className="aui-user-action-bar-root flex items-center"
  >
    <CopyButton />
    <ActionBarPrimitive.Edit asChild={true}>
      <TooltipIconButton tooltip="Edit" className="aui-user-action-edit">
        <PencilIcon />
      </TooltipIconButton>
    </ActionBarPrimitive.Edit>
  </ActionBarPrimitive.Root>
);

/**
 * Inline composer for editing a previously sent message.
 *
 * "Update" behaves as follows:
 *  - text unchanged from the original message: cancel the edit;
 *  - thread not running: send immediately;
 *  - thread running: cancel the run and remember to send once the
 *    `thread.runEnd` event arrives.
 */
const EditComposer: FC = () => {
  const aui = useAui();
  // Set while we are waiting for a cancelled run to end before resending.
  const pendingResendRef = useRef(false);

  useAuiEvent("thread.runEnd", () => {
    if (pendingResendRef.current) {
      pendingResendRef.current = false;
      aui.composer().send();
    }
  });

  const submitEdit = () => {
    const editedText = aui.composer().getState().text;
    const previousText = aui.message().getCopyText();

    if (editedText === previousText) {
      aui.composer().cancel();
      return;
    }

    if (!aui.thread().getState().isRunning) {
      aui.composer().send();
      return;
    }

    // A run is in flight: stop it first; the runEnd handler performs the send.
    pendingResendRef.current = true;
    aui.thread().cancelRun();
  };

  return (
    <MessagePrimitive.Root className="aui-edit-composer-wrapper mx-auto flex w-full max-w-(--thread-max-width) flex-col px-2 py-3">
      <ComposerPrimitive.Root className="aui-edit-composer-root ml-auto flex w-full max-w-[85%] flex-col rounded-2xl bg-muted">
        <ComposerPrimitive.Input
          autoFocus={true}
          className="aui-edit-composer-input min-h-14 w-full resize-none bg-transparent p-4 text-foreground text-sm outline-none"
        />
        <div className="aui-edit-composer-footer mx-3 mb-3 flex items-center gap-2 self-end">
          <ComposerPrimitive.Cancel asChild={true}>
            <Button variant="ghost" size="sm">
              Cancel
            </Button>
          </ComposerPrimitive.Cancel>
          <Button size="sm" onClick={submitEdit}>
            Update
          </Button>
        </div>
      </ComposerPrimitive.Root>
    </MessagePrimitive.Root>
  );
};

/**
 * Previous / "n / total" / next control for switching message branches.
 *
 * Hidden when only one branch exists. `className` is merged with the default
 * classes; all other props are forwarded to the root primitive.
 */
const BranchPicker: FC<BranchPickerPrimitive.Root.Props> = ({
  className,
  ...rootProps
}) => (
  <BranchPickerPrimitive.Root
    hideWhenSingleBranch={true}
    className={cn(
      "aui-branch-picker-root mr-2 -ml-2 inline-flex items-center text-muted-foreground text-xs",
      className,
    )}
    {...rootProps}
  >
    <BranchPickerPrimitive.Previous asChild={true}>
      <TooltipIconButton tooltip="Previous">
        <ChevronLeftIcon />
      </TooltipIconButton>
    </BranchPickerPrimitive.Previous>
    <span className="aui-branch-picker-state font-medium">
      <BranchPickerPrimitive.Number /> / <BranchPickerPrimitive.Count />
    </span>
    <BranchPickerPrimitive.Next asChild={true}>
      <TooltipIconButton tooltip="Next">
        <ChevronRightIcon />
      </TooltipIconButton>
    </BranchPickerPrimitive.Next>
  </BranchPickerPrimitive.Root>
);


================================================
FILE: studio/frontend/src/components/assistant-ui/tool-fallback.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { cn } from "@/lib/utils";
import {
  type ToolCallMessagePartComponent,
  type ToolCallMessagePartStatus,
  useScrollLock,
} from "@assistant-ui/react";
import {
  AlertCircleIcon,
  CheckIcon,
  ChevronDownIcon,
  LoaderIcon,
  XCircleIcon,
} from "lucide-react";
import {
  type CSSProperties,
  type ComponentProps,
  type ElementType,
  memo,
  useCallback,
  useRef,
  useState,
} from "react";

/** Collapse/expand animation length in milliseconds (also used for the scroll lock). */
const ANIMATION_DURATION = 200;

/**
 * Props for `ToolFallbackRoot`: the `Collapsible` props, with `open` and
 * `onOpenChange` re-declared as optional plus an optional `defaultOpen`.
 */
export type ToolFallbackRootProps = Omit<
  ComponentProps<typeof Collapsible>,
  "open" | "onOpenChange"
> & {
  open?: boolean;
  onOpenChange?: (open: boolean) => void;
  defaultOpen?: boolean;
};

/**
 * Collapsible container for a single tool call.
 *
 * Controlled when `open` is provided; otherwise it keeps its own open state
 * seeded from `defaultOpen`. `lockScroll` is invoked whenever the panel is
 * about to close, and `onOpenChange` is always notified.
 */
function ToolFallbackRoot({
  className,
  open: openProp,
  onOpenChange: onOpenChangeProp,
  defaultOpen = false,
  children,
  ...collapsibleProps
}: ToolFallbackRootProps) {
  const rootRef = useRef<HTMLDivElement>(null);
  const [internalOpen, setInternalOpen] = useState(defaultOpen);
  const lockScroll = useScrollLock(rootRef, ANIMATION_DURATION);

  const isControlled = openProp !== undefined;
  const resolvedOpen = isControlled ? openProp : internalOpen;

  const handleOpenChange = useCallback(
    (nextOpen: boolean) => {
      if (!nextOpen) lockScroll();
      if (!isControlled) setInternalOpen(nextOpen);
      onOpenChangeProp?.(nextOpen);
    },
    [lockScroll, isControlled, onOpenChangeProp],
  );

  // Exposes the duration to CSS as a custom property.
  const animationVars = {
    "--animation-duration": `${ANIMATION_DURATION}ms`,
  } as CSSProperties;

  return (
    <Collapsible
      ref={rootRef}
      data-slot="tool-fallback-root"
      open={resolvedOpen}
      onOpenChange={handleOpenChange}
      className={cn(
        "aui-tool-fallback-root group/tool-fallback-root w-full corner-squircle rounded-lg border py-3",
        className,
      )}
      style={animationVars}
      {...collapsibleProps}
    >
      {children}
    </Collapsible>
  );
}

/** The `type` discriminant of a tool call's status. */
type ToolStatus = ToolCallMessagePartStatus["type"];

/** Icon component to show for each tool-call status type. */
const statusIconMap: Record<ToolStatus, ElementType> = {
  running: LoaderIcon,
  complete: CheckIcon,
  incomplete: XCircleIcon,
  "requires-action": AlertCircleIcon,
};

/**
 * Header row of a tool-call card; clicking it toggles the collapsible.
 *
 * Icon selection:
 *  - running: the status icon, spinning;
 *  - otherwise: the caller-supplied `icon` if given, else the status icon
 *    (both muted when the call was cancelled).
 *
 * The label reads "Used tool: <name>", or "Cancelled tool: <name>" with a
 * strike-through when cancelled. While running, a shimmer copy of the label is
 * overlaid. A missing `status` is treated as "complete".
 */
function ToolFallbackTrigger({
  toolName,
  status,
  icon: ToolIcon,
  className,
  ...triggerProps
}: ComponentProps<typeof CollapsibleTrigger> & {
  toolName: string;
  status?: ToolCallMessagePartStatus;
  icon?: ElementType;
}) {
  const statusType = status?.type ?? "complete";
  const isRunning = statusType === "running";
  const isCancelled =
    status?.type === "incomplete" && status.reason === "cancelled";

  const StatusIcon = statusIconMap[statusType];
  const label = isCancelled ? "Cancelled tool" : "Used tool";

  // While running the status icon always wins; otherwise prefer the custom icon.
  const LeadingIcon = isRunning || !ToolIcon ? StatusIcon : ToolIcon;
  const leadingIconClassName = isRunning
    ? "aui-tool-fallback-trigger-icon size-4 shrink-0 animate-spin"
    : cn(
        "aui-tool-fallback-trigger-icon size-4 shrink-0",
        isCancelled && "text-muted-foreground",
      );

  return (
    <CollapsibleTrigger
      data-slot="tool-fallback-trigger"
      className={cn(
        "aui-tool-fallback-trigger group/trigger flex w-full items-center gap-2 px-4 text-sm transition-colors",
        className,
      )}
      {...triggerProps}
    >
      <LeadingIcon
        data-slot="tool-fallback-trigger-icon"
        className={leadingIconClassName}
      />
      <span
        data-slot="tool-fallback-trigger-label"
        className={cn(
          "aui-tool-fallback-trigger-label-wrapper relative inline-block grow text-left leading-none",
          isCancelled && "text-muted-foreground line-through",
        )}
      >
        <span>
          {label}: <b>{toolName}</b>
        </span>
        {isRunning && (
          <span
            aria-hidden={true}
            data-slot="tool-fallback-trigger-shimmer"
            className="aui-tool-fallback-trigger-shimmer shimmer pointer-events-none absolute inset-0 motion-reduce:animate-none"
          >
            {label}: <b>{toolName}</b>
          </span>
        )}
      </span>
      <ChevronDownIcon
        data-slot="tool-fallback-trigger-chevron"
        className={cn(
          "aui-tool-fallback-trigger-chevron size-4 shrink-0",
          "transition-transform duration-(--animation-duration) ease-out",
          "group-data-[state=closed]/trigger:-rotate-90",
          "group-data-[state=open]/trigger:rotate-0",
        )}
      />
    </CollapsibleTrigger>
  );
}

/**
 * Collapsible body of a tool-call card.
 *
 * Applies the open/close animation classes and wraps `children` in a column
 * separated from the trigger by a top border.
 */
function ToolFallbackContent({
  className,
  children,
  ...contentProps
}: ComponentProps<typeof CollapsibleContent>) {
  const contentClassName = cn(
    "aui-tool-fallback-content relative overflow-hidden text-sm outline-none",
    "group/collapsible-content ease-out",
    "data-[state=closed]:animate-collapsible-up",
    "data-[state=open]:animate-collapsible-down",
    "data-[state=closed]:fill-mode-forwards",
    "data-[state=closed]:pointer-events-none",
    "data-[state=open]:duration-(--animation-duration)",
    "data-[state=closed]:duration-(--animation-duration)",
    className,
  );

  return (
    <CollapsibleContent
      data-slot="tool-fallback-content"
      className={contentClassName}
      {...contentProps}
    >
      <div className="mt-3 flex flex-col gap-2 border-t pt-2">{children}</div>
    </CollapsibleContent>
  );
}

/**
 * Shows a tool call's raw arguments text in a wrapping `<pre>`.
 *
 * Renders nothing when `argsText` is missing or empty.
 */
function ToolFallbackArgs({
  argsText,
  className,
  ...divProps
}: ComponentProps<"div"> & {
  argsText?: string;
}) {
  return argsText ? (
    <div
      data-slot="tool-fallback-args"
      className={cn("aui-tool-fallback-args px-4", className)}
      {...divProps}
    >
      <pre className="aui-tool-fallback-args-value whitespace-pre-wrap">
        {argsText}
      </pre>
    </div>
  ) : null;
}

/**
 * Shows a tool call's result under a "Result:" heading.
 *
 * Strings are printed as-is; any other value is pretty-printed as JSON with
 * two-space indentation. Renders nothing when `result` is `undefined`.
 */
function ToolFallbackResult({
  result,
  className,
  ...divProps
}: ComponentProps<"div"> & {
  result?: unknown;
}) {
  if (result === undefined) {
    return null;
  }

  const resultText =
    typeof result === "string" ? result : JSON.stringify(result, null, 2);

  return (
    <div
      data-slot="tool-fallback-result"
      className={cn(
        "aui-tool-fallback-result border-t border-dashed px-4 pt-2",
        className,
      )}
      {...divProps}
    >
      <p className="aui-tool-fallback-result-header font-semibold">Result:</p>
      <pre className="aui-tool-fallback-result-content whitespace-pre-wrap">
        {resultText}
      </pre>
    </div>
  );
}

/**
 * Shows the error attached to an incomplete tool call.
 *
 * Renders nothing unless the status is "incomplete" and carries a truthy
 * error that yields non-empty text. String errors are shown verbatim; other
 * values are JSON-stringified. The heading is "Cancelled reason:" for
 * cancelled calls and "Error:" otherwise.
 */
function ToolFallbackError({
  status,
  className,
  ...divProps
}: ComponentProps<"div"> & {
  status?: ToolCallMessagePartStatus;
}) {
  if (status?.type !== "incomplete") {
    return null;
  }

  const { error, reason } = status;

  let errorText: string | null = null;
  if (error) {
    errorText = typeof error === "string" ? error : JSON.stringify(error);
  }
  if (!errorText) {
    return null;
  }

  const headerText = reason === "cancelled" ? "Cancelled reason:" : "Error:";

  return (
    <div
      data-slot="tool-fallback-error"
      className={cn("aui-tool-fallback-error px-4", className)}
      {...divProps}
    >
      <p className="aui-tool-fallback-error-header font-semibold text-muted-foreground">
        {headerText}
      </p>
      <p className="aui-tool-fallback-error-reason text-muted-foreground">
        {errorText}
      </p>
    </div>
  );
}

/**
 * Default renderer for a tool call with no dedicated UI.
 *
 * Assembles root, trigger and content (error, args, result). Cancelled calls
 * get a muted card, dimmed args and no result section.
 */
const ToolFallbackImpl: ToolCallMessagePartComponent = ({
  toolName,
  argsText,
  result,
  status,
}) => {
  const wasCancelled =
    status?.type === "incomplete" && status.reason === "cancelled";
  const rootClassName = cn(
    wasCancelled && "border-muted-foreground/30 bg-muted/30",
  );
  const argsClassName = cn(wasCancelled && "opacity-60");

  return (
    <ToolFallbackRoot className={rootClassName}>
      <ToolFallbackTrigger toolName={toolName} status={status} />
      <ToolFallbackContent>
        <ToolFallbackError status={status} />
        <ToolFallbackArgs argsText={argsText} className={argsClassName} />
        {!wasCancelled && <ToolFallbackResult result={result} />}
      </ToolFallbackContent>
    </ToolFallbackRoot>
  );
};

/**
 * Memoized tool-call fallback, exposed as a compound component: the building
 * blocks are attached as static members (`ToolFallback.Root`, `.Trigger`,
 * `.Content`, `.Args`, `.Result`, `.Error`).
 */
const ToolFallback = Object.assign(memo(ToolFallbackImpl), {
  Root: ToolFallbackRoot,
  Trigger: ToolFallbackTrigger,
  Content: ToolFallbackContent,
  Args: ToolFallbackArgs,
  Result: ToolFallbackResult,
  Error: ToolFallbackError,
}) as unknown as ToolCallMessagePartComponent & {
  Root: typeof ToolFallbackRoot;
  Trigger: typeof ToolFallbackTrigger;
  Content: typeof ToolFallbackContent;
  Args: typeof ToolFallbackArgs;
  Result: typeof ToolFallbackResult;
  Error: typeof ToolFallbackError;
};

ToolFallback.displayName = "ToolFallback";

export {
  ToolFallback,
  ToolFallbackRoot,
  ToolFallbackTrigger,
  ToolFallbackContent,
  ToolFallbackArgs,
  ToolFallbackResult,
  ToolFallbackError,
};


================================================
FILE: studio/frontend/src/components/assistant-ui/tool-group.tsx
================================================
"use client";

import {
  memo,
  useCallback,
  useRef,
  useState,
  type FC,
  type PropsWithChildren,
} from "react";
import { ChevronDownIcon, LoaderIcon } from "lucide-react";
import { cva, type VariantProps } from "class-variance-authority";
import { useScrollLock } from "@assistant-ui/react";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { cn } from "@/lib/utils";

/** Collapse/expand animation length in milliseconds. */
const ANIMATION_DURATION = 200;

/**
 * Class variants for the tool-group root.
 *
 * - `outline` (default): bordered, rounded card with vertical padding.
 * - `ghost`: no extra classes.
 * - `muted`: like `outline`, with a muted border color and background.
 */
const toolGroupVariants = cva("aui-tool-group-root group/tool-group w-full", {
  variants: {
    variant: {
      outline: "corner-squircle rounded-lg border py-3",
      ghost: "",
      muted: "corner-squircle rounded-lg border border-muted-foreground/30 bg-muted/30 py-3",
    },
  },
  defaultVariants: { variant: "outline" },
});

/**
 * Props for `ToolGroupRoot`: the `Collapsible` props and the `variant` option,
 * with `open` and `onOpenChange` re-declared as optional plus an optional
 * `defaultOpen`.
 */
export type ToolGroupRootProps = Omit<
  React.ComponentProps<typeof Collapsible>,
  "open" | "onOpenChange"
> &
  VariantProps<typeof toolGroupVariants> & {
    open?: boolean;
    onOpenChange?: (open: boolean) => void;
    defaultOpen?: boolean;
  };

/**
 * Collapsible container grouping several tool calls.
 *
 * Controlled when `open` is provided; otherwise it keeps its own open state
 * seeded from `defaultOpen`. `lockScroll` is invoked whenever the group is
 * about to close, and `onOpenChange` is always notified. The chosen variant
 * (falling back to "outline") is exposed through `data-variant`.
 */
function ToolGroupRoot({
  className,
  variant,
  open: openProp,
  onOpenChange: onOpenChangeProp,
  defaultOpen = false,
  children,
  ...collapsibleProps
}: ToolGroupRootProps) {
  const rootRef = useRef<HTMLDivElement>(null);
  const [internalOpen, setInternalOpen] = useState(defaultOpen);
  const lockScroll = useScrollLock(rootRef, ANIMATION_DURATION);

  const isControlled = openProp !== undefined;
  const resolvedOpen = isControlled ? openProp : internalOpen;

  const handleOpenChange = useCallback(
    (nextOpen: boolean) => {
      if (!nextOpen) lockScroll();
      if (!isControlled) setInternalOpen(nextOpen);
      onOpenChangeProp?.(nextOpen);
    },
    [lockScroll, isControlled, onOpenChangeProp],
  );

  // Exposes the duration to CSS as a custom property.
  const animationVars = {
    "--animation-duration": `${ANIMATION_DURATION}ms`,
  } as React.CSSProperties;

  return (
    <Collapsible
      ref={rootRef}
      data-slot="tool-group-root"
      data-variant={variant ?? "outline"}
      open={resolvedOpen}
      onOpenChange={handleOpenChange}
      className={cn(
        toolGroupVariants({ variant }),
        "group/tool-group-root",
        className,
      )}
      style={animationVars}
      {...collapsibleProps}
    >
      {children}
    </Collapsible>
  );
}

/**
 * Header row of a tool group; clicking it toggles the collapsible.
 *
 * Displays "<count> tool call" (singular only when `count` is exactly 1) or
 * "<count> tool calls". When `active`, a spinning loader is shown and a
 * shimmer copy of the label is overlaid.
 */
function ToolGroupTrigger({
  count,
  active = false,
  className,
  ...triggerProps
}: React.ComponentProps<typeof CollapsibleTrigger> & {
  count: number;
  active?: boolean;
}) {
  const noun = count === 1 ? "call" : "calls";
  const label = `${count} tool ${noun}`;

  return (
    <CollapsibleTrigger
      data-slot="tool-group-trigger"
      className={cn(
        "aui-tool-group-trigger group/trigger flex items-center gap-2 text-sm transition-colors",
        "group-data-[variant=outline]/tool-group-root:w-full group-data-[variant=outline]/tool-group-root:px-4",
        "group-data-[variant=muted]/tool-group-root:w-full group-data-[variant=muted]/tool-group-root:px-4",
        className,
      )}
      {...triggerProps}
    >
      {active && (
        <LoaderIcon
          data-slot="tool-group-trigger-loader"
          className="aui-tool-group-trigger-loader size-4 shrink-0 animate-spin"
        />
      )}
      <span
        data-slot="tool-group-trigger-label"
        className={cn(
          "aui-tool-group-trigger-label-wrapper relative inline-block text-left font-medium leading-none",
          "group-data-[variant=outline]/tool-group-root:grow",
          "group-data-[variant=muted]/tool-group-root:grow",
        )}
      >
        <span>{label}</span>
        {active && (
          <span
            aria-hidden={true}
            data-slot="tool-group-trigger-shimmer"
            className="aui-tool-group-trigger-shimmer shimmer pointer-events-none absolute inset-0 motion-reduce:animate-none"
          >
            {label}
          </span>
        )}
      </span>
      <ChevronDownIcon
        data-slot="tool-group-trigger-chevron"
        className={cn(
          "aui-tool-group-trigger-chevron size-4 shrink-0",
          "transition-transform duration-(--animation-duration) ease-out",
          "group-data-[state=closed]/trigger:-rotate-90",
          "group-data-[state=open]/trigger:rotate-0",
        )}
      />
    </CollapsibleTrigger>
  );
}

/**
 * Collapsible body of a tool group.
 *
 * Wraps `children` in an inner column container; the outer element carries the
 * open/close animation classes and receives any extra props.
 */
function ToolGroupContent(
  props: React.ComponentProps<typeof CollapsibleContent>,
) {
  const { className, children, ...rest } = props;

  const outerClassName = cn(
    "aui-tool-group-content relative overflow-hidden text-sm outline-none",
    "group/collapsible-content ease-out",
    "data-[state=closed]:animate-collapsible-up",
    "data-[state=open]:animate-collapsible-down",
    "data-[state=closed]:fill-mode-forwards",
    "data-[state=closed]:pointer-events-none",
    "data-[state=open]:duration-(--animation-duration)",
    "data-[state=closed]:duration-(--animation-duration)",
    className,
  );

  // The outline and muted variants add a top border and horizontal padding.
  const innerClassName = cn(
    "mt-2 flex flex-col gap-2",
    "group-data-[variant=outline]/tool-group-root:mt-3 group-data-[variant=outline]/tool-group-root:border-t group-data-[variant=outline]/tool-group-root:px-4 group-data-[variant=outline]/tool-group-root:pt-3",
    "group-data-[variant=muted]/tool-group-root:mt-3 group-data-[variant=muted]/tool-group-root:border-t group-data-[variant=muted]/tool-group-root:px-4 group-data-[variant=muted]/tool-group-root:pt-3",
  );

  return (
    <CollapsibleContent
      data-slot="tool-group-content"
      className={outerClassName}
      {...rest}
    >
      <div className={innerClassName}>{children}</div>
    </CollapsibleContent>
  );
}

/**
 * Type of the exported `ToolGroup`: a component taking `startIndex` /
 * `endIndex` plus children, with the `Root`, `Trigger` and `Content` parts
 * attached as static members.
 */
type ToolGroupComponent = FC<
  PropsWithChildren<{ startIndex: number; endIndex: number }>
> & {
  Root: typeof ToolGroupRoot;
  Trigger: typeof ToolGroupTrigger;
  Content: typeof ToolGroupContent;
};

/**
 * Default tool-group renderer.
 *
 * The number of tool calls is derived as `endIndex - startIndex + 1`. Groups of
 * two or more are wrapped in a collapsible root with a trigger; anything
 * smaller renders its children directly with no wrapper.
 */
const ToolGroupImpl: FC<
  PropsWithChildren<{ startIndex: number; endIndex: number }>
> = ({ children, startIndex, endIndex }) => {
  const toolCount = endIndex - startIndex + 1;
  const needsWrapper = toolCount > 1;

  return needsWrapper ? (
    <ToolGroupRoot>
      <ToolGroupTrigger count={toolCount} />
      <ToolGroupContent>{children}</ToolGroupContent>
    </ToolGroupRoot>
  ) : (
    <>{children}</>
  );
};

// Memoize the default renderer. The cast widens the memo() result to a type
// that also has the static sub-component slots assigned just below.
const ToolGroup = memo(ToolGroupImpl) as unknown as ToolGroupComponent;

ToolGroup.displayName = "ToolGroup";
// Expose the building blocks as ToolGroup.Root / .Trigger / .Content.
ToolGroup.Root = ToolGroupRoot;
ToolGroup.Trigger = ToolGroupTrigger;
ToolGroup.Content = ToolGroupContent;

// The parts are also exported individually alongside the compound component.
export {
  ToolGroup,
  ToolGroupRoot,
  ToolGroupTrigger,
  ToolGroupContent,
  toolGroupVariants,
};


================================================
FILE: studio/frontend/src/components/assistant-ui/tool-ui-python.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { copyToClipboard } from "@/lib/copy-to-clipboard";
import type { ToolCallMessagePartComponent } from "@assistant-ui/react";
import { code as codePlugin } from "@streamdown/code";
import { CheckIcon, CodeIcon, CopyIcon, LoaderIcon } from "lucide-react";
import { memo, useCallback, useMemo, useRef, useState } from "react";
import { Streamdown } from "streamdown";
import {
  ToolFallbackContent,
  ToolFallbackRoot,
  ToolFallbackTrigger,
} from "./tool-fallback";

// Maximum number of characters of code/output rendered before truncation.
const MAX_DISPLAY = 10_000;
// Delay (ms) before the copy button drops its "Copied" state again.
const COPY_RESET_MS = 2000;
// Theme pair handed to Streamdown's `shikiTheme` prop; the cast fixes it as a two-element tuple.
const SHIKI_THEME = ["github-light", "github-dark"] as ["github-light", "github-dark"];

/**
 * Cap `text` at MAX_DISPLAY UTF-16 code units for display.
 *
 * Text within the limit is returned unchanged. Longer text is cut and a
 * "... (truncated)" marker is appended on a new line.
 */
function truncate(text: string): string {
  if (text.length <= MAX_DISPLAY) {
    return text;
  }
  // If the last kept code unit is a high surrogate, its partner would be cut
  // off and a lone surrogate would be rendered; drop it as well.
  const lastKept = text.charCodeAt(MAX_DISPLAY - 1);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  const end = splitsPair ? MAX_DISPLAY - 1 : MAX_DISPLAY;
  return `${text.slice(0, end)}\n... (truncated)`;
}

/**
 * Small "Copy" button that copies `text` via `copyToClipboard`.
 *
 * When the copy call reports success the button switches to a check icon and
 * "Copied" label, then reverts after COPY_RESET_MS.
 */
function CopyBtn({ text }: { text: string }) {
  const [copied, setCopied] = useState(false);
  const resetTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  const handleCopy = useCallback(() => {
    if (!copyToClipboard(text)) {
      return;
    }
    setCopied(true);
    // Cancel any pending reset so repeated clicks restart the countdown.
    if (resetTimer.current) {
      clearTimeout(resetTimer.current);
    }
    resetTimer.current = setTimeout(() => setCopied(false), COPY_RESET_MS);
  }, [text]);

  const Icon = copied ? CheckIcon : CopyIcon;
  const caption = copied ? "Copied" : "Copy";

  return (
    <button
      type="button"
      onClick={handleCopy}
      className="inline-flex items-center gap-1 rounded px-1.5 py-0.5 text-xs text-muted-foreground transition-colors hover:bg-muted hover:text-foreground"
      aria-label="Copy to clipboard"
    >
      <Icon className="size-3" />
      {caption}
    </button>
  );
}

/**
 * Render code with syntax highlighting via Streamdown + shiki. No extra
 * borders — inherits parent container.
 *
 * The (truncated) source is wrapped in a fenced markdown code block tagged with
 * `language` and rendered statically.
 *
 * @param code - Source text to display.
 * @param language - Info string placed after the opening fence.
 */
function HighlightedCode({ code: source, language }: { code: string; language: string }) {
  const markdown = useMemo(() => {
    const body = truncate(source);
    // A fence only closes on a backtick run at least as long as the opener, so
    // make the fence longer than any run inside the code. Otherwise source
    // containing ``` would end the block early and render as markdown.
    let longestRun = 0;
    for (const run of body.match(/`+/g) ?? []) {
      longestRun = Math.max(longestRun, run.length);
    }
    const fence = "`".repeat(Math.max(3, longestRun + 1));
    return `${fence}${language}\n${body}\n${fence}`;
  }, [source, language]);

  return (
    <div className="max-h-48 overflow-auto text-xs [&_pre]:!m-0 [&_pre]:!bg-transparent [&_pre]:!p-0 [&_pre]:!text-xs [&_[data-streamdown=code-block]]:!my-0 [&_[data-streamdown=code-block]]:!p-0 [&_[data-streamdown=code-block]]:!border-0">
      <Streamdown
        mode="static"
        plugins={{ code: codePlugin }}
        controls={{ code: false }}
        shikiTheme={SHIKI_THEME}
      >
        {markdown}
      </Streamdown>
    </div>
  );
}

/**
 * Tool-call UI for Python execution.
 *
 * Shows the submitted code (read from `args.code`) with a copy button, then
 * either a "Running…" indicator while the call is in progress or the captured
 * output once a result is available. The trigger label previews the first 60
 * characters of the code's first line.
 */
const PythonToolUIImpl: ToolCallMessagePartComponent = ({
  args,
  result,
  status,
}) => {
  const code = (args as { code?: string })?.code ?? "";
  const firstLine = code.split("\n")[0]?.slice(0, 60) ?? "";
  const triggerLabel = firstLine ? `Python: ${firstLine}` : "Python";
  const isRunning = status?.type === "running";

  // Strings are shown verbatim; other truthy results are pretty-printed JSON.
  let output = "";
  if (typeof result === "string") {
    output = result;
  } else if (result) {
    output = JSON.stringify(result, null, 2);
  }

  const copyCodeRow = code && (
    <div className="flex justify-end">
      <CopyBtn text={code} />
    </div>
  );

  const outputSection = isRunning ? (
    <div className="mt-2 flex items-center gap-2 text-sm text-muted-foreground">
      <LoaderIcon className="size-3.5 animate-spin" />
      <span>Running&hellip;</span>
    </div>
  ) : output ? (
    <div className="mt-2 border-t border-dashed pt-2">
      <div className="flex items-center justify-between">
        <span className="text-xs font-medium text-muted-foreground">output</span>
        <CopyBtn text={output} />
      </div>
      <pre className="mt-1 max-h-60 overflow-auto whitespace-pre-wrap break-words font-mono text-xs">
        {truncate(output)}
      </pre>
    </div>
  ) : null;

  return (
    <ToolFallbackRoot>
      <ToolFallbackTrigger
        toolName={triggerLabel}
        status={status}
        icon={CodeIcon}
      />
      <ToolFallbackContent>
        <div className="flex flex-col px-4">
          {copyCodeRow}
          <HighlightedCode code={code} language="python" />
          {outputSection}
        </div>
      </ToolFallbackContent>
    </ToolFallbackRoot>
  );
};

// Memoized export; the memo() result is cast back to ToolCallMessagePartComponent.
export const PythonToolUI = memo(
  PythonToolUIImpl,
) as unknown as ToolCallMessagePartComponent;
PythonToolUI.displayName = "PythonToolUI";


================================================
FILE: studio/frontend/src/components/assistant-ui/tool-ui-terminal.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { copyToClipboard } from "@/lib/copy-to-clipboard";
import type { ToolCallMessagePartComponent } from "@assistant-ui/react";
import { CheckIcon, CopyIcon, LoaderIcon, TerminalIcon } from "lucide-react";
import { memo, useCallback, useRef, useState } from "react";
import {
  ToolFallbackContent,
  ToolFallbackRoot,
  ToolFallbackTrigger,
} from "./tool-fallback";

// Maximum number of characters of output rendered before truncation.
const MAX_DISPLAY = 10_000;
// Delay (ms) before the copy button drops its "Copied" state again.
const COPY_RESET_MS = 2000;

/**
 * Cap `text` at MAX_DISPLAY UTF-16 code units for display.
 *
 * Text within the limit is returned unchanged. Longer text is cut and a
 * "... (truncated)" marker is appended on a new line.
 */
function truncate(text: string): string {
  if (text.length <= MAX_DISPLAY) {
    return text;
  }
  // If the last kept code unit is a high surrogate, its partner would be cut
  // off and a lone surrogate would be rendered; drop it as well.
  const lastKept = text.charCodeAt(MAX_DISPLAY - 1);
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  const end = splitsPair ? MAX_DISPLAY - 1 : MAX_DISPLAY;
  return `${text.slice(0, end)}\n... (truncated)`;
}

/**
 * Small "Copy" button that copies `text` via `copyToClipboard`.
 *
 * When the copy call reports success the button switches to a check icon and
 * "Copied" label, then reverts after COPY_RESET_MS.
 */
function CopyBtn({ text }: { text: string }) {
  const [copied, setCopied] = useState(false);
  const resetTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  const handleCopy = useCallback(() => {
    if (!copyToClipboard(text)) {
      return;
    }
    setCopied(true);
    // Cancel any pending reset so repeated clicks restart the countdown.
    if (resetTimer.current) {
      clearTimeout(resetTimer.current);
    }
    resetTimer.current = setTimeout(() => setCopied(false), COPY_RESET_MS);
  }, [text]);

  const Icon = copied ? CheckIcon : CopyIcon;
  const caption = copied ? "Copied" : "Copy";

  return (
    <button
      type="button"
      onClick={handleCopy}
      className="inline-flex items-center gap-1 rounded px-1.5 py-0.5 text-xs text-muted-foreground transition-colors hover:bg-muted hover:text-foreground"
      aria-label="Copy to clipboard"
    >
      <Icon className="size-3" />
      {caption}
    </button>
  );
}

/**
 * Tool-call UI for terminal commands.
 *
 * The trigger shows "$ " plus the first 60 characters of `args.command`. The
 * body shows a "Running…" indicator while the call is in progress, or the
 * captured output with a copy button once a result is available.
 */
const TerminalToolUIImpl: ToolCallMessagePartComponent = ({
  args,
  result,
  status,
}) => {
  const command = (args as { command?: string })?.command ?? "";
  const triggerLabel = command ? `$ ${command.slice(0, 60)}` : "Terminal";
  const isRunning = status?.type === "running";

  // Strings are shown verbatim; other truthy results are pretty-printed JSON.
  let output = "";
  if (typeof result === "string") {
    output = result;
  } else if (result) {
    output = JSON.stringify(result, null, 2);
  }

  const body = isRunning ? (
    <div className="flex items-center gap-2 text-sm text-muted-foreground">
      <LoaderIcon className="size-3.5 animate-spin" />
      <span>Running&hellip;</span>
    </div>
  ) : output ? (
    <div>
      <div className="flex items-center justify-between">
        <span className="text-xs font-medium text-muted-foreground">output</span>
        <CopyBtn text={output} />
      </div>
      <pre className="mt-1 max-h-60 overflow-auto whitespace-pre-wrap break-words font-mono text-xs">
        {truncate(output)}
      </pre>
    </div>
  ) : null;

  return (
    <ToolFallbackRoot>
      <ToolFallbackTrigger
        toolName={triggerLabel}
        status={status}
        icon={TerminalIcon}
      />
      <ToolFallbackContent>
        <div className="flex flex-col px-4">{body}</div>
      </ToolFallbackContent>
    </ToolFallbackRoot>
  );
};

// Memoized export; the memo() result is cast back to ToolCallMessagePartComponent.
export const TerminalToolUI = memo(
  TerminalToolUIImpl,
) as unknown as ToolCallMessagePartComponent;
TerminalToolUI.displayName = "TerminalToolUI";


================================================
FILE: studio/frontend/src/components/assistant-ui/tool-ui-web-search.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { type ToolCallMessagePartComponent, useAuiState } from "@assistant-ui/react";
import { GlobeIcon, LoaderIcon } from "lucide-react";
import { memo, useEffect, useState } from "react";
import { Source, SourceIcon, SourceTitle } from "./sources";
import {
  ToolFallbackContent,
  ToolFallbackRoot,
  ToolFallbackTrigger,
} from "./tool-fallback";

/** One search hit extracted from the raw tool result text. */
interface ParsedSource {
  // Text captured after "Title:".
  title: string;
  // Text captured after "URL:".
  url: string;
  // Text captured after "Snippet:"; empty string when the block has none.
  snippet: string;
}

// A "---" line with a newline on each side separates result blocks.
const RE_BLOCK_SEP = /\n---\n/;
// Field extractors: title and URL capture up to the end of their line.
const RE_TITLE = /Title:\s*(.+)/;
const RE_URL = /URL:\s*(.+)/;
// The `s` flag lets `.` match newlines, so the snippet runs to the end of its block.
const RE_SNIPPET = /Snippet:\s*(.+)/s;

/**
 * Parse the backend's "Title: ...\nURL: ...\nSnippet: ...\n---" format into
 * structured sources.
 *
 * Blocks missing either a title or a URL are dropped; a missing snippet becomes
 * an empty string. Empty input yields an empty list.
 */
function parseSearchResults(raw: string): ParsedSource[] {
  const parsed: ParsedSource[] = [];
  if (!raw) {
    return parsed;
  }

  for (const chunk of raw.split(RE_BLOCK_SEP)) {
    if (!chunk) {
      continue;
    }
    const title = RE_TITLE.exec(chunk);
    const url = RE_URL.exec(chunk);
    if (!title || !url) {
      continue;
    }
    const snippet = RE_SNIPPET.exec(chunk);
    parsed.push({
      title: title[1].trim(),
      url: url[1].trim(),
      snippet: snippet?.[1]?.trim() ?? "",
    });
  }

  return parsed;
}

/**
 * Tool-call UI for web search.
 *
 * While the call is running it shows a "Searching for …" indicator. Afterwards
 * it lists the parsed sources as links, or falls back to the raw result text
 * when nothing could be parsed. The panel opens while running and collapses
 * once the message contains non-empty text.
 */
const WebSearchToolUIImpl: ToolCallMessagePartComponent = ({
  args,
  result,
  status,
}) => {
  const query = (args as { query?: string })?.query ?? "";
  const isRunning = status?.type === "running";
  const sources = result
    ? parseSearchResults(
        typeof result === "string" ? result : JSON.stringify(result),
      )
    : [];

  // Collapse when LLM starts generating text after the tool call
  const hasText = useAuiState(({ message }) =>
    message.content.some((p) => p.type === "text" && "text" in p && (p as { text: string }).text.length > 0),
  );
  const [open, setOpen] = useState(isRunning);
  useEffect(() => {
    if (isRunning) {
      setOpen(true);
    } else if (hasText) {
      setOpen(false);
    }
  }, [isRunning, hasText]);

  return (
    <ToolFallbackRoot open={open} onOpenChange={setOpen}>
      <ToolFallbackTrigger
        toolName={query ? `Searched "${query}"` : "Web Search"}
        status={status}
        icon={GlobeIcon}
      />
      <ToolFallbackContent>
        {isRunning ? (
          <div className="flex items-center gap-2 px-4 text-sm text-muted-foreground">
            <LoaderIcon className="size-3.5 animate-spin" />
            <span>Searching for &ldquo;{query}&rdquo;&hellip;</span>
          </div>
        ) : sources.length > 0 ? (
          <div className="flex flex-col gap-1.5 px-4">
            {/* Two results can share a URL, so the index is part of the key to keep sibling keys unique. */}
            {sources.map((source, index) => (
              <Source
                key={`${index}-${source.url}`}
                href={source.url}
                variant="outline"
                size="default"
                className="flex w-full max-w-full items-center gap-2 py-1.5"
              >
                <SourceIcon url={source.url} className="size-3.5" />
                <SourceTitle className="max-w-none flex-1 truncate">
                  {source.title}
                </SourceTitle>
              </Source>
            ))}
          </div>
        ) : result ? (
          <div className="px-4">
            <pre className="max-h-40 overflow-auto whitespace-pre-wrap break-words rounded bg-muted/50 p-2 text-xs">
              {typeof result === "string"
                ? result
                : JSON.stringify(result, null, 2)}
            </pre>
          </div>
        ) : null}
      </ToolFallbackContent>
    </ToolFallbackRoot>
  );
};

// Memoized export; the memo() result is cast back to ToolCallMessagePartComponent.
export const WebSearchToolUI = memo(
  WebSearchToolUIImpl,
) as unknown as ToolCallMessagePartComponent;
WebSearchToolUI.displayName = "WebSearchToolUI";


================================================
FILE: studio/frontend/src/components/assistant-ui/tooltip-icon-button.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Slottable } from "@radix-ui/react-slot";
import { type ComponentPropsWithRef, forwardRef } from "react";

import { Button } from "@/components/ui/button";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";

/** Props of `TooltipIconButton`: every `Button` prop plus the tooltip settings. */
export type TooltipIconButtonProps = ComponentPropsWithRef<typeof Button> & {
  // Text shown in the tooltip and repeated in a screen-reader-only span.
  tooltip: string;
  // Side of the trigger on which the tooltip content is placed.
  side?: "top" | "bottom" | "left" | "right";
};

/**
 * Ghost icon button wrapped in a tooltip.
 *
 * The tooltip text is rendered both as the tooltip content and as a
 * screen-reader-only label inside the button. `side` defaults to "bottom".
 * Caller props are spread after the default `variant`/`size`, so they can
 * override them.
 */
export const TooltipIconButton = forwardRef<
  HTMLButtonElement,
  TooltipIconButtonProps
>((props, ref) => {
  const { children, tooltip, side = "bottom", className, ...rest } = props;
  const buttonClassName = cn("aui-button-icon size-6 p-1", className);

  return (
    <Tooltip>
      <TooltipTrigger asChild>
        <Button
          variant="ghost"
          size="icon"
          {...rest}
          className={buttonClassName}
          ref={ref}
        >
          <Slottable>{children}</Slottable>
          <span className="aui-sr-only sr-only">{tooltip}</span>
        </Button>
      </TooltipTrigger>
      <TooltipContent side={side}>{tooltip}</TooltipContent>
    </Tooltip>
  );
});

TooltipIconButton.displayName = "TooltipIconButton";


================================================
FILE: studio/frontend/src/components/example.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import type { ComponentProps } from "react";

/**
 * Full-width page wrapper that lays its children out in a centered grid.
 * Accepts any `div` props; they are applied to the inner grid element.
 */
function ExampleWrapper({ className, ...props }: ComponentProps<"div">) {
  const gridClassName = cn(
    "mx-auto grid min-h-screen w-full max-w-5xl min-w-0 content-center items-start gap-8 p-4 pt-2 sm:gap-12 sm:p-6 md:grid-cols-2 md:gap-8 lg:p-12 2xl:max-w-6xl",
    className,
  );

  return (
    <div className="bg-background w-full">
      <div data-slot="example-wrapper" className={gridClassName} {...props} />
    </div>
  );
}

/**
 * A single titled example cell.
 *
 * @param title - Optional caption rendered above the content box.
 * @param className - Extra classes for the dashed content box.
 * @param containerClassName - Extra classes for the outer container.
 */
function Example(
  props: ComponentProps<"div"> & {
    title?: string;
    containerClassName?: string;
  },
) {
  const { title, children, className, containerClassName, ...rest } = props;

  const heading = title && (
    <div className="text-muted-foreground px-1.5 py-2 text-xs font-medium">
      {title}
    </div>
  );

  const outerClassName = cn(
    "mx-auto flex w-full max-w-lg min-w-0 flex-col gap-1 self-stretch lg:max-w-none",
    containerClassName,
  );
  const contentClassName = cn(
    "bg-background text-foreground flex min-w-0 flex-1 flex-col items-start gap-6 border border-dashed p-4 sm:p-6 *:[div:not([class*='w-'])]:w-full",
    className,
  );

  return (
    <div data-slot="example" className={outerClassName} {...rest}>
      {heading}
      <div data-slot="example-content" className={contentClassName}>
        {children}
      </div>
    </div>
  );
}

// Public surface of this module: the page wrapper and the individual example cell.
export { ExampleWrapper, Example };


================================================
FILE: studio/frontend/src/components/layout/dashboard-grid.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { cn } from "@/lib/utils";

// Maps the `cols` prop of DashboardGrid to its `lg:` grid-columns class.
const colsVariants = {
  3: "lg:grid-cols-3",
  4: "lg:grid-cols-4",
} as const;

/**
 * Responsive grid container: one column by default, two from `md`, and
 * `cols` (3 or 4, default 3) columns from `lg`.
 */
function DashboardGrid({
  className,
  cols = 3,
  ...props
}: React.ComponentProps<"div"> & { cols?: 3 | 4 }) {
  const largeScreenColumns = colsVariants[cols];
  const gridClassName = cn(
    "grid grid-cols-1 gap-6 md:grid-cols-2",
    largeScreenColumns,
    className,
  );

  return <div data-slot="dashboard-grid" className={gridClassName} {...props} />;
}

// Sole export of this module.
export { DashboardGrid };


================================================
FILE: studio/frontend/src/components/layout/dashboard-layout.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Full-height page shell that centers its children in a padded column capped
 * at `max-w-7xl`. Extra `div` props are applied to the outer element.
 */
function DashboardLayout(props: React.ComponentProps<"div">) {
  const { className, children, ...rest } = props;
  const shellClassName = cn(
    "min-h-screen w-full bg-background",
    "flex justify-center",
    className,
  );

  return (
    <div data-slot="dashboard-layout" className={shellClassName} {...rest}>
      <div className="w-full max-w-7xl px-6 py-8 lg:px-8">{children}</div>
    </div>
  );
}

// Sole export of this module.
export { DashboardLayout };


================================================
FILE: studio/frontend/src/components/layout/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Barrel file: re-exports the layout components from a single entry point.
export { DashboardLayout } from "./dashboard-layout";
export { DashboardGrid } from "./dashboard-grid";


================================================
FILE: studio/frontend/src/components/markdown/markdown-preview.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import { code } from "@streamdown/code";
import { math } from "@streamdown/math";
import { mermaid } from "@streamdown/mermaid";
import { memo, type ReactElement } from "react";
import { Streamdown } from "streamdown";
import "katex/dist/katex.min.css";

// Streamdown plugin set, defined once at module level so the same object is passed on every render.
const MARKDOWN_PLUGINS = { code, math, mermaid } as const;

/** Props accepted by `MarkdownPreview`. */
type MarkdownPreviewProps = {
  // Markdown source; whitespace-only input renders an "_Empty note_" placeholder.
  markdown: string;
  // Extra classes merged onto the outer container.
  className?: string;
  // When true, uses the borderless, non-interactive container style.
  plain?: boolean;
};

/**
 * Static markdown preview rendered with Streamdown.
 *
 * `plain` switches between a bordered, scrollable box (default) and a
 * full-size, non-interactive container. Whitespace-only markdown is replaced
 * by an "_Empty note_" placeholder.
 */
function MarkdownPreviewImpl({
  markdown,
  className,
  plain = false,
}: MarkdownPreviewProps): ReactElement {
  const framedClassName =
    "nodrag max-h-56 w-full min-w-0 overflow-auto rounded-md border border-border/60 bg-muted/20 p-2 text-xs leading-relaxed";
  const plainClassName =
    "h-full w-full min-w-0 overflow-auto p-2 text-xs leading-relaxed pointer-events-none select-none";
  const containerClassName = cn(
    plain ? plainClassName : framedClassName,
    className,
  );

  // Forces rendered markdown elements to fill the container and wrap long text.
  const markdownClassName =
    "w-full max-w-none min-w-0 space-y-2 [overflow-wrap:anywhere] [&_*]:max-w-none [&_p]:w-full [&_ul]:w-full [&_ol]:w-full [&_li]:w-full [&_h1]:w-full [&_h2]:w-full [&_h3]:w-full [&_h4]:w-full [&_h5]:w-full [&_h6]:w-full [&_pre]:w-full [&_table]:w-full [&_p]:break-words [&_li]:break-words [&_code]:break-words [&_pre]:whitespace-pre-wrap [&_pre]:break-words";

  const content = markdown.trim().length > 0 ? markdown : "_Empty note_";

  return (
    <div className={containerClassName}>
      <Streamdown
        mode="static"
        plugins={MARKDOWN_PLUGINS}
        controls={false}
        className={markdownClassName}
      >
        {content}
      </Streamdown>
    </div>
  );
}

// Memoized public export of the preview component.
export const MarkdownPreview = memo(MarkdownPreviewImpl);


================================================
FILE: studio/frontend/src/components/markdown/mermaid-error.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { MermaidErrorComponentProps } from "streamdown";

/**
 * Reports whether `chart` contains a `//` that is not directly preceded by a
 * colon (so `://` in URLs is ignored), either at the start of a line or later
 * within one.
 */
function hasSlashComment(chart: string): boolean {
  const firstMatchIndex = chart.search(/(^|[^:])\/\/.*/m);
  return firstMatchIndex !== -1;
}

/**
 * Error panel shown when a Mermaid chart fails to render.
 *
 * Displays the error text, a hint about `%%` comments when the chart source
 * appears to contain a `//` comment, and a button that calls `retry`.
 */
export function MermaidError(props: MermaidErrorComponentProps) {
  const { error, chart, retry } = props;
  const showCommentHint = hasSlashComment(chart);

  return (
    <div className="my-4 rounded-lg border border-red-300 bg-red-50 p-3 text-red-800">
      <p className="text-sm font-semibold">Mermaid render failed</p>
      <p className="mt-1 break-words font-mono text-xs">{error}</p>
      {showCommentHint && (
        <p className="mt-1 text-xs">Hint: Mermaid comments use `%%`, not `//`.</p>
      )}
      <button
        type="button"
        onClick={retry}
        className="mt-2 rounded border border-red-300 px-2 py-1 text-xs hover:bg-red-100"
      >
        Retry
      </button>
    </div>
  );
}


================================================
FILE: studio/frontend/src/components/navbar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  HoverCard,
  HoverCardContent,
  HoverCardTrigger,
} from "@/components/ui/hover-card";
import { AnimatedThemeToggler } from "@/components/ui/animated-theme-toggler";
import {
  Sheet,
  SheetContent,
  SheetHeader,
  SheetTitle,
  SheetTrigger,
} from "@/components/ui/sheet";
import { cn } from "@/lib/utils";
import {
  ArrowRight01Icon,
  Book03Icon,
  BubbleChatIcon,
  ChefHatIcon,
  CursorInfo02Icon,
  PackageIcon,
  ZapIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useTrainingRuntimeStore } from "@/features/training";
import { usePlatformStore } from "@/config/env";
import { Link, useRouterState } from "@tanstack/react-router";
import { motion } from "motion/react";
import { useState } from "react";
import { TOUR_OPEN_EVENT } from "@/features/tour";

// Primary navigation entries, in display order. Entries with `enabled: false`
// render as greyed-out text on desktop and are left out of the mobile sheet.
const NAV_ITEMS = [
  { label: "Studio", href: "/studio", icon: ZapIcon, enabled: true },
  { label: "Recipes", href: "/data-recipes", icon: ChefHatIcon, enabled: true },
  { label: "Export", href: "/export", icon: PackageIcon, enabled: true },
  { label: "Chat", href: "/chat", icon: BubbleChatIcon, enabled: true },
];

/**
 * Maps a pathname to the id of the guided tour available on that page.
 * Only exact matches of "/studio", "/chat" and "/export" have a tour; every
 * other path yields null.
 */
function getTourId(pathname: string): "studio" | "chat" | "export" | null {
  switch (pathname) {
    case "/studio":
      return "studio";
    case "/chat":
      return "chat";
    case "/export":
      return "export";
    default:
      return null;
  }
}

/** Returns true when `pathname` is `href` itself or a route nested beneath it. */
function isNavItemActive(pathname: string, href: string): boolean {
  return pathname === href || pathname.startsWith(`${href}/`);
}

/**
 * Application header.
 *
 * Desktop (`md` and up): logo, a pill-style nav with an animated active
 * indicator, a theme toggle, a docs link with hover card, and a tour button.
 * Mobile: a tour icon button and a "Menu" sheet containing the same links.
 *
 * Nav entries are rendered as inert text while training is running (all except
 * Studio) or when the platform is chat-only (all except Chat and Recipes).
 * The tour button dispatches a `TOUR_OPEN_EVENT` window event and is only
 * available on routes for which `getTourId` returns an id.
 */
export function Navbar() {
  const pathname = useRouterState({ select: (s) => s.location.pathname });
  const isTrainingRunning = useTrainingRuntimeStore((s) => s.isTrainingRunning);
  const [mobileOpen, setMobileOpen] = useState(false);

  const chatOnly = usePlatformStore((s) => s.isChatOnly());

  const tourId = getTourId(pathname);

  const openTour = () => {
    if (!tourId) return;
    window.dispatchEvent(
      new CustomEvent(TOUR_OPEN_EVENT, { detail: { id: tourId } }),
    );
  };

  // Shared by the desktop and mobile menus so both disable the same routes.
  const isRouteBlocked = (href: string): boolean => {
    const disabledByTraining = isTrainingRunning && href !== "/studio";
    const disabledByDevice =
      chatOnly && href !== "/chat" && href !== "/data-recipes";
    return disabledByTraining || disabledByDevice;
  };

  return (
    <header className="relative top-0 z-40 h-16 w-full">
      <div className="mx-auto grid h-full max-w-7xl grid-cols-[1fr_auto_1fr] items-center px-4 sm:px-6">
        {/* Left: logo */}
        <Link to={chatOnly ? "/chat" : "/studio"} className="flex items-center gap-1.5 justify-self-start select-none">
          <img
            src="/blacklogo.png"
            alt="Unsloth"
            className="h-9 w-auto dark:hidden"
          />
          <img
            src="/whitelogo.png"
            alt="Unsloth"
            className="hidden h-9 w-auto dark:block"
          />
          <span className="relative -top-[1px] inline-flex items-center text-[10px] font-extrabold leading-none tracking-[0.12em] text-primary">
            BETA
          </span>
        </Link>

        {/* Center: pill nav */}
        <nav
          data-tour="navbar"
          className="hidden items-center rounded-full border border-border bg-card p-1 ring-1 ring-foreground/5 md:flex"
        >
          {NAV_ITEMS.map((item) => {
            const active = isNavItemActive(pathname, item.href);
            if (!item.enabled || isRouteBlocked(item.href)) {
              return (
                <span
                  key={item.href}
                  className="relative rounded-full px-3 py-1.5 text-sm font-medium text-muted-foreground/40 cursor-not-allowed"
                >
                  {item.label}
                </span>
              );
            }
            return (
              <Link
                key={item.href}
                to={item.href}
                className={cn(
                  "relative rounded-full px-3 py-1.5 text-sm font-medium transition-colors",
                  active
                    ? "text-background"
                    : "text-muted-foreground hover:text-foreground",
                )}
              >
                {active && (
                  <motion.span
                    layoutId="nav-pill"
                    className="absolute inset-0 rounded-full bg-foreground"
                    transition={{
                      type: "spring",
                      stiffness: 500,
                      damping: 35,
                      mass: 0.5,
                    }}
                  />
                )}
                <span className="relative z-10 flex items-center">
                  <motion.span
                    initial={false}
                    animate={{
                      width: active ? 14 : 0,
                      marginLeft: active ? -4 : 0,
                      marginRight: active ? 4 : 0,
                      opacity: active ? 1 : 0,
                    }}
                    transition={{ duration: 0.2, ease: [0.165, 0.84, 0.44, 1] }}
                    className="inline-flex shrink-0 items-center justify-center overflow-hidden"
                  >
                    <HugeiconsIcon
                      icon={item.icon}
                      className="size-3.5 -mt-px shrink-0"
                    />
                  </motion.span>
                  {item.label}
                </span>
              </Link>
            );
          })}
        </nav>

        {/* Right: docs/tour desktop */}
        <div className="hidden items-center justify-self-end gap-2 md:flex">
          <AnimatedThemeToggler
            className="flex h-9 w-9 items-center justify-center rounded-md text-muted-foreground transition-colors hover:bg-accent hover:text-foreground [&_svg]:size-4"
            title="Toggle theme"
            aria-label="Toggle theme"
          />
          <HoverCard openDelay={200} closeDelay={100}>
            <HoverCardTrigger asChild={true}>
              <a
                href="https://unsloth.ai/docs"
                target="_blank"
                rel="noopener noreferrer"
                className="flex items-center gap-1.5 text-sm font-medium text-emerald-600 hover:text-emerald-700 transition-colors"
              >
                <HugeiconsIcon icon={Book03Icon} className="size-4" />
                Learn more
              </a>
            </HoverCardTrigger>
            <HoverCardContent align="end" className="w-80 p-0">
              <a
                href="https://unsloth.ai/docs"
                target="_blank"
                rel="noopener noreferrer"
                className="group/card flex flex-col gap-1 p-4 no-underline"
              >
                <p className="text-sm font-semibold font-heading">
                  Unsloth Documentation
                </p>
                <p className="text-xs text-muted-foreground leading-relaxed">
                  Guides on fine-tuning LLMs 2x faster with 70% less memory.
                  Covers LoRA, QLoRA, data formatting, and deployment.
                </p>
                <span className="mt-1 flex items-center gap-1 text-xs font-medium text-emerald-600 group-hover/card:underline">
                  Visit docs
                  <HugeiconsIcon icon={ArrowRight01Icon} className="size-3" />
                </span>
              </a>
            </HoverCardContent>
          </HoverCard>

          {/* Kept in the layout (invisible) when no tour exists so the row width stays stable. */}
          <button
            type="button"
            onClick={tourId ? openTour : undefined}
            className={cn(
              "flex h-9 items-center gap-1.5 rounded-md px-3 text-muted-foreground transition-colors hover:bg-accent hover:text-foreground",
              !tourId && "invisible pointer-events-none",
            )}
            title="Tour"
            aria-hidden={!tourId}
            tabIndex={tourId ? 0 : -1}
          >
            <HugeiconsIcon icon={CursorInfo02Icon} className="size-4" />
            <span className="text-sm font-medium">Tour</span>
          </button>
        </div>

        {/* Right: mobile */}
        <div className="col-start-3 flex items-center gap-2 justify-self-end md:hidden">
          {tourId ? (
            <button
              type="button"
              onClick={openTour}
              className="flex h-9 w-9 items-center justify-center rounded-md text-muted-foreground transition-colors hover:bg-accent hover:text-foreground"
              title="Tour"
              aria-label="Tour"
            >
              <HugeiconsIcon icon={CursorInfo02Icon} className="size-4" />
            </button>
          ) : null}
          <Sheet open={mobileOpen} onOpenChange={setMobileOpen}>
            <SheetTrigger asChild={true}>
              <button
                type="button"
                className="rounded-md border border-border px-3 py-1.5 text-sm font-medium text-foreground"
                aria-label="Open navigation menu"
              >
                Menu
              </button>
            </SheetTrigger>
            <SheetContent side="right" className="w-[300px] p-4">
              <SheetHeader>
                <SheetTitle>Navigate</SheetTitle>
              </SheetHeader>
              <div className="mt-6 flex flex-col gap-2">
                {NAV_ITEMS.filter((item) => item.enabled).map((item) => {
                  // Same matching rule as the desktop pill, so nested routes
                  // highlight their parent entry here too.
                  const active = isNavItemActive(pathname, item.href);
                  if (isRouteBlocked(item.href)) {
                    return (
                      <span
                        key={item.href}
                        className="flex items-center gap-2 rounded-md border border-border px-3 py-2 text-sm font-medium text-muted-foreground/40 cursor-not-allowed"
                      >
                        <HugeiconsIcon icon={item.icon} className="size-4" />
                        {item.label}
                      </span>
                    );
                  }
                  return (
                    <Link
                      key={item.href}
                      to={item.href}
                      onClick={() => setMobileOpen(false)}
                      className={cn(
                        "flex items-center gap-2 rounded-md border px-3 py-2 text-sm font-medium",
                        active
                          ? "border-foreground bg-foreground text-background"
                          : "border-border text-foreground hover:bg-accent",
                      )}
                    >
                      <HugeiconsIcon icon={item.icon} className="size-4" />
                      {item.label}
                    </Link>
                  );
                })}
                <a
                  href="https://unsloth.ai/docs"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="mt-2 flex items-center gap-2 rounded-md border border-border px-3 py-2 text-sm font-medium text-foreground hover:bg-accent"
                  onClick={() => setMobileOpen(false)}
                >
                  <HugeiconsIcon icon={Book03Icon} className="size-4" />
                  Learn more (Docs)
                </a>
                {tourId ? (
                  <button
                    type="button"
                    className="flex items-center gap-2 rounded-md border border-border px-3 py-2 text-left text-sm font-medium text-foreground hover:bg-accent"
                    onClick={() => {
                      openTour();
                      setMobileOpen(false);
                    }}
                  >
                    <HugeiconsIcon icon={CursorInfo02Icon} className="size-4" />
                    Start tour
                  </button>
                ) : null}
                <div className="mt-2 flex items-center justify-between rounded-md border border-border px-3 py-2">
                  <span className="text-sm font-medium text-foreground">Theme</span>
                  <AnimatedThemeToggler
                    className="flex h-8 w-8 items-center justify-center rounded-md text-muted-foreground transition-colors hover:bg-accent hover:text-foreground [&_svg]:size-4"
                    title="Toggle theme"
                    aria-label="Toggle theme"
                  />
                </div>
              </div>
            </SheetContent>
          </Sheet>
        </div>
      </div>
    </header>
  );
}


================================================
FILE: studio/frontend/src/components/section-card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import type { ReactNode } from "react";

/** Props accepted by `SectionCard`. */
interface SectionCardProps {
  /** Node rendered inside the tinted icon box at the start of the header. */
  icon: ReactNode;
  /** Heading text rendered in the header's `<h3>`. */
  title: string;
  /** Muted helper text rendered beneath the title. */
  description: string;
  /** Key into `accentStyles`; `SectionCard` falls back to "emerald" when omitted. */
  accent?: "emerald" | "indigo" | "orange" | "blue";
  /** When truthy, the card gets the accent ring class and a top gradient overlay. */
  featured?: boolean;
  /** Extra classes passed to `cn` after the card's default classes. */
  className?: string;
  /** Optional pill label rendered next to the title. */
  badge?: string;
  /** Optional node rendered at the end of the header row. */
  headerAction?: ReactNode;
  /** Card body, rendered below the header. */
  children: ReactNode;
}

// Tailwind class presets keyed by accent name.
// - `border`: ring colour class that `SectionCard` adds only when `featured` is set.
// - `iconBox`: ring/background/text classes (light + dark) for the header icon box.
// The class names are kept as complete string literals rather than being
// assembled from the accent name.
const accentStyles = {
  emerald: {
    border: "ring-emerald-500/20",
    iconBox:
      "ring-emerald-200 bg-emerald-50 text-emerald-600 dark:ring-emerald-800 dark:bg-emerald-950 dark:text-emerald-400",
  },
  indigo: {
    border: "ring-indigo-500/20",
    iconBox:
      "ring-indigo-200 bg-indigo-50 text-indigo-600 dark:ring-indigo-800 dark:bg-indigo-950 dark:text-indigo-400",
  },
  orange: {
    border: "ring-orange-500/20",
    iconBox:
      "ring-orange-200 bg-orange-50 text-orange-600 dark:ring-orange-800 dark:bg-orange-950 dark:text-orange-400",
  },
  blue: {
    border: "ring-blue-500/20",
    iconBox:
      "ring-blue-200 bg-blue-50 text-blue-600 dark:ring-blue-800 dark:bg-blue-950 dark:text-blue-400",
  },
};

/**
 * Card with an icon/title/description header followed by arbitrary content.
 *
 * The accent selects the icon-box colours from `accentStyles`; when
 * `featured` is truthy the accent ring class and a top gradient overlay are
 * added. The gradient overlay and the badge pill use fixed emerald classes
 * irrespective of `accent`.
 */
export function SectionCard(props: SectionCardProps) {
  const {
    icon,
    title,
    description,
    accent = "emerald",
    featured,
    className,
    badge,
    headerAction,
    children,
  } = props;
  const { border: featuredRing, iconBox } = accentStyles[accent];

  const cardClasses = cn(
    "bg-card corner-squircle rounded-3xl ring-1 ring-foreground/10 flex flex-col gap-5 p-5 relative overflow-clip transition-all duration-300 ease-in-out",
    featured && featuredRing,
    className,
  );
  const iconBoxClasses = cn(
    "rounded-xl corner-squircle p-2 ring-1 shrink-0",
    iconBox,
  );

  // Decorative overlay only; it never intercepts pointer events.
  const featuredOverlay = featured && (
    <div className="pointer-events-none absolute inset-x-0 top-0 h-24 bg-gradient-to-b from-emerald-500/[0.04] to-transparent" />
  );
  const badgePill = badge && (
    <span className="rounded-full bg-emerald-100 px-2 py-0.5 text-[10px] font-semibold text-emerald-700 dark:bg-emerald-900 dark:text-emerald-300">
      {badge}
    </span>
  );
  const trailingAction = headerAction && (
    <div className="shrink-0">{headerAction}</div>
  );

  return (
    <div className={cardClasses}>
      {featuredOverlay}
      {/* Header */}
      <div className="flex items-center gap-3">
        <div className={iconBoxClasses}>{icon}</div>
        <div className="min-w-0 flex-1">
          <div className="flex items-center gap-2 pb-1">
            <h3 className="text-sm font-semibold">{title}</h3>
            {badgePill}
          </div>
          <p className="text-xs text-muted-foreground">{description}</p>
        </div>
        {trailingAction}
      </div>
      {/* Content */}
      {children}
    </div>
  );
}


================================================
FILE: studio/frontend/src/components/ui/accordion.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Accordion as AccordionPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { ArrowDown01Icon, ArrowUp01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Accordion root: renders the Radix `Accordion.Root` primitive tagged with
 * `data-slot="accordion"` and the default bordered, rounded column classes.
 * Remaining props are forwarded to the primitive.
 */
function Accordion(
  props: React.ComponentProps<typeof AccordionPrimitive.Root>,
) {
  const { className, ...rootProps } = props;
  const rootClasses = cn(
    "overflow-hidden rounded-2xl border flex w-full flex-col",
    className,
  );

  return (
    <AccordionPrimitive.Root
      data-slot="accordion"
      className={rootClasses}
      {...rootProps}
    />
  );
}

/**
 * Single accordion section: renders the Radix `Accordion.Item` primitive
 * tagged with `data-slot="accordion-item"` plus the default item classes.
 */
function AccordionItem(
  props: React.ComponentProps<typeof AccordionPrimitive.Item>,
) {
  const { className, ...itemProps } = props;
  const itemClasses = cn("data-open:bg-muted/50 not-last:border-b", className);

  return (
    <AccordionPrimitive.Item
      data-slot="accordion-item"
      className={itemClasses}
      {...itemProps}
    />
  );
}

/**
 * Accordion header button: wraps the Radix `Accordion.Trigger` in an
 * `Accordion.Header` and appends two arrow icons after the children.
 * The icons' classes switch on the trigger's `aria-expanded` group state so
 * that the down arrow is hidden and the up arrow shown when expanded.
 */
function AccordionTrigger(
  props: React.ComponentProps<typeof AccordionPrimitive.Trigger>,
) {
  const { className, children, ...triggerProps } = props;
  const triggerClasses = cn(
    "**:data-[slot=accordion-trigger-icon]:text-muted-foreground gap-6 p-4 text-left text-sm font-medium hover:underline **:data-[slot=accordion-trigger-icon]:ml-auto **:data-[slot=accordion-trigger-icon]:size-4 group/accordion-trigger relative flex flex-1 items-start justify-between border border-transparent transition-all outline-none disabled:pointer-events-none disabled:opacity-50",
    className,
  );

  return (
    <AccordionPrimitive.Header className="flex">
      <AccordionPrimitive.Trigger
        data-slot="accordion-trigger"
        className={triggerClasses}
        {...triggerProps}
      >
        {children}
        {/* Collapsed-state indicator */}
        <HugeiconsIcon
          icon={ArrowDown01Icon}
          strokeWidth={2}
          data-slot="accordion-trigger-icon"
          className="pointer-events-none shrink-0 group-aria-expanded/accordion-trigger:hidden"
        />
        {/* Expanded-state indicator */}
        <HugeiconsIcon
          icon={ArrowUp01Icon}
          strokeWidth={2}
          data-slot="accordion-trigger-icon"
          className="pointer-events-none hidden shrink-0 group-aria-expanded/accordion-trigger:inline"
        />
      </AccordionPrimitive.Trigger>
    </AccordionPrimitive.Header>
  );
}

/**
 * Collapsible accordion body built on the Radix `Accordion.Content` primitive.
 *
 * Note that `className` is applied to the inner wrapper `<div>`, not to the
 * primitive itself; the primitive always receives the fixed animation classes.
 */
function AccordionContent(
  props: React.ComponentProps<typeof AccordionPrimitive.Content>,
) {
  const { className, children, ...contentProps } = props;
  const innerClasses = cn(
    "pt-0 pb-4 [&_a]:hover:text-foreground h-(--radix-accordion-content-height) [&_a]:underline [&_a]:underline-offset-3 [&_p:not(:last-child)]:mb-4",
    className,
  );

  return (
    <AccordionPrimitive.Content
      data-slot="accordion-content"
      className="data-open:animate-accordion-down data-closed:animate-accordion-up px-4 text-sm overflow-hidden"
      {...contentProps}
    >
      <div className={innerClasses}>{children}</div>
    </AccordionPrimitive.Content>
  );
}

export { Accordion, AccordionItem, AccordionTrigger, AccordionContent };


================================================
FILE: studio/frontend/src/components/ui/alert-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { AlertDialog as AlertDialogPrimitive } from "radix-ui";
import type * as React from "react";

import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";

/** Alert dialog root: the Radix `AlertDialog.Root` tagged with `data-slot="alert-dialog"`. */
function AlertDialog(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Root>,
) {
  const rootProps = { ...props };
  return <AlertDialogPrimitive.Root data-slot="alert-dialog" {...rootProps} />;
}

/** Radix `AlertDialog.Trigger` tagged with `data-slot="alert-dialog-trigger"`. */
function AlertDialogTrigger(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Trigger>,
) {
  const triggerProps = { ...props };
  return (
    <AlertDialogPrimitive.Trigger
      data-slot="alert-dialog-trigger"
      {...triggerProps}
    />
  );
}

/** Radix `AlertDialog.Portal` tagged with `data-slot="alert-dialog-portal"`. */
function AlertDialogPortal(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Portal>,
) {
  const portalProps = { ...props };
  return (
    <AlertDialogPrimitive.Portal
      data-slot="alert-dialog-portal"
      {...portalProps}
    />
  );
}

/**
 * Full-screen backdrop for the alert dialog: the Radix `AlertDialog.Overlay`
 * with the default fade/blur classes and `data-slot="alert-dialog-overlay"`.
 */
function AlertDialogOverlay(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Overlay>,
) {
  const { className, ...overlayProps } = props;
  const overlayClasses = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/80 duration-100 supports-backdrop-filter:backdrop-blur-xs fixed inset-0 z-50",
    className,
  );

  return (
    <AlertDialogPrimitive.Overlay
      data-slot="alert-dialog-overlay"
      className={overlayClasses}
      {...overlayProps}
    />
  );
}

/**
 * Alert dialog panel. Renders, inside `AlertDialogPortal`, an
 * `AlertDialogOverlay` followed by the Radix `AlertDialog.Content`.
 *
 * - `size` ("default" | "sm", default "default") is exposed as `data-size`,
 *   which the class list and sibling components key their layout on.
 * - `overlayClassName` is forwarded to the overlay as its `className`.
 */
function AlertDialogContent(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Content> & {
    size?: "default" | "sm";
    overlayClassName?: string;
  },
) {
  const {
    className,
    size = "default",
    overlayClassName,
    ...contentProps
  } = props;
  const contentClasses = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 bg-background ring-foreground/5 gap-6 rounded-4xl p-6 ring-1 duration-100 data-[size=default]:max-w-xs data-[size=sm]:max-w-xs data-[size=default]:sm:max-w-md group/alert-dialog-content fixed top-1/2 left-1/2 z-50 grid w-full -translate-x-1/2 -translate-y-1/2 outline-none",
    className,
  );

  return (
    <AlertDialogPortal>
      <AlertDialogOverlay className={overlayClassName} />
      <AlertDialogPrimitive.Content
        data-slot="alert-dialog-content"
        data-size={size}
        className={contentClasses}
        {...contentProps}
      />
    </AlertDialogPortal>
  );
}

/**
 * Header region of the alert dialog: a grid `<div>` tagged with
 * `data-slot="alert-dialog-header"`. Its layout classes react to the parent
 * content's `data-size` and to the presence of an `alert-dialog-media` slot.
 */
function AlertDialogHeader(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const headerClasses = cn(
    "grid grid-rows-[auto_1fr] place-items-center gap-1.5 text-center has-data-[slot=alert-dialog-media]:grid-rows-[auto_auto_1fr] has-data-[slot=alert-dialog-media]:gap-x-6 sm:group-data-[size=default]/alert-dialog-content:place-items-start sm:group-data-[size=default]/alert-dialog-content:text-left sm:group-data-[size=default]/alert-dialog-content:has-data-[slot=alert-dialog-media]:grid-rows-[auto_1fr]",
    className,
  );

  return (
    <div
      data-slot="alert-dialog-header"
      className={headerClasses}
      {...divProps}
    />
  );
}

/**
 * Footer region of the alert dialog: a `<div>` tagged with
 * `data-slot="alert-dialog-footer"` carrying the default button-row classes.
 */
function AlertDialogFooter(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const footerClasses = cn(
    "flex flex-col-reverse gap-2 group-data-[size=sm]/alert-dialog-content:grid group-data-[size=sm]/alert-dialog-content:grid-cols-2 sm:flex-row sm:justify-end",
    className,
  );

  return (
    <div
      data-slot="alert-dialog-footer"
      className={footerClasses}
      {...divProps}
    />
  );
}

/**
 * Circular media/icon holder for the alert dialog: a `<div>` tagged with
 * `data-slot="alert-dialog-media"`, which the header and title key on.
 */
function AlertDialogMedia(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const mediaClasses = cn(
    "bg-muted mb-2 inline-flex size-16 items-center justify-center rounded-full sm:group-data-[size=default]/alert-dialog-content:row-span-2 *:[svg:not([class*='size-'])]:size-8",
    className,
  );

  return (
    <div
      data-slot="alert-dialog-media"
      className={mediaClasses}
      {...divProps}
    />
  );
}

/**
 * Alert dialog heading: the Radix `AlertDialog.Title` tagged with
 * `data-slot="alert-dialog-title"` and the default title classes.
 */
function AlertDialogTitle(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Title>,
) {
  const { className, ...titleProps } = props;
  const titleClasses = cn(
    "text-lg font-medium sm:group-data-[size=default]/alert-dialog-content:group-has-data-[slot=alert-dialog-media]/alert-dialog-content:col-start-2",
    className,
  );

  return (
    <AlertDialogPrimitive.Title
      data-slot="alert-dialog-title"
      className={titleClasses}
      {...titleProps}
    />
  );
}

/**
 * Alert dialog body text: the Radix `AlertDialog.Description` tagged with
 * `data-slot="alert-dialog-description"` and the default muted text classes.
 */
function AlertDialogDescription(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Description>,
) {
  const { className, ...descriptionProps } = props;
  const descriptionClasses = cn(
    "text-muted-foreground *:[a]:hover:text-foreground text-sm text-balance md:text-pretty *:[a]:underline *:[a]:underline-offset-3",
    className,
  );

  return (
    <AlertDialogPrimitive.Description
      data-slot="alert-dialog-description"
      className={descriptionClasses}
      {...descriptionProps}
    />
  );
}

/**
 * Confirm button of the alert dialog. Renders the Radix `AlertDialog.Action`
 * as the child of `Button` (`asChild`), so the button styling for `variant`
 * (default "default") and `size` (default "default") comes from `Button`.
 */
function AlertDialogAction(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Action> &
    Pick<React.ComponentProps<typeof Button>, "variant" | "size">,
) {
  const {
    className,
    variant = "default",
    size = "default",
    ...actionProps
  } = props;
  const actionClasses = cn(className);

  return (
    <Button variant={variant} size={size} asChild>
      <AlertDialogPrimitive.Action
        data-slot="alert-dialog-action"
        className={actionClasses}
        {...actionProps}
      />
    </Button>
  );
}

/**
 * Dismiss button of the alert dialog. Renders the Radix `AlertDialog.Cancel`
 * as the child of `Button` (`asChild`); `variant` defaults to "outline" and
 * `size` to "default".
 */
function AlertDialogCancel(
  props: React.ComponentProps<typeof AlertDialogPrimitive.Cancel> &
    Pick<React.ComponentProps<typeof Button>, "variant" | "size">,
) {
  const {
    className,
    variant = "outline",
    size = "default",
    ...cancelProps
  } = props;
  const cancelClasses = cn(className);

  return (
    <Button variant={variant} size={size} asChild>
      <AlertDialogPrimitive.Cancel
        data-slot="alert-dialog-cancel"
        className={cancelClasses}
        {...cancelProps}
      />
    </Button>
  );
}

export {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogMedia,
  AlertDialogOverlay,
  AlertDialogPortal,
  AlertDialogTitle,
  AlertDialogTrigger,
};


================================================
FILE: studio/frontend/src/components/ui/alert.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type VariantProps, cva } from "class-variance-authority";
import type * as React from "react";

import { cn } from "@/lib/utils";

// Class-variance-authority config for `Alert`.
// The first argument holds the base classes shared by every alert; `variant`
// selects the colour scheme and falls back to "default" when not supplied.
const alertVariants = cva(
  "grid gap-0.5 rounded-lg border px-4 py-3 text-left text-sm has-data-[slot=alert-action]:relative has-data-[slot=alert-action]:pr-18 has-[>svg]:grid-cols-[auto_1fr] has-[>svg]:gap-x-2.5 *:[svg]:row-span-2 *:[svg]:translate-y-0.5 *:[svg]:text-current *:[svg:not([class*='size-'])]:size-4 w-full relative group/alert",
  {
    variants: {
      variant: {
        default: "bg-card text-card-foreground",
        // Destructive also tints the `alert-description` slot.
        destructive:
          "text-destructive bg-card *:data-[slot=alert-description]:text-destructive/90 *:[svg]:text-current",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

/**
 * Alert container: a `<div role="alert">` tagged with `data-slot="alert"`.
 * Classes come from `alertVariants` for the given `variant`, followed by the
 * caller's `className`.
 */
function Alert(
  props: React.ComponentProps<"div"> & VariantProps<typeof alertVariants>,
) {
  const { className, variant, ...divProps } = props;
  const variantClasses = alertVariants({ variant });

  return (
    <div
      data-slot="alert"
      role="alert"
      className={cn(variantClasses, className)}
      {...divProps}
    />
  );
}

/** Alert heading: a `<div>` tagged with `data-slot="alert-title"` and the default title classes. */
function AlertTitle(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const titleClasses = cn(
    "font-medium group-has-[>svg]/alert:col-start-2 [&_a]:hover:text-foreground [&_a]:underline [&_a]:underline-offset-3",
    className,
  );

  return <div data-slot="alert-title" className={titleClasses} {...divProps} />;
}

/**
 * Alert body text: a `<div>` tagged with `data-slot="alert-description"` and
 * the default muted text classes.
 */
function AlertDescription(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const descriptionClasses = cn(
    "text-muted-foreground text-sm text-balance md:text-pretty [&_p:not(:last-child)]:mb-4 [&_a]:hover:text-foreground [&_a]:underline [&_a]:underline-offset-3",
    className,
  );

  return (
    <div
      data-slot="alert-description"
      className={descriptionClasses}
      {...divProps}
    />
  );
}

/**
 * Action slot of an alert: an absolutely positioned `<div>` tagged with
 * `data-slot="alert-action"` (the marker the alert base classes react to).
 */
function AlertAction(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const actionClasses = cn("absolute top-2.5 right-3", className);

  return (
    <div data-slot="alert-action" className={actionClasses} {...divProps} />
  );
}

export { Alert, AlertTitle, AlertDescription, AlertAction };


================================================
FILE: studio/frontend/src/components/ui/animated-shiny-text.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ComponentPropsWithoutRef, CSSProperties, FC } from "react"

import { cn } from "@/lib/utils"

/** Props for `AnimatedShinyText`: all `<span>` props plus the shimmer width. */
export interface AnimatedShinyTextProps extends ComponentPropsWithoutRef<"span"> {
  /** Shimmer width in pixels, exposed as the `--shiny-width` CSS variable (component default: 100). */
  shimmerWidth?: number
}

/**
 * Inline text with an animated shine effect.
 *
 * Sets the `--shiny-width` CSS variable from `shimmerWidth` (px) on a
 * `<span>` and applies the shimmer animation and gradient classes; caller
 * classes are passed to `cn` last. Other props are spread onto the span.
 */
export const AnimatedShinyText: FC<AnimatedShinyTextProps> = (allProps) => {
  const { children, className, shimmerWidth = 100, ...spanProps } = allProps

  // Custom property consumed by the background-size class below.
  const shimmerVars = {
    "--shiny-width": `${shimmerWidth}px`,
  } as CSSProperties

  const baseTextClasses =
    "mx-auto max-w-md text-neutral-600/70 dark:text-neutral-400/70"
  const shineEffectClasses =
    "animate-shiny-text [background-size:var(--shiny-width)_100%] bg-clip-text [background-position:0_0] bg-no-repeat [transition:background-position_1s_cubic-bezier(.6,.6,0,1)_infinite]"
  const shineGradientClasses =
    "bg-gradient-to-r from-transparent via-black/80 via-50% to-transparent dark:via-white/80"

  return (
    <span
      style={shimmerVars}
      className={cn(
        baseTextClasses,
        shineEffectClasses,
        shineGradientClasses,
        className
      )}
      {...spanProps}
    >
      {children}
    </span>
  )
}


================================================
FILE: studio/frontend/src/components/ui/animated-theme-toggler.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useRef, useState } from "react"
import { Moon, Sun } from "lucide-react"
import { flushSync } from "react-dom"

import { cn } from "@/lib/utils"

/** Props for `AnimatedThemeToggler`: all `<button>` props plus the animation length. */
interface AnimatedThemeTogglerProps extends React.ComponentPropsWithoutRef<"button"> {
  /** Value passed as `duration` to the reveal animation (component default: 400). */
  duration?: number
}

/**
 * Button that flips the `dark` class on `<html>`, persists the choice under
 * the `theme` key in `localStorage`, and — where the View Transitions API is
 * available — reveals the new theme with a circular clip-path animation
 * expanding from the button's centre.
 *
 * `duration` is the animation length handed to `Element.animate`; every other
 * prop is spread onto the underlying `<button>` (after the defaults, so a
 * caller-supplied `type` or `onClick` takes precedence, as before).
 */
export const AnimatedThemeToggler = ({
  className,
  duration = 400,
  ...props
}: AnimatedThemeTogglerProps) => {
  const [isDark, setIsDark] = useState(false)
  const buttonRef = useRef<HTMLButtonElement>(null)

  // Mirror the `dark` class on <html> into state so the icon stays correct
  // even when something else changes the theme.
  useEffect(() => {
    const updateTheme = () => {
      setIsDark(document.documentElement.classList.contains("dark"))
    }

    updateTheme()

    const observer = new MutationObserver(updateTheme)
    observer.observe(document.documentElement, {
      attributes: true,
      attributeFilter: ["class"],
    })

    return () => observer.disconnect()
  }, [])

  const toggleTheme = useCallback(async () => {
    const button = buttonRef.current
    if (!button) return

    const root = document.documentElement

    const applyTheme = () => {
      // Derive the next theme from the DOM (the source of truth) and force the
      // class to that value, so state, DOM and storage can never disagree.
      const nextIsDark = !root.classList.contains("dark")
      setIsDark(nextIsDark)
      root.classList.toggle("dark", nextIsDark)
      localStorage.setItem("theme", nextIsDark ? "dark" : "light")
    }

    // Browsers without the View Transitions API would throw on the call
    // below; switch the theme immediately without the reveal animation.
    if (typeof document.startViewTransition !== "function") {
      applyTheme()
      return
    }

    const transition = document.startViewTransition(() => {
      flushSync(applyTheme)
    })

    try {
      await transition.ready
    } catch {
      // `ready` rejects when the transition cannot start (e.g. it was
      // skipped). The update callback is still invoked in that case, so only
      // the animation is dropped.
      return
    }

    const { top, left, width, height } = button.getBoundingClientRect()
    const x = left + width / 2
    const y = top + height / 2
    // Distance from the circle's centre to the farthest viewport corner, so
    // the final circle always covers the whole viewport.
    const maxRadius = Math.hypot(
      Math.max(x, window.innerWidth - x),
      Math.max(y, window.innerHeight - y)
    )

    root.animate(
      {
        clipPath: [
          `circle(0px at ${x}px ${y}px)`,
          `circle(${maxRadius}px at ${x}px ${y}px)`,
        ],
      },
      {
        duration,
        easing: "ease-in-out",
        pseudoElement: "::view-transition-new(root)",
      }
    )
  }, [duration])

  return (
    <button
      ref={buttonRef}
      // Default to a non-submitting button so the toggle is safe inside forms.
      type="button"
      onClick={toggleTheme}
      className={cn(className)}
      {...props}
    >
      {isDark ? <Sun /> : <Moon />}
      <span className="sr-only">Toggle theme</span>
    </button>
  )
}


================================================
FILE: studio/frontend/src/components/ui/aspect-ratio.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { AspectRatio as AspectRatioPrimitive } from "radix-ui";

/** Radix `AspectRatio.Root` tagged with `data-slot="aspect-ratio"`; all props are forwarded. */
function AspectRatio(
  props: React.ComponentProps<typeof AspectRatioPrimitive.Root>,
) {
  const rootProps = { ...props };
  return <AspectRatioPrimitive.Root data-slot="aspect-ratio" {...rootProps} />;
}

export { AspectRatio };


================================================
FILE: studio/frontend/src/components/ui/avatar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Avatar as AvatarPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Avatar root: the Radix `Avatar.Root` tagged with `data-slot="avatar"`.
 * `size` ("default" | "sm" | "lg", default "default") is exposed as
 * `data-size`, which the class list and child components key on.
 */
function Avatar(
  props: React.ComponentProps<typeof AvatarPrimitive.Root> & {
    size?: "default" | "sm" | "lg";
  },
) {
  const { className, size = "default", ...rootProps } = props;
  const avatarClasses = cn(
    "size-8 rounded-full after:rounded-full data-[size=lg]:size-10 data-[size=sm]:size-6 after:border-border group/avatar relative flex shrink-0 select-none after:absolute after:inset-0 after:border after:mix-blend-darken dark:after:mix-blend-lighten",
    className,
  );

  return (
    <AvatarPrimitive.Root
      data-slot="avatar"
      data-size={size}
      className={avatarClasses}
      {...rootProps}
    />
  );
}

/** Avatar picture: the Radix `Avatar.Image` tagged with `data-slot="avatar-image"`. */
function AvatarImage(
  props: React.ComponentProps<typeof AvatarPrimitive.Image>,
) {
  const { className, ...imageProps } = props;
  const imageClasses = cn(
    "rounded-full aspect-square size-full object-cover",
    className,
  );

  return (
    <AvatarPrimitive.Image
      data-slot="avatar-image"
      className={imageClasses}
      {...imageProps}
    />
  );
}

/** Avatar placeholder: the Radix `Avatar.Fallback` tagged with `data-slot="avatar-fallback"`. */
function AvatarFallback(
  props: React.ComponentProps<typeof AvatarPrimitive.Fallback>,
) {
  const { className, ...fallbackProps } = props;
  const fallbackClasses = cn(
    "bg-muted text-muted-foreground rounded-full flex size-full items-center justify-center text-sm group-data-[size=sm]/avatar:text-xs",
    className,
  );

  return (
    <AvatarPrimitive.Fallback
      data-slot="avatar-fallback"
      className={fallbackClasses}
      {...fallbackProps}
    />
  );
}

/**
 * Small badge pinned to the avatar's bottom-right corner: a `<span>` tagged
 * with `data-slot="avatar-badge"`. Its size classes follow the enclosing
 * avatar's `data-size`.
 */
function AvatarBadge(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const badgeClasses = cn(
    "bg-primary text-primary-foreground ring-background absolute right-0 bottom-0 z-10 inline-flex items-center justify-center rounded-full bg-blend-color ring-2 select-none",
    // Per-size dimensions, keyed on the parent avatar's data-size.
    "group-data-[size=sm]/avatar:size-2 group-data-[size=sm]/avatar:[&>svg]:hidden",
    "group-data-[size=default]/avatar:size-2.5 group-data-[size=default]/avatar:[&>svg]:size-2",
    "group-data-[size=lg]/avatar:size-3 group-data-[size=lg]/avatar:[&>svg]:size-2",
    className,
  );

  return (
    <span data-slot="avatar-badge" className={badgeClasses} {...spanProps} />
  );
}

/** Overlapping row of avatars: a `<div>` tagged with `data-slot="avatar-group"`. */
function AvatarGroup(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const groupClasses = cn(
    "*:data-[slot=avatar]:ring-background group/avatar-group flex -space-x-2 *:data-[slot=avatar]:ring-2",
    className,
  );

  return (
    <div data-slot="avatar-group" className={groupClasses} {...divProps} />
  );
}

/**
 * Counter bubble for an avatar group: a `<div>` tagged with
 * `data-slot="avatar-group-count"`. Its size classes follow any
 * `data-size` found inside the enclosing `avatar-group`.
 */
function AvatarGroupCount(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const countClasses = cn(
    "bg-muted text-muted-foreground size-8 rounded-full text-sm group-has-data-[size=lg]/avatar-group:size-10 group-has-data-[size=sm]/avatar-group:size-6 [&>svg]:size-4 group-has-data-[size=lg]/avatar-group:[&>svg]:size-5 group-has-data-[size=sm]/avatar-group:[&>svg]:size-3 ring-background relative flex shrink-0 items-center justify-center ring-2",
    className,
  );

  return (
    <div
      data-slot="avatar-group-count"
      className={countClasses}
      {...divProps}
    />
  );
}

export {
  Avatar,
  AvatarImage,
  AvatarFallback,
  AvatarGroup,
  AvatarGroupCount,
  AvatarBadge,
};


================================================
FILE: studio/frontend/src/components/ui/badge.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/* eslint-disable react-refresh/only-export-components */

import { type VariantProps, cva } from "class-variance-authority";
import { Slot } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

// Class-variance-authority config for `Badge`, exported so other components
// can reuse the badge styling.
// The first argument holds the base classes shared by every badge; `variant`
// selects the colour scheme and falls back to "default" when not supplied.
export const badgeVariants = cva(
  "h-5 gap-1 rounded-4xl border border-transparent px-2 py-0.5 text-xs font-medium transition-all has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&>svg]:size-3! inline-flex items-center justify-center w-fit whitespace-nowrap shrink-0 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive overflow-hidden group/badge",
  {
    variants: {
      variant: {
        // `[a]:hover:*` classes apply the hover style only when the badge is an anchor.
        default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
        secondary:
          "bg-secondary text-secondary-foreground [a]:hover:bg-secondary/80",
        destructive:
          "bg-destructive/10 [a]:hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 text-destructive dark:bg-destructive/20",
        outline:
          "border-border text-foreground [a]:hover:bg-muted [a]:hover:text-muted-foreground bg-input/30",
        ghost:
          "hover:bg-muted hover:text-muted-foreground dark:hover:bg-muted/50",
        link: "text-primary underline-offset-4 hover:underline",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

/**
 * Small status label.
 *
 * Renders a `<span>` by default; with `asChild` it renders Radix `Slot.Root`
 * instead. The element is tagged with `data-slot="badge"` and
 * `data-variant`, and styled via `badgeVariants` for the given `variant`
 * (default "default") followed by the caller's `className`.
 */
export function Badge(
  props: React.ComponentProps<"span"> &
    VariantProps<typeof badgeVariants> & {
      asChild?: boolean;
    },
): React.ReactElement {
  const {
    className,
    variant = "default",
    asChild = false,
    ...elementProps
  } = props;
  const Tag = asChild ? Slot.Root : "span";
  const badgeClasses = cn(badgeVariants({ variant }), className);

  return (
    <Tag
      data-slot="badge"
      data-variant={variant}
      className={badgeClasses}
      {...elementProps}
    />
  );
}


================================================
FILE: studio/frontend/src/components/ui/breadcrumb.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Slot } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import {
  ArrowRight01Icon,
  MoreHorizontalCircle01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/** Breadcrumb landmark: a `<nav aria-label="breadcrumb">` tagged with `data-slot="breadcrumb"`. */
function Breadcrumb(props: React.ComponentProps<"nav">) {
  const { className, ...navProps } = props;
  const navClasses = cn(className);

  return (
    <nav
      aria-label="breadcrumb"
      data-slot="breadcrumb"
      className={navClasses}
      {...navProps}
    />
  );
}

/** Ordered list holding the breadcrumb items: an `<ol>` tagged with `data-slot="breadcrumb-list"`. */
function BreadcrumbList(props: React.ComponentProps<"ol">) {
  const { className, ...listProps } = props;
  const listClasses = cn(
    "text-muted-foreground gap-1.5 text-sm sm:gap-2.5 flex flex-wrap items-center break-words",
    className,
  );

  return (
    <ol data-slot="breadcrumb-list" className={listClasses} {...listProps} />
  );
}

/** Single breadcrumb entry: an `<li>` tagged with `data-slot="breadcrumb-item"`. */
function BreadcrumbItem(props: React.ComponentProps<"li">) {
  const { className, ...itemProps } = props;
  const itemClasses = cn("gap-1.5 inline-flex items-center", className);

  return (
    <li data-slot="breadcrumb-item" className={itemClasses} {...itemProps} />
  );
}

/**
 * Navigable breadcrumb entry. Renders an `<a>` by default; with `asChild` it
 * renders Radix `Slot.Root` instead. Tagged with `data-slot="breadcrumb-link"`.
 */
function BreadcrumbLink(
  props: React.ComponentProps<"a"> & {
    asChild?: boolean;
  },
) {
  const { asChild, className, ...linkProps } = props;
  const Tag = asChild ? Slot.Root : "a";
  const linkClasses = cn("hover:text-foreground transition-colors", className);

  return (
    <Tag data-slot="breadcrumb-link" className={linkClasses} {...linkProps} />
  );
}

/**
 * Breadcrumb entry for the current page: a `<span>` with `role="link"`,
 * `aria-disabled="true"` and `aria-current="page"`, tagged with
 * `data-slot="breadcrumb-page"`.
 */
function BreadcrumbPage(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const pageClasses = cn("text-foreground font-normal", className);

  return (
    <span
      data-slot="breadcrumb-page"
      role="link"
      aria-disabled="true"
      aria-current="page"
      className={pageClasses}
      {...spanProps}
    />
  );
}

/**
 * Decorative divider between breadcrumb items: an `<li>` with
 * `role="presentation"` and `aria-hidden="true"`. Renders `children` when
 * they are neither `null` nor `undefined`, otherwise a right-arrow icon.
 */
function BreadcrumbSeparator(props: React.ComponentProps<"li">) {
  const { children, className, ...itemProps } = props;
  const separatorClasses = cn("[&>svg]:size-3.5", className);
  const defaultGlyph = <HugeiconsIcon icon={ArrowRight01Icon} strokeWidth={2} />;

  return (
    <li
      data-slot="breadcrumb-separator"
      role="presentation"
      aria-hidden="true"
      className={separatorClasses}
      {...itemProps}
    >
      {children ?? defaultGlyph}
    </li>
  );
}

/**
 * Placeholder for collapsed breadcrumb items: a `<span>` with
 * `role="presentation"` and `aria-hidden="true"` containing a "more" icon
 * and a visually hidden "More" label.
 */
function BreadcrumbEllipsis(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const ellipsisClasses = cn(
    "size-5 [&>svg]:size-4 flex items-center justify-center",
    className,
  );

  return (
    <span
      data-slot="breadcrumb-ellipsis"
      role="presentation"
      aria-hidden="true"
      className={ellipsisClasses}
      {...spanProps}
    >
      <HugeiconsIcon icon={MoreHorizontalCircle01Icon} strokeWidth={2} />
      <span className="sr-only">More</span>
    </span>
  );
}

export {
  Breadcrumb,
  BreadcrumbList,
  BreadcrumbItem,
  BreadcrumbLink,
  BreadcrumbPage,
  BreadcrumbSeparator,
  BreadcrumbEllipsis,
};


================================================
FILE: studio/frontend/src/components/ui/button.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/* eslint-disable react-refresh/only-export-components */

import { type VariantProps, cva } from "class-variance-authority";
import { Slot } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

// Class-variance-authority config for `Button`, exported so other components
// can reuse the button styling on non-button elements.
// The first argument holds the base classes shared by every button.
// `variant` picks the colour scheme and `size` the height/padding; both fall
// back to "default" when not supplied.
export const buttonVariants = cva(
  "focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 rounded-4xl border border-transparent bg-clip-padding text-sm font-medium focus-visible:ring-[3px] aria-invalid:ring-[3px] [&_svg:not([class*='size-'])]:size-4 inline-flex items-center justify-center whitespace-nowrap transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none shrink-0 [&_svg]:shrink-0 outline-none group/button select-none cursor-pointer",
  {
    variants: {
      variant: {
        default: "bg-primary text-primary-foreground hover:bg-primary/80",
        dark: "bg-foreground text-background hover:bg-foreground/85 dark:bg-foreground dark:text-background",
        outline:
          "border-border bg-input/30 hover:bg-input/50 hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground",
        secondary:
          "bg-secondary text-secondary-foreground hover:bg-secondary/80 aria-expanded:bg-secondary aria-expanded:text-secondary-foreground",
        ghost:
          "hover:bg-muted hover:text-foreground dark:hover:bg-muted/50 aria-expanded:bg-muted aria-expanded:text-foreground",
        destructive:
          "bg-destructive/10 hover:bg-destructive/20 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/20 text-destructive focus-visible:border-destructive/40 dark:hover:bg-destructive/30",
        link: "text-primary underline-offset-4 hover:underline",
      },
      size: {
        // Text sizes adjust padding when an inline start/end icon slot is present.
        default:
          "h-9 gap-1.5 px-3 has-data-[icon=inline-end]:pr-2.5 has-data-[icon=inline-start]:pl-2.5",
        xs: "h-6 gap-1 px-2.5 text-xs has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2 [&_svg:not([class*='size-'])]:size-3",
        sm: "h-8 gap-1 px-3 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
        lg: "h-10 gap-1.5 px-4 has-data-[icon=inline-end]:pr-3 has-data-[icon=inline-start]:pl-3",
        // Square, icon-only sizes.
        icon: "size-9",
        "icon-xs": "size-6 [&_svg:not([class*='size-'])]:size-3",
        "icon-sm": "size-8",
        "icon-lg": "size-10",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  },
);

/**
 * Base button.
 *
 * Renders a `<button>` by default; with `asChild` it renders Radix
 * `Slot.Root` instead. The element is tagged with `data-slot="button"`,
 * `data-variant` and `data-size`, and styled via `buttonVariants`, which
 * receives `variant` (default "default"), `size` (default "default") and the
 * caller's `className`.
 */
export function Button(
  props: React.ComponentProps<"button"> &
    VariantProps<typeof buttonVariants> & {
      asChild?: boolean;
    },
): React.ReactElement {
  const {
    className,
    variant = "default",
    size = "default",
    asChild = false,
    ...elementProps
  } = props;
  const Tag = asChild ? Slot.Root : "button";
  const buttonClasses = cn(buttonVariants({ variant, size, className }));

  return (
    <Tag
      data-slot="button"
      data-variant={variant}
      data-size={size}
      className={buttonClasses}
      {...elementProps}
    />
  );
}


================================================
FILE: studio/frontend/src/components/ui/calendar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import * as React from "react";
import {
  type DayButton,
  DayPicker,
  getDefaultClassNames,
} from "react-day-picker";

import { Button, buttonVariants } from "@/components/ui/button";
import { cn } from "@/lib/utils";
import {
  ArrowDownIcon,
  ArrowLeftIcon,
  ArrowRightIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Styled wrapper around react-day-picker's `DayPicker`.
 *
 * @param className       Extra classes merged onto the DayPicker root.
 * @param classNames      Per-slot class overrides; spread after the defaults
 *                        below, so a supplied key replaces the default entry.
 * @param showOutsideDays Forwarded to DayPicker (defaults to `true`).
 * @param captionLayout   Forwarded to DayPicker (defaults to `"label"`); also
 *                        selects the caption label styling.
 * @param buttonVariant   Button variant used for the previous/next nav buttons.
 * @param formatters      Merged over the default month-dropdown formatter.
 * @param components      Merged over the default Root/Chevron/DayButton/WeekNumber.
 * @param props           Remaining DayPicker props, forwarded last.
 */
function Calendar({
  className,
  classNames,
  showOutsideDays = true,
  captionLayout = "label",
  buttonVariant = "ghost",
  formatters,
  components,
  ...props
}: React.ComponentProps<typeof DayPicker> & {
  buttonVariant?: React.ComponentProps<typeof Button>["variant"];
}) {
  const defaultClassNames = getDefaultClassNames();

  return (
    <DayPicker
      showOutsideDays={showOutsideDays}
      className={cn(
        "p-3 [--cell-radius:var(--radius-4xl)] [--cell-size:--spacing(8)] bg-background group/calendar [[data-slot=card-content]_&]:bg-transparent [[data-slot=popover-content]_&]:bg-transparent",
        // String.raw keeps the backslash-escaped underscore intact in the class name.
        String.raw`rtl:**:[.rdp-button\_next>svg]:rotate-180`,
        String.raw`rtl:**:[.rdp-button\_previous>svg]:rotate-180`,
        className,
      )}
      captionLayout={captionLayout}
      formatters={{
        formatMonthDropdown: (date) =>
          date.toLocaleString("default", { month: "short" }),
        ...formatters,
      }}
      classNames={{
        root: cn("w-fit", defaultClassNames.root),
        months: cn(
          "flex gap-4 flex-col md:flex-row relative",
          defaultClassNames.months,
        ),
        month: cn("flex flex-col w-full gap-4", defaultClassNames.month),
        nav: cn(
          "flex items-center gap-1 w-full absolute top-0 inset-x-0 justify-between",
          defaultClassNames.nav,
        ),
        button_previous: cn(
          buttonVariants({ variant: buttonVariant }),
          "size-(--cell-size) aria-disabled:opacity-50 p-0 select-none",
          defaultClassNames.button_previous,
        ),
        button_next: cn(
          buttonVariants({ variant: buttonVariant }),
          "size-(--cell-size) aria-disabled:opacity-50 p-0 select-none",
          defaultClassNames.button_next,
        ),
        month_caption: cn(
          "flex items-center justify-center h-(--cell-size) w-full px-(--cell-size)",
          defaultClassNames.month_caption,
        ),
        dropdowns: cn(
          "w-full flex items-center text-sm font-medium justify-center h-(--cell-size) gap-1.5",
          defaultClassNames.dropdowns,
        ),
        dropdown_root: cn(
          "relative cn-calendar-dropdown-root rounded-(--cell-radius)",
          defaultClassNames.dropdown_root,
        ),
        dropdown: cn(
          "absolute bg-popover inset-0 opacity-0",
          defaultClassNames.dropdown,
        ),
        caption_label: cn(
          "select-none font-medium",
          captionLayout === "label"
            ? "text-sm"
            : "cn-calendar-caption-label rounded-(--cell-radius) flex items-center gap-1 text-sm  [&>svg]:text-muted-foreground [&>svg]:size-3.5",
          defaultClassNames.caption_label,
        ),
        table: "w-full border-collapse",
        weekdays: cn("flex", defaultClassNames.weekdays),
        weekday: cn(
          "text-muted-foreground rounded-(--cell-radius) flex-1 font-normal text-[0.8rem] select-none",
          defaultClassNames.weekday,
        ),
        week: cn("flex w-full mt-2", defaultClassNames.week),
        week_number_header: cn(
          "select-none w-(--cell-size)",
          defaultClassNames.week_number_header,
        ),
        week_number: cn(
          "text-[0.8rem] select-none text-muted-foreground",
          defaultClassNames.week_number,
        ),
        day: cn(
          "relative w-full rounded-(--cell-radius) h-full p-0 text-center [&:last-child[data-selected=true]_button]:rounded-r-(--cell-radius) group/day aspect-square select-none",
          // With week numbers shown, the first day cell is the second child of the row.
          props.showWeekNumber
            ? "[&:nth-child(2)[data-selected=true]_button]:rounded-l-(--cell-radius)"
            : "[&:first-child[data-selected=true]_button]:rounded-l-(--cell-radius)",
          defaultClassNames.day,
        ),
        range_start: cn(
          "rounded-l-(--cell-radius) bg-muted relative after:bg-muted after:absolute after:inset-y-0 after:w-4 after:right-0 -z-0 isolate",
          defaultClassNames.range_start,
        ),
        range_middle: cn("rounded-none", defaultClassNames.range_middle),
        // Mirrors range_start: the ::after strip uses the same `bg-muted` fill,
        // anchored to the left edge instead of the right.
        range_end: cn(
          "rounded-r-(--cell-radius) bg-muted relative after:bg-muted after:absolute after:inset-y-0 after:w-4 after:left-0 -z-0 isolate",
          defaultClassNames.range_end,
        ),
        today: cn(
          "bg-muted text-foreground rounded-(--cell-radius) data-[selected=true]:rounded-none",
          defaultClassNames.today,
        ),
        outside: cn(
          "text-muted-foreground aria-selected:text-muted-foreground",
          defaultClassNames.outside,
        ),
        disabled: cn(
          "text-muted-foreground opacity-50",
          defaultClassNames.disabled,
        ),
        hidden: cn("invisible", defaultClassNames.hidden),
        // Caller overrides replace the default entry for any slot they name.
        ...classNames,
      }}
      components={{
        Root: ({ className, rootRef, ...props }) => {
          return (
            <div
              data-slot="calendar"
              ref={rootRef}
              className={cn(className)}
              {...props}
            />
          );
        },
        // Picks the arrow icon by orientation; anything other than left/right gets the down arrow.
        Chevron: ({ className, orientation, ...props }) => {
          if (orientation === "left") {
            return (
              <HugeiconsIcon
                icon={ArrowLeftIcon}
                strokeWidth={2}
                className={cn("size-4", className)}
                {...props}
              />
            );
          }

          if (orientation === "right") {
            return (
              <HugeiconsIcon
                icon={ArrowRightIcon}
                strokeWidth={2}
                className={cn("size-4", className)}
                {...props}
              />
            );
          }

          return (
            <HugeiconsIcon
              icon={ArrowDownIcon}
              strokeWidth={2}
              className={cn("size-4", className)}
              {...props}
            />
          );
        },
        DayButton: CalendarDayButton,
        WeekNumber: ({ children, ...props }) => {
          return (
            <td {...props}>
              <div className="flex size-(--cell-size) items-center justify-center text-center">
                {children}
              </div>
            </td>
          );
        },
        ...components,
      }}
      {...props}
    />
  );
}

/**
 * Day cell button used by `Calendar`.
 *
 * Exposes the selection/range modifiers as `data-*` attributes for styling and
 * focuses the underlying button whenever `modifiers.focused` is truthy.
 */
function CalendarDayButton(
  dayButtonProps: React.ComponentProps<typeof DayButton>,
) {
  const { className, day, modifiers, ...rest } = dayButtonProps;
  const defaultClassNames = getDefaultClassNames();

  const buttonRef = React.useRef<HTMLButtonElement>(null);
  const isFocused = modifiers.focused;
  React.useEffect(() => {
    if (isFocused) {
      buttonRef.current?.focus();
    }
  }, [isFocused]);

  // A "single" selection is a selected day that is not part of any range segment.
  const isSingleSelection =
    modifiers.selected &&
    !modifiers.range_start &&
    !modifiers.range_end &&
    !modifiers.range_middle;

  const dayClassName = cn(
    "data-[selected-single=true]:bg-primary data-[selected-single=true]:text-primary-foreground data-[range-middle=true]:bg-muted data-[range-middle=true]:text-foreground data-[range-start=true]:bg-primary data-[range-start=true]:text-primary-foreground data-[range-end=true]:bg-primary data-[range-end=true]:text-primary-foreground group-data-[focused=true]/day:border-ring group-data-[focused=true]/day:ring-ring/50 dark:hover:text-foreground relative isolate z-10 flex aspect-square size-auto w-full min-w-(--cell-size) flex-col gap-1 border-0 leading-none font-normal group-data-[focused=true]/day:relative group-data-[focused=true]/day:z-10 group-data-[focused=true]/day:ring-[3px] data-[range-end=true]:rounded-(--cell-radius) data-[range-end=true]:rounded-r-(--cell-radius) data-[range-middle=true]:rounded-none data-[range-start=true]:rounded-(--cell-radius) data-[range-start=true]:rounded-l-(--cell-radius) [&>span]:text-xs [&>span]:opacity-70",
    defaultClassNames.day,
    className,
  );

  return (
    <Button
      ref={buttonRef}
      variant="ghost"
      size="icon"
      data-day={day.date.toLocaleDateString()}
      data-selected-single={isSingleSelection}
      data-range-start={modifiers.range_start}
      data-range-end={modifiers.range_end}
      data-range-middle={modifiers.range_middle}
      className={dayClassName}
      {...rest}
    />
  );
}

export { Calendar, CalendarDayButton };


================================================
FILE: studio/frontend/src/components/ui/card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Card container. `size` ("default" | "sm") is exposed as `data-size`, which
 * the class list below and the card sub-components key their spacing on.
 */
function Card(
  cardProps: React.ComponentProps<"div"> & { size?: "default" | "sm" },
) {
  const { className, size = "default", ...rest } = cardProps;
  const cardClassName = cn(
    "ring-foreground/10 bg-card corner-squircle text-card-foreground gap-6 overflow-hidden rounded-4xl py-6 text-sm ring-1 has-[>img:first-child]:pt-0 data-[size=sm]:gap-4 data-[size=sm]:py-4 *:[img:first-child]:rounded-t-xl *:[img:last-child]:rounded-b-xl group/card  flex flex-col",
    className,
  );

  return (
    <div data-slot="card" data-size={size} className={cardClassName} {...rest} />
  );
}

/** Header region of a card; a grid that adapts when an action or description slot is present. */
function CardHeader(headerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = headerProps;
  const headerClassName = cn(
    "gap-2 rounded-t-xl px-6 group-data-[size=sm]/card:px-4 [.border-b]:pb-6 group-data-[size=sm]/card:[.border-b]:pb-4 group/card-header @container/card-header grid auto-rows-min items-start has-data-[slot=card-action]:grid-cols-[1fr_auto] has-data-[slot=card-description]:grid-rows-[auto_auto]",
    className,
  );

  return <div data-slot="card-header" className={headerClassName} {...rest} />;
}

/** Title text of a card, rendered as a `div` with `data-slot="card-title"`. */
function CardTitle(titleProps: React.ComponentProps<"div">) {
  const { className, ...rest } = titleProps;
  const titleClassName = cn("text-base font-medium", className);

  return <div data-slot="card-title" className={titleClassName} {...rest} />;
}

/** Muted secondary text of a card, rendered with `data-slot="card-description"`. */
function CardDescription(descriptionProps: React.ComponentProps<"div">) {
  const { className, ...rest } = descriptionProps;
  const descriptionClassName = cn("text-muted-foreground text-sm", className);

  return (
    <div
      data-slot="card-description"
      className={descriptionClassName}
      {...rest}
    />
  );
}

/** Action slot of a card header; placed in the second grid column, spanning two rows. */
function CardAction(actionProps: React.ComponentProps<"div">) {
  const { className, ...rest } = actionProps;
  const actionClassName = cn(
    "col-start-2 row-span-2 row-start-1 self-start justify-self-end",
    className,
  );

  return <div data-slot="card-action" className={actionClassName} {...rest} />;
}

/** Body region of a card; horizontal padding shrinks when the parent card is `size="sm"`. */
function CardContent(contentProps: React.ComponentProps<"div">) {
  const { className, ...rest } = contentProps;
  const contentClassName = cn("px-6 group-data-[size=sm]/card:px-4", className);

  return (
    <div data-slot="card-content" className={contentClassName} {...rest} />
  );
}

/** Footer region of a card, laid out as a horizontally centered-aligned flex row. */
function CardFooter(footerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = footerProps;
  const footerClassName = cn(
    "rounded-b-xl px-6 group-data-[size=sm]/card:px-4 [.border-t]:pt-6 group-data-[size=sm]/card:[.border-t]:pt-4 flex items-center",
    className,
  );

  return <div data-slot="card-footer" className={footerClassName} {...rest} />;
}

export {
  Card,
  CardHeader,
  CardFooter,
  CardTitle,
  CardAction,
  CardDescription,
  CardContent,
};


================================================
FILE: studio/frontend/src/components/ui/chart.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import * as React from "react";
import * as RechartsPrimitive from "recharts";

import { cn } from "@/lib/utils";

// Format: { THEME_NAME: CSS_SELECTOR }
// The selector is used as a prefix in the CSS rules generated by ChartStyle;
// the empty string for "light" means the rule is emitted without a prefix.
const THEMES = { light: "", dark: ".dark" } as const;

/**
 * Per-series chart configuration, keyed by an arbitrary series key.
 *
 * Each entry may carry a display `label` and an `icon` component, plus EITHER
 * a single `color` OR a `theme` map with one color per THEMES key — the two
 * are mutually exclusive at the type level.
 */
export type ChartConfig = {
  [k in string]: {
    label?: React.ReactNode;
    icon?: React.ComponentType;
  } & (
    | { color?: string; theme?: never }
    | { color?: never; theme: Record<keyof typeof THEMES, string> }
  );
};

/** Value carried by ChartContext: the config passed to the enclosing ChartContainer. */
type ChartContextProps = {
  config: ChartConfig;
};

// Defaults to null so useChart can detect use outside a ChartContainer.
const ChartContext = React.createContext<ChartContextProps | null>(null);

/**
 * Returns the current chart context.
 *
 * @throws Error when no context value is available, i.e. the caller is not
 *         rendered inside a `<ChartContainer />`.
 */
function useChart() {
  const chartContext = React.useContext(ChartContext);

  if (chartContext) {
    return chartContext;
  }

  throw new Error("useChart must be used within a <ChartContainer />");
}

/**
 * Wrapper that provides chart config via context, injects the per-chart CSS
 * color variables (ChartStyle), and mounts a Recharts ResponsiveContainer only
 * once the wrapper element has been measured with a non-zero size.
 *
 * @param id        Optional id used to build the `data-chart` attribute; a
 *                  React-generated id (with colons stripped) is used otherwise.
 * @param className Extra classes merged onto the wrapper div.
 * @param children  Chart content, passed to ResponsiveContainer.
 * @param config    Chart configuration exposed through ChartContext.
 * @param props     Remaining div props, spread onto the wrapper.
 */
function ChartContainer({
  id,
  className,
  children,
  config,
  ...props
}: React.ComponentProps<"div"> & {
  config: ChartConfig;
  children: React.ComponentProps<
    typeof RechartsPrimitive.ResponsiveContainer
  >["children"];
}) {
  const uniqueId = React.useId();
  // Colons are stripped from the generated id before it is used in a CSS attribute selector.
  const chartId = `chart-${id || uniqueId.replace(/:/g, "")}`;
  const containerRef = React.useRef<HTMLDivElement | null>(null);
  // null until the wrapper has been measured with a positive width and height.
  const [containerSize, setContainerSize] = React.useState<{
    width: number;
    height: number;
  } | null>(null);

  React.useEffect(() => {
    const element = containerRef.current;
    if (!element) return;

    // Measures the wrapper and stores the rounded size, skipping no-op updates.
    const updateSizeState = () => {
      const { width, height } = element.getBoundingClientRect();
      const nextSize =
        width > 0 && height > 0
          ? {
              width: Math.round(width),
              height: Math.round(height),
            }
          : null;

      setContainerSize((currentSize) => {
        if (!nextSize) {
          // Keep the last valid size once mounted to avoid unmount/remount thrash.
          return currentSize;
        }
        // Returning the same object lets React bail out of a re-render.
        if (
          currentSize &&
          currentSize.width === nextSize.width &&
          currentSize.height === nextSize.height
        ) {
          return currentSize;
        }
        return nextSize;
      });
    };

    // Initial measurement on mount.
    updateSizeState();

    // Fallback for environments without ResizeObserver: re-measure on window
    // resize/orientation change and when the document becomes visible.
    if (typeof ResizeObserver === "undefined") {
      const recheckSize = () => {
        if (document.visibilityState === "visible") {
          updateSizeState();
        }
      };

      window.addEventListener("resize", recheckSize);
      window.addEventListener("orientationchange", recheckSize);
      document.addEventListener("visibilitychange", recheckSize);

      return () => {
        window.removeEventListener("resize", recheckSize);
        window.removeEventListener("orientationchange", recheckSize);
        document.removeEventListener("visibilitychange", recheckSize);
      };
    }

    const observer = new ResizeObserver(() => {
      updateSizeState();
    });
    observer.observe(element);

    return () => observer.disconnect();
  }, []);

  // The chart itself is rendered only after a valid size is known; that size is
  // handed to ResponsiveContainer as its initial dimension.
  return (
    <ChartContext.Provider value={{ config }}>
      <div
        ref={containerRef}
        data-slot="chart"
        data-chart={chartId}
        className={cn(
          "[&_.recharts-cartesian-axis-tick_text]:fill-muted-foreground [&_.recharts-cartesian-grid_line[stroke='#ccc']]:stroke-border/50 [&_.recharts-curve.recharts-tooltip-cursor]:stroke-border [&_.recharts-polar-grid_[stroke='#ccc']]:stroke-border [&_.recharts-radial-bar-background-sector]:fill-muted [&_.recharts-rectangle.recharts-tooltip-cursor]:fill-muted [&_.recharts-reference-line_[stroke='#ccc']]:stroke-border flex min-w-0 aspect-video justify-center text-xs [&_.recharts-dot[stroke='#fff']]:stroke-transparent [&_.recharts-layer]:outline-hidden [&_.recharts-sector]:outline-hidden [&_.recharts-sector[stroke='#fff']]:stroke-transparent [&_.recharts-surface]:outline-hidden",
          className,
        )}
        {...props}
      >
        <ChartStyle id={chartId} config={config} />
        {containerSize ? (
          <RechartsPrimitive.ResponsiveContainer
            width="100%"
            height="100%"
            minWidth={0}
            minHeight={1}
            initialDimension={containerSize}
          >
            {children}
          </RechartsPrimitive.ResponsiveContainer>
        ) : null}
      </div>
    </ChartContext.Provider>
  );
}

/**
 * Emits a `<style>` element defining `--color-<key>` CSS variables for every
 * config entry that has a `color` or `theme`, one rule per THEMES entry,
 * scoped to `[data-chart=<id>]`. Renders nothing when no entry has a color.
 */
const ChartStyle = ({ id, config }: { id: string; config: ChartConfig }) => {
  const colorConfig = Object.entries(config).filter(
    ([, entry]) => entry.theme || entry.color,
  );

  if (!colorConfig.length) {
    return null;
  }

  // Builds the CSS rule for one theme; the theme-specific color wins over `color`.
  const buildRule = (theme: string, prefix: string): string => {
    const declarations = colorConfig
      .map(([key, itemConfig]) => {
        const color =
          itemConfig.theme?.[theme as keyof typeof itemConfig.theme] ||
          itemConfig.color;
        return color ? `  --color-${key}: ${color};` : null;
      })
      .join("\n");

    return `\n${prefix} [data-chart=${id}] {\n${declarations}\n}\n`;
  };

  const css = Object.entries(THEMES)
    .map(([theme, prefix]) => buildRule(theme, prefix))
    .join("\n");

  return <style dangerouslySetInnerHTML={{ __html: css }} />;
};

// Alias of the Recharts Tooltip component, exported under the chart namespace.
const ChartTooltip = RechartsPrimitive.Tooltip;

/**
 * Tooltip body for charts rendered inside a ChartContainer.
 *
 * Renders nothing unless `active` is truthy and `payload` is non-empty.
 * Otherwise renders an optional label followed by one row per payload item
 * (items whose `type` is "none" are skipped).
 *
 * @param indicator      Shape of the color marker: "dot" (default), "line" or "dashed".
 * @param hideLabel      Suppresses the tooltip label.
 * @param hideIndicator  Suppresses the color marker (an item icon from config is still shown).
 * @param labelFormatter Optional formatter called with the resolved label value and the payload.
 * @param formatter      Optional per-item formatter. An array result is read as
 *                       `[value, label]`; any other result replaces the whole row content.
 * @param color          Overrides the marker color for every item.
 * @param nameKey        Key used to look up each item's config entry.
 * @param labelKey       Key used to look up the label's config entry.
 */
function ChartTooltipContent({
  active,
  payload,
  className,
  indicator = "dot",
  hideLabel = false,
  hideIndicator = false,
  label,
  labelFormatter,
  labelClassName,
  formatter,
  color,
  nameKey,
  labelKey,
}: Partial<RechartsPrimitive.TooltipContentProps<any, any>> &
  React.ComponentProps<"div"> & {
    hideLabel?: boolean;
    hideIndicator?: boolean;
    indicator?: "line" | "dot" | "dashed";
    nameKey?: string;
    labelKey?: string;
  }) {
  const { config } = useChart();

  const tooltipLabel = React.useMemo(() => {
    if (hideLabel || !payload?.length) {
      return null;
    }

    // The label is derived from the first payload item only.
    const [item] = payload;
    const key = `${labelKey || item?.dataKey || item?.name || "value"}`;
    const itemConfig = getPayloadConfigFromPayload(config, item, key);
    // Without a labelKey, a string label is looked up in config and falls back to itself.
    const value =
      !labelKey && typeof label === "string"
        ? config[label as keyof typeof config]?.label || label
        : itemConfig?.label;

    if (labelFormatter) {
      return (
        <div className={cn("font-medium", labelClassName)}>
          {labelFormatter(value, payload)}
        </div>
      );
    }

    if (!value) {
      return null;
    }

    return <div className={cn("font-medium", labelClassName)}>{value}</div>;
  }, [
    label,
    labelFormatter,
    payload,
    hideLabel,
    labelClassName,
    config,
    labelKey,
  ]);

  if (!active || !payload?.length) {
    return null;
  }

  // With a single item and a non-dot indicator, the label is rendered inside the item row.
  const nestLabel = payload.length === 1 && indicator !== "dot";

  return (
    <div
      className={cn(
        "border-border/50 corner-squircle bg-background gap-1.5 rounded-lg border px-2.5 py-1.5 text-xs shadow-xl grid min-w-[8rem] items-start",
        className,
      )}
    >
      {!nestLabel ? tooltipLabel : null}
      <div className="grid gap-1.5">
        {payload
          .filter((item) => item.type !== "none")
          .map((item, index) => {
            const key = `${nameKey || item.name || item.dataKey || "value"}`;
            const itemConfig = getPayloadConfigFromPayload(config, item, key);
            // `item.payload` may be absent on an entry; guard the access so a
            // missing data point falls through to `item.color` instead of throwing.
            const indicatorColor = color || item.payload?.fill || item.color;
            let customContent: React.ReactNode = null;
            // Primitive values are stringified; objects and nullish values pass through as-is.
            let formattedValue: React.ReactNode =
              item.value != null && typeof item.value !== "object"
                ? String(item.value)
                : item.value;
            let formattedLabel: React.ReactNode = itemConfig?.label || item.name;

            if (formatter && item?.value !== undefined && item.name) {
              const result = formatter(
                item.value,
                item.name,
                item,
                index,
                item.payload,
              );

              if (Array.isArray(result)) {
                formattedValue = result[0];
                formattedLabel = result[1];
              } else {
                customContent = result;
              }
            }

            return (
              <div
                key={item.dataKey}
                className={cn(
                  "[&>svg]:text-muted-foreground flex w-full flex-wrap items-stretch gap-2 [&>svg]:h-2.5 [&>svg]:w-2.5",
                  indicator === "dot" && "items-center",
                )}
              >
                {customContent ?? (
                  <>
                    {itemConfig?.icon ? (
                      <itemConfig.icon />
                    ) : (
                      !hideIndicator && (
                        <div
                          className={cn(
                            "shrink-0 rounded-[2px] border-(--color-border) bg-(--color-bg)",
                            {
                              "h-2.5 w-2.5": indicator === "dot",
                              "w-1": indicator === "line",
                              "w-0 border-[1.5px] border-dashed bg-transparent":
                                indicator === "dashed",
                              "my-0.5": nestLabel && indicator === "dashed",
                            },
                          )}
                          style={
                            {
                              "--color-bg": indicatorColor,
                              "--color-border": indicatorColor,
                            } as React.CSSProperties
                          }
                        />
                      )
                    )}
                    <div
                      className={cn(
                        "flex flex-1 justify-between gap-3 leading-none",
                        nestLabel ? "items-end" : "items-center",
                      )}
                    >
                      <div className="grid gap-1.5">
                        {nestLabel ? tooltipLabel : null}
                        <span className="text-muted-foreground">{formattedLabel}</span>
                      </div>
                      {formattedValue != null && (
                        <span className="text-foreground font-mono font-medium tabular-nums">
                          {formattedValue}
                        </span>
                      )}
                    </div>
                  </>
                )}
              </div>
            );
          })}
      </div>
    </div>
  );
}

// Alias of the Recharts Legend component, exported under the chart namespace.
const ChartLegend = RechartsPrimitive.Legend;

/**
 * Legend body for charts rendered inside a ChartContainer.
 *
 * Renders nothing for an empty payload. Each item (except those with
 * `type === "none"`) shows its config icon — or a color swatch when there is
 * no icon or `hideIcon` is set — followed by the label from the chart config.
 *
 * @param hideIcon      When true, always show the color swatch instead of the config icon.
 * @param verticalAlign "top" adds bottom padding; anything else adds top padding.
 * @param nameKey       Key used to look up each item's config entry.
 */
function ChartLegendContent(
  legendProps: React.ComponentProps<"div"> &
    Pick<RechartsPrimitive.DefaultLegendContentProps, "payload" | "verticalAlign"> & {
      hideIcon?: boolean;
      nameKey?: string;
    },
) {
  const {
    className,
    hideIcon = false,
    payload,
    verticalAlign = "bottom",
    nameKey,
  } = legendProps;
  const { config } = useChart();

  if (!payload?.length) {
    return null;
  }

  const visibleItems = payload.filter((item) => item.type !== "none");
  const spacingClass = verticalAlign === "top" ? "pb-3" : "pt-3";

  return (
    <div
      className={cn(
        "flex items-center justify-center gap-4",
        spacingClass,
        className,
      )}
    >
      {visibleItems.map((item) => {
        const lookupKey = `${nameKey || item.dataKey || "value"}`;
        const itemConfig = getPayloadConfigFromPayload(config, item, lookupKey);
        // The icon is used only when the config provides one and icons are not hidden.
        const Icon = hideIcon ? undefined : itemConfig?.icon;

        return (
          <div
            key={item.value}
            className={cn(
              "[&>svg]:text-muted-foreground flex items-center gap-1.5 [&>svg]:h-3 [&>svg]:w-3",
            )}
          >
            {Icon ? (
              <Icon />
            ) : (
              <div
                className="h-2 w-2 shrink-0 rounded-[2px]"
                style={{
                  backgroundColor: item.color,
                }}
              />
            )}
            {itemConfig?.label}
          </div>
        );
      })}
    </div>
  );
}

/**
 * Resolves the ChartConfig entry for a tooltip/legend payload item.
 *
 * The config key is taken from `payload[key]` when that is a string, otherwise
 * from `payload.payload[key]` when that is a string, otherwise `key` itself.
 * If the resolved key is not present in `config`, the lookup falls back to
 * `config[key]`.
 *
 * @returns The matching config entry, or `undefined` when `payload` is not a
 *          non-null object or no entry matches.
 */
function getPayloadConfigFromPayload(
  config: ChartConfig,
  payload: unknown,
  key: string,
) {
  if (typeof payload !== "object" || payload === null) {
    return undefined;
  }

  const nestedPayload =
    "payload" in payload &&
    typeof payload.payload === "object" &&
    payload.payload !== null
      ? payload.payload
      : undefined;

  // Reads `source[key]` and returns it only when it is an own/inherited string property.
  const stringValueAt = (source: object | undefined): string | undefined => {
    if (!source || !(key in source)) {
      return undefined;
    }
    const candidate = (source as Record<string, unknown>)[key];
    return typeof candidate === "string" ? candidate : undefined;
  };

  // `??` (not `||`) so an empty-string value on the outer payload is still honored.
  const configLabelKey: string =
    stringValueAt(payload) ?? stringValueAt(nestedPayload) ?? key;

  if (configLabelKey in config) {
    return config[configLabelKey];
  }
  return config[key as keyof typeof config];
}

export {
  ChartContainer,
  ChartTooltip,
  ChartTooltipContent,
  ChartLegend,
  ChartLegendContent,
  ChartStyle,
};


================================================
FILE: studio/frontend/src/components/ui/checkbox.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Checkbox as CheckboxPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { Tick02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Styled checkbox built on `CheckboxPrimitive.Root`, with a tick icon rendered
 * inside `CheckboxPrimitive.Indicator`.
 */
function Checkbox(
  checkboxProps: React.ComponentProps<typeof CheckboxPrimitive.Root>,
) {
  const { className, ...rest } = checkboxProps;
  const rootClassName = cn(
    "border-input dark:bg-input/30 data-checked:bg-primary data-checked:text-primary-foreground dark:data-checked:bg-primary data-checked:border-primary aria-invalid:aria-checked:border-primary aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 flex size-4 items-center justify-center rounded-[6px] border transition-shadow group-has-disabled/field:opacity-50 focus-visible:ring-[3px] aria-invalid:ring-[3px] peer relative shrink-0 outline-none after:absolute after:-inset-x-3 after:-inset-y-2 disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return (
    <CheckboxPrimitive.Root
      data-slot="checkbox"
      className={rootClassName}
      {...rest}
    >
      <CheckboxPrimitive.Indicator
        data-slot="checkbox-indicator"
        className="[&>svg]:size-3.5 grid place-content-center text-current transition-none"
      >
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </CheckboxPrimitive.Indicator>
    </CheckboxPrimitive.Root>
  );
}

export { Checkbox };


================================================
FILE: studio/frontend/src/components/ui/collapsible.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import { Collapsible as CollapsiblePrimitive } from "radix-ui";

/** Pass-through wrapper for `CollapsiblePrimitive.Root` that tags it with `data-slot="collapsible"`. */
function Collapsible(
  rootProps: React.ComponentProps<typeof CollapsiblePrimitive.Root>,
) {
  return <CollapsiblePrimitive.Root data-slot="collapsible" {...rootProps} />;
}

/** Pass-through wrapper for the collapsible trigger, tagged with `data-slot="collapsible-trigger"`. */
function CollapsibleTrigger(
  triggerProps: React.ComponentProps<
    typeof CollapsiblePrimitive.CollapsibleTrigger
  >,
) {
  const Trigger = CollapsiblePrimitive.CollapsibleTrigger;

  return <Trigger data-slot="collapsible-trigger" {...triggerProps} />;
}

/**
 * Collapsible content region. Adds overflow clipping and the open/close
 * animation classes (150ms duration) before any caller-supplied `className`.
 */
function CollapsibleContent(
  contentProps: React.ComponentProps<
    typeof CollapsiblePrimitive.CollapsibleContent
  >,
) {
  const { className, ...rest } = contentProps;
  const contentClassName = cn(
    "overflow-hidden data-[state=open]:animate-collapsible-down data-[state=closed]:animate-collapsible-up [--duration:150ms]",
    className,
  );

  return (
    <CollapsiblePrimitive.CollapsibleContent
      data-slot="collapsible-content"
      className={contentClassName}
      {...rest}
    />
  );
}

export { Collapsible, CollapsibleTrigger, CollapsibleContent };


================================================
FILE: studio/frontend/src/components/ui/combobox.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

/* eslint-disable react-refresh/only-export-components */

import { Combobox as ComboboxPrimitive } from "@base-ui/react";
import * as React from "react";
import { createContext, useContext, useState } from "react";

import { Button } from "@/components/ui/button";
import { useDialogPortalContainer } from "@/components/ui/dialog";
import {
  InputGroup,
  InputGroupAddon,
  InputGroupButton,
  InputGroupInput,
} from "@/components/ui/input-group";
import { cn } from "@/lib/utils";
import {
  ArrowDown01Icon,
  Cancel01Icon,
  Tick02Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

// Carries the combobox popup's open state from Combobox down to ComboboxInput;
// defaults to false (closed) when no provider is present.
const ComboboxOpenContext = createContext(false);
// Root props specialised to string values with multi-select disabled.
type ComboboxRootProps = ComboboxPrimitive.Root.Props<string, false>;

/**
 * Combobox root. Wraps `ComboboxPrimitive.Root` and publishes the popup's open
 * state through `ComboboxOpenContext` so descendants (e.g. ComboboxInput) can
 * style themselves while the popup is open.
 *
 * @param onOpenChange Optional callback; invoked after the tracked open state is updated.
 * @param children     Combobox parts rendered inside the root.
 * @param props        Remaining root props, forwarded unchanged. `open` and
 *                     `defaultOpen` are also read here to keep the context in sync.
 */
function Combobox({
  onOpenChange,
  children,
  ...props
}: ComboboxRootProps): React.ReactElement {
  // Seed from `defaultOpen` so an initially-open combobox reports the right state on first render.
  const [trackedOpen, setTrackedOpen] = useState(Boolean(props.defaultOpen));
  // When the caller controls `open`, that prop is the source of truth for the context.
  const isOpen = props.open ?? trackedOpen;

  return (
    <ComboboxOpenContext.Provider value={isOpen}>
      <ComboboxPrimitive.Root
        onOpenChange={(open, eventDetails) => {
          setTrackedOpen(open);
          onOpenChange?.(open, eventDetails);
        }}
        {...props}
      >
        {children}
      </ComboboxPrimitive.Root>
    </ComboboxOpenContext.Provider>
  );
}

/** Pass-through wrapper for `ComboboxPrimitive.Value`, tagged with `data-slot="combobox-value"`. */
function ComboboxValue(
  valueProps: ComboboxPrimitive.Value.Props,
): React.ReactElement {
  return <ComboboxPrimitive.Value data-slot="combobox-value" {...valueProps} />;
}

/**
 * Combobox trigger. Renders its children followed by a down-arrow icon that
 * ignores pointer events.
 */
function ComboboxTrigger(
  triggerProps: ComboboxPrimitive.Trigger.Props,
): React.ReactElement {
  const { className, children, ...rest } = triggerProps;
  const triggerClassName = cn("[&_svg:not([class*='size-'])]:size-4", className);

  const arrowIcon = (
    <HugeiconsIcon
      icon={ArrowDown01Icon}
      strokeWidth={2}
      className="text-muted-foreground size-4 pointer-events-none"
    />
  );

  return (
    <ComboboxPrimitive.Trigger
      data-slot="combobox-trigger"
      className={triggerClassName}
      {...rest}
    >
      {children}
      {arrowIcon}
    </ComboboxPrimitive.Trigger>
  );
}

/**
 * Clear button for the combobox, rendered as a ghost `icon-xs`
 * InputGroupButton containing a cancel icon.
 */
function ComboboxClear(
  clearProps: ComboboxPrimitive.Clear.Props,
): React.ReactElement {
  const { className, ...rest } = clearProps;
  const buttonElement = <InputGroupButton variant="ghost" size="icon-xs" />;

  return (
    <ComboboxPrimitive.Clear
      data-slot="combobox-clear"
      render={buttonElement}
      className={cn(className)}
      {...rest}
    >
      <HugeiconsIcon
        icon={Cancel01Icon}
        strokeWidth={2}
        className="pointer-events-none"
      />
    </ComboboxPrimitive.Clear>
  );
}

/**
 * Combobox text input wrapped in an InputGroup, with an optional trigger
 * button and an optional clear button in an inline-end addon.
 *
 * While the popup is open (per ComboboxOpenContext) the group's border radius
 * is pinned to 12px with no transition; on close it animates back over 150ms.
 *
 * @param disabled    Disables the input and both addon buttons (default `false`).
 * @param showTrigger Renders the dropdown trigger button (default `true`).
 * @param showClear   Renders the clear button (default `false`).
 * @param children    Extra content rendered after the addon inside the group.
 */
function ComboboxInput(
  inputProps: ComboboxPrimitive.Input.Props & {
    showTrigger?: boolean;
    showClear?: boolean;
  },
): React.ReactElement {
  const {
    className,
    children,
    disabled = false,
    showTrigger = true,
    showClear = false,
    ...rest
  } = inputProps;
  const isOpen = useContext(ComboboxOpenContext);

  const groupStyle: React.CSSProperties = {
    borderRadius: isOpen ? "12px" : undefined,
    transition: isOpen
      ? "border-radius 0ms"
      : "border-radius 150ms cubic-bezier(0.645, 0.045, 0.355, 1)",
  };

  const triggerButton = showTrigger && (
    <InputGroupButton
      size="icon-xs"
      variant="ghost"
      asChild
      data-slot="input-group-button"
      className="group-has-data-[slot=combobox-clear]/input-group:hidden data-pressed:bg-transparent"
      disabled={disabled}
    >
      <ComboboxTrigger />
    </InputGroupButton>
  );
  const clearButton = showClear && <ComboboxClear disabled={disabled} />;

  return (
    <InputGroup className={cn("w-auto", className)} style={groupStyle}>
      <ComboboxPrimitive.Input
        render={<InputGroupInput disabled={disabled} />}
        {...rest}
      />
      <InputGroupAddon align="inline-end">
        {triggerButton}
        {clearButton}
      </InputGroupAddon>
      {children}
    </InputGroup>
  );
}

/**
 * Popup surface of the combobox: a portal, a positioner and the popup itself.
 *
 * The portal target is the explicit `container` prop when provided, otherwise
 * the container returned by `useDialogPortalContainer()`, otherwise the
 * primitive's default. The popup's `data-chips` attribute reflects whether an
 * `anchor` was supplied.
 */
function ComboboxContent({
  className,
  side = "bottom",
  sideOffset = 6,
  align = "start",
  alignOffset = 0,
  anchor,
  container,
  ...popupProps
}: ComboboxPrimitive.Popup.Props &
  Pick<
    ComboboxPrimitive.Positioner.Props,
    "side" | "align" | "sideOffset" | "alignOffset" | "anchor"
  > & {
    container?: HTMLElement | null;
  }): React.ReactElement {
  const inheritedContainer = useDialogPortalContainer();
  const portalTarget = container ?? inheritedContainer ?? undefined;
  const popupClassName = cn(
    "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 shadow-border ring-1 ring-border *:data-[slot=input-group]:bg-input/30 max-h-72 min-w-36 overflow-hidden rounded-xl corner-squircle duration-100 *:data-[slot=input-group]:m-1 *:data-[slot=input-group]:mb-0 *:data-[slot=input-group]:h-9 *:data-[slot=input-group]:border-none *:data-[slot=input-group]:shadow-none group/combobox-content relative pointer-events-auto max-h-(--available-height) w-(--anchor-width) max-w-(--available-width) min-w-[calc(var(--anchor-width)+--spacing(7))] origin-(--transform-origin) data-[chips=true]:min-w-(--anchor-width)",
    className,
  );

  return (
    <ComboboxPrimitive.Portal container={portalTarget}>
      <ComboboxPrimitive.Positioner
        side={side}
        sideOffset={sideOffset}
        align={align}
        alignOffset={alignOffset}
        anchor={anchor}
        className="isolate z-[120] pointer-events-auto"
      >
        <ComboboxPrimitive.Popup
          data-slot="combobox-content"
          data-chips={!!anchor}
          className={popupClassName}
          {...popupProps}
        />
      </ComboboxPrimitive.Positioner>
    </ComboboxPrimitive.Portal>
  );
}

/**
 * Scrollable list region of the combobox popup. Merges the caller's
 * `className` after the base list classes and forwards all other props.
 */
function ComboboxList(
  listProps: ComboboxPrimitive.List.Props,
): React.ReactElement {
  const { className, ...rest } = listProps;
  const listClassName = cn(
    "no-scrollbar max-h-[min(calc(--spacing(72)---spacing(9)),calc(var(--available-height)---spacing(9)))] scroll-py-1 overflow-y-auto p-1 data-empty:p-0 overflow-y-auto overscroll-contain",
    className,
  );

  return (
    <ComboboxPrimitive.List
      data-slot="combobox-list"
      className={listClassName}
      {...rest}
    />
  );
}

/**
 * Single selectable option in the combobox list. Renders the caller's
 * children followed by an item indicator (a tick icon positioned at the
 * right edge of the row).
 */
function ComboboxItem(
  itemProps: ComboboxPrimitive.Item.Props,
): React.ReactElement {
  const { className, children, ...rest } = itemProps;
  const itemClassName = cn(
    "data-highlighted:bg-accent data-highlighted:text-accent-foreground not-data-[variant=destructive]:data-highlighted:**:text-accent-foreground gap-2 rounded-xl corner-squircle py-2 pr-2 pl-3 text-sm [&[aria-selected=true]]:pr-7 [&_svg:not([class*='size-'])]:size-4 relative flex w-full cursor-pointer items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );
  const indicatorShell = (
    <span className="pointer-events-none absolute right-2 flex size-4 items-center justify-center" />
  );

  return (
    <ComboboxPrimitive.Item
      data-slot="combobox-item"
      className={itemClassName}
      {...rest}
    >
      {children}
      <ComboboxPrimitive.ItemIndicator render={indicatorShell}>
        <HugeiconsIcon
          icon={Tick02Icon}
          strokeWidth={2}
          className="pointer-events-none"
        />
      </ComboboxPrimitive.ItemIndicator>
    </ComboboxPrimitive.Item>
  );
}

/**
 * Groups related combobox items. Adds a `data-slot` marker and forwards
 * everything else to the underlying group primitive.
 */
function ComboboxGroup(
  groupProps: ComboboxPrimitive.Group.Props,
): React.ReactElement {
  const { className, ...rest } = groupProps;

  return (
    <ComboboxPrimitive.Group
      data-slot="combobox-group"
      className={cn(className)}
      {...rest}
    />
  );
}

/**
 * Heading for a combobox group, rendered in small muted text. The caller's
 * `className` is merged after the base classes.
 */
function ComboboxLabel(
  labelProps: ComboboxPrimitive.GroupLabel.Props,
): React.ReactElement {
  const { className, ...rest } = labelProps;
  const labelClassName = cn(
    "text-muted-foreground px-3.5 py-2.5 text-xs",
    className,
  );

  return (
    <ComboboxPrimitive.GroupLabel
      data-slot="combobox-label"
      className={labelClassName}
      {...rest}
    />
  );
}

/**
 * Pass-through wrapper around the collection primitive that only adds a
 * `data-slot` marker.
 */
function ComboboxCollection(
  collectionProps: ComboboxPrimitive.Collection.Props,
): React.ReactElement {
  return (
    <ComboboxPrimitive.Collection
      data-slot="combobox-collection"
      {...collectionProps}
    />
  );
}

/**
 * Placeholder shown when the combobox has no items. It is hidden by default
 * and switches to `flex` through the `group-data-empty/combobox-content`
 * variant.
 */
function ComboboxEmpty(
  emptyProps: ComboboxPrimitive.Empty.Props,
): React.ReactElement {
  const { className, ...rest } = emptyProps;
  const emptyClassName = cn(
    "text-muted-foreground hidden w-full justify-center py-2 text-center text-sm group-data-empty/combobox-content:flex",
    className,
  );

  return (
    <ComboboxPrimitive.Empty
      data-slot="combobox-empty"
      className={emptyClassName}
      {...rest}
    />
  );
}

/**
 * Thin horizontal rule between combobox sections. The caller's `className`
 * is merged after the base classes.
 */
function ComboboxSeparator(
  separatorProps: ComboboxPrimitive.Separator.Props,
): React.ReactElement {
  const { className, ...rest } = separatorProps;
  const separatorClassName = cn("bg-border/50 -mx-1 my-1 h-px", className);

  return (
    <ComboboxPrimitive.Separator
      data-slot="combobox-separator"
      className={separatorClassName}
      {...rest}
    />
  );
}

/**
 * Container for multi-select chips plus their input. Styled like an input
 * field, with focus-within and aria-invalid ring/border states.
 */
function ComboboxChips(
  chipsProps: React.ComponentPropsWithRef<typeof ComboboxPrimitive.Chips> &
    ComboboxPrimitive.Chips.Props,
): React.ReactElement {
  const { className, ...rest } = chipsProps;
  const chipsClassName = cn(
    "bg-input/30 border-input focus-within:border-ring focus-within:ring-ring/50 has-aria-invalid:ring-destructive/20 dark:has-aria-invalid:ring-destructive/40 has-aria-invalid:border-destructive dark:has-aria-invalid:border-destructive/50 flex min-h-9 flex-wrap items-center gap-1.5 rounded-4xl border bg-clip-padding px-2.5 py-1.5 text-sm transition-colors focus-within:ring-[3px] has-aria-invalid:ring-[3px] has-data-[slot=combobox-chip]:px-1.5",
    className,
  );

  return (
    <ComboboxPrimitive.Chips
      data-slot="combobox-chips"
      className={chipsClassName}
      {...rest}
    />
  );
}

/**
 * One selected value rendered as a chip. When `showRemove` is true (the
 * default) a ghost remove button with a cancel icon follows the children.
 */
function ComboboxChip({
  className,
  children,
  showRemove = true,
  ...chipProps
}: ComboboxPrimitive.Chip.Props & {
  showRemove?: boolean;
}): React.ReactElement {
  const chipClassName = cn(
    "bg-muted-foreground/10 text-foreground flex h-[calc(--spacing(5.5))] w-fit items-center justify-center gap-1 rounded-4xl px-2 text-xs font-medium whitespace-nowrap has-data-[slot=combobox-chip-remove]:pr-0 has-disabled:pointer-events-none has-disabled:cursor-not-allowed has-disabled:opacity-50",
    className,
  );

  const removeButton = showRemove && (
    <ComboboxPrimitive.ChipRemove
      render={<Button variant="ghost" size="icon-xs" />}
      className="-ml-1 opacity-50 hover:opacity-100"
      data-slot="combobox-chip-remove"
    >
      <HugeiconsIcon
        icon={Cancel01Icon}
        strokeWidth={2}
        className="pointer-events-none"
      />
    </ComboboxPrimitive.ChipRemove>
  );

  return (
    <ComboboxPrimitive.Chip
      data-slot="combobox-chip"
      className={chipClassName}
      {...chipProps}
    >
      {children}
      {removeButton}
    </ComboboxPrimitive.Chip>
  );
}

/**
 * Bare text input used inside `ComboboxChips`. It grows to fill the
 * remaining row width (`flex-1`) with a minimum width of `min-w-16`.
 */
function ComboboxChipsInput(
  inputProps: ComboboxPrimitive.Input.Props,
): React.ReactElement {
  const { className, ...rest } = inputProps;
  const inputClassName = cn("min-w-16 flex-1 outline-none", className);

  return (
    <ComboboxPrimitive.Input
      data-slot="combobox-chip-input"
      className={inputClassName}
      {...rest}
    />
  );
}

/**
 * Creates a ref, initialised to `null`, typed for a `<div>` element.
 * Presumably meant to be attached to the chips container and passed to
 * `ComboboxContent` as `anchor` (verify against callers).
 */
function useComboboxAnchor(): React.MutableRefObject<HTMLDivElement | null> {
  const anchorRef = React.useRef<HTMLDivElement | null>(null);
  return anchorRef;
}

export {
  Combobox,
  ComboboxInput,
  ComboboxContent,
  ComboboxList,
  ComboboxItem,
  ComboboxGroup,
  ComboboxLabel,
  ComboboxCollection,
  ComboboxEmpty,
  ComboboxSeparator,
  ComboboxChips,
  ComboboxChip,
  ComboboxChipsInput,
  ComboboxTrigger,
  ComboboxValue,
  useComboboxAnchor,
};


================================================
FILE: studio/frontend/src/components/ui/command.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Command as CommandPrimitive } from "cmdk";
import type * as React from "react";

import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { InputGroup, InputGroupAddon } from "@/components/ui/input-group";
import { cn } from "@/lib/utils";
import { SearchIcon, Tick02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Root of the command menu. Wraps the `cmdk` primitive with popover colours,
 * rounded corners and a full-size column layout.
 */
function Command(commandProps: React.ComponentProps<typeof CommandPrimitive>) {
  const { className, ...rest } = commandProps;
  const rootClassName = cn(
    "bg-popover text-popover-foreground rounded-4xl p-1 flex size-full flex-col overflow-hidden",
    className,
  );

  return (
    <CommandPrimitive
      data-slot="command"
      className={rootClassName}
      {...rest}
    />
  );
}

/**
 * Command palette hosted in a dialog.
 *
 * `title` and `description` are rendered inside a screen-reader-only header.
 * `className` and `showCloseButton` (off by default) go to `DialogContent`.
 * All remaining props go to the `Dialog` root.
 */
function CommandDialog({
  title = "Command Palette",
  description = "Search for a command to run...",
  children,
  className,
  showCloseButton = false,
  ...dialogProps
}: React.ComponentProps<typeof Dialog> & {
  title?: string;
  description?: string;
  className?: string;
  showCloseButton?: boolean;
}) {
  const contentClassName = cn(
    "rounded-4xl! p-0 top-1/3 translate-y-0 overflow-hidden p-0",
    className,
  );

  return (
    <Dialog {...dialogProps}>
      <DialogHeader className="sr-only">
        <DialogTitle>{title}</DialogTitle>
        <DialogDescription>{description}</DialogDescription>
      </DialogHeader>
      <DialogContent
        className={contentClassName}
        showCloseButton={showCloseButton}
      >
        {children}
      </DialogContent>
    </Dialog>
  );
}

/**
 * Search field of the command menu: the `cmdk` input inside an `InputGroup`,
 * with a search icon rendered in an addon.
 */
function CommandInput(
  inputProps: React.ComponentProps<typeof CommandPrimitive.Input>,
) {
  const { className, ...rest } = inputProps;
  const inputClassName = cn(
    "w-full text-sm outline-hidden disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );
  const searchIcon = (
    <HugeiconsIcon
      icon={SearchIcon}
      strokeWidth={2}
      className="size-4 shrink-0 opacity-50"
    />
  );

  return (
    <div data-slot="command-input-wrapper" className="p-1 pb-0">
      <InputGroup className="bg-input/30 h-9">
        <CommandPrimitive.Input
          data-slot="command-input"
          className={inputClassName}
          {...rest}
        />
        <InputGroupAddon>{searchIcon}</InputGroupAddon>
      </InputGroup>
    </div>
  );
}

/**
 * Scrollable results area of the command menu, capped at `max-h-72`, with
 * vertical scrolling and horizontal overflow hidden.
 */
function CommandList(
  listProps: React.ComponentProps<typeof CommandPrimitive.List>,
) {
  const { className, ...rest } = listProps;
  const listClassName = cn(
    "no-scrollbar max-h-72 scroll-py-1 outline-none overflow-x-hidden overflow-y-auto",
    className,
  );

  return (
    <CommandPrimitive.List
      data-slot="command-list"
      className={listClassName}
      {...rest}
    />
  );
}

/**
 * Styled wrapper for the `cmdk` empty-state primitive: centred small text
 * with vertical padding.
 */
function CommandEmpty(
  emptyProps: React.ComponentProps<typeof CommandPrimitive.Empty>,
) {
  const { className, ...rest } = emptyProps;
  const emptyClassName = cn("py-6 text-center text-sm", className);

  return (
    <CommandPrimitive.Empty
      data-slot="command-empty"
      className={emptyClassName}
      {...rest}
    />
  );
}

/**
 * Section of related command items. The group heading is styled through the
 * `[cmdk-group-heading]` attribute selector.
 */
function CommandGroup(
  groupProps: React.ComponentProps<typeof CommandPrimitive.Group>,
) {
  const { className, ...rest } = groupProps;
  const groupClassName = cn(
    "text-foreground [&_[cmdk-group-heading]]:text-muted-foreground overflow-hidden p-1 [&_[cmdk-group-heading]]:px-3 [&_[cmdk-group-heading]]:py-2 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium",
    className,
  );

  return (
    <CommandPrimitive.Group
      data-slot="command-group"
      className={groupClassName}
      {...rest}
    />
  );
}

/**
 * Thin horizontal divider between command groups.
 */
function CommandSeparator(
  separatorProps: React.ComponentProps<typeof CommandPrimitive.Separator>,
) {
  const { className, ...rest } = separatorProps;
  const separatorClassName = cn("bg-border/50 my-1 h-px", className);

  return (
    <CommandPrimitive.Separator
      data-slot="command-separator"
      className={separatorClassName}
      {...rest}
    />
  );
}

/**
 * Single command row. A trailing tick icon is always rendered after the
 * children. Its classes make it opaque only when the item carries
 * `data-checked="true"`, and hide it when the item contains a
 * `command-shortcut` slot.
 */
function CommandItem(
  itemProps: React.ComponentProps<typeof CommandPrimitive.Item>,
) {
  const { className, children, ...rest } = itemProps;
  const itemClassName = cn(
    "data-selected:bg-muted data-selected:text-foreground data-selected:*:[svg]:text-foreground relative flex cursor-default items-center gap-2 rounded-lg px-3 py-2 text-sm outline-hidden select-none [&_svg:not([class*='size-'])]:size-4 [[data-slot=dialog-content]_&]:rounded-2xl group/command-item data-[disabled=true]:pointer-events-none data-[disabled=true]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );
  const checkedTick = (
    <HugeiconsIcon
      icon={Tick02Icon}
      strokeWidth={2}
      className="ml-auto opacity-0 group-has-[[data-slot=command-shortcut]]/command-item:hidden group-data-[checked=true]/command-item:opacity-100"
    />
  );

  return (
    <CommandPrimitive.Item
      data-slot="command-item"
      className={itemClassName}
      {...rest}
    >
      {children}
      {checkedTick}
    </CommandPrimitive.Item>
  );
}

/**
 * Right-aligned keyboard-shortcut hint for a command item, rendered as a
 * plain `<span>`.
 */
function CommandShortcut(shortcutProps: React.ComponentProps<"span">) {
  const { className, ...rest } = shortcutProps;
  const shortcutClassName = cn(
    "text-muted-foreground group-data-selected/command-item:text-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return (
    <span
      data-slot="command-shortcut"
      className={shortcutClassName}
      {...rest}
    />
  );
}

export {
  Command,
  CommandDialog,
  CommandInput,
  CommandList,
  CommandEmpty,
  CommandGroup,
  CommandItem,
  CommandShortcut,
  CommandSeparator,
};


================================================
FILE: studio/frontend/src/components/ui/confetti.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  GlobalOptions as ConfettiGlobalOptions,
  CreateTypes as ConfettiInstance,
  Options as ConfettiOptions,
} from "canvas-confetti";
import confetti from "canvas-confetti";
import type { ReactNode } from "react";
import type React from "react";
import {
  createContext,
  forwardRef,
  useCallback,
  useEffect,
  useImperativeHandle,
  useMemo,
  useRef,
} from "react";

import { Button } from "@/components/ui/button";

// Imperative handle exposed by <Confetti>, both through its ref and through
// ConfettiContext.
type Api = {
  // Launches confetti; per-call options are merged over the component's
  // `options` prop.
  fire: (options?: ConfettiOptions) => void;
};

// Props for <Confetti>: every <canvas> prop plus the confetti-specific ones.
type Props = React.ComponentPropsWithRef<"canvas"> & {
  // Default launch options applied to every `fire()` call.
  options?: ConfettiOptions;
  // Options passed to `confetti.create()`; `resize` is always forced to true.
  globalOptions?: ConfettiGlobalOptions;
  // When false (the default) the component fires by itself from an effect.
  manualstart?: boolean;
  // Rendered after the canvas, inside the context provider.
  children?: ReactNode;
};

// Type of the ref value exposed by <Confetti>.
export type ConfettiRef = Api | null;

// NOTE: the default value is an empty object cast to Api, so it has no `fire`
// at runtime; consumers must be rendered under <Confetti>.
const ConfettiContext = createContext<Api>({} as Api);

// Shared default for `globalOptions`. It lives at module scope so that its
// identity is stable across renders: `canvasRef` below depends on
// `globalOptions`, and React re-invokes a ref callback (the old one with
// `null`, the new one with the node) whenever the callback's identity changes.
// An inline default object would therefore reset and recreate the confetti
// instance on every render, wiping any animation in flight.
const DEFAULT_GLOBAL_OPTIONS: ConfettiGlobalOptions = {
  resize: true,
  useWorker: true,
};

/**
 * Canvas-backed confetti surface.
 *
 * Creates a `canvas-confetti` instance bound to its own `<canvas>` and exposes
 * `{ fire }` both through the forwarded ref and through `ConfettiContext`.
 *
 * Props:
 * - `options`: defaults merged under the options passed to each `fire()` call.
 * - `globalOptions`: passed to `confetti.create()` (with `resize` forced to
 *   true). Changing its identity recreates the instance, so callers passing a
 *   custom value should memoize it.
 * - `manualstart`: when false (default) the component fires from an effect on
 *   mount, and again whenever `fire` changes identity (i.e. when `options`
 *   changes identity).
 * - remaining props are forwarded to the `<canvas>` element.
 */
const ConfettiComponent = forwardRef<ConfettiRef, Props>((props, ref) => {
  const {
    options,
    globalOptions = DEFAULT_GLOBAL_OPTIONS,
    manualstart = false,
    children,
    ...rest
  } = props;
  const instanceRef = useRef<ConfettiInstance | null>(null);

  // Ref callback: create the instance when the canvas mounts, tear it down
  // when React detaches the node (called with `null`).
  const canvasRef = useCallback(
    (node: HTMLCanvasElement | null) => {
      if (node === null) {
        instanceRef.current?.reset();
        instanceRef.current = null;
        return;
      }
      if (instanceRef.current) return;
      instanceRef.current = confetti.create(node, {
        ...globalOptions,
        resize: true,
      });
    },
    [globalOptions],
  );

  // Never rejects: failures are logged instead of propagated.
  const fire = useCallback(
    async (opts: ConfettiOptions = {}) => {
      try {
        await instanceRef.current?.({ ...options, ...opts });
      } catch (error) {
        console.error("Confetti error:", error);
      }
    },
    [options],
  );

  const api = useMemo(
    () => ({
      fire,
    }),
    [fire],
  );

  useImperativeHandle(ref, () => api, [api]);

  useEffect(() => {
    if (!manualstart) {
      // `fire` handles its own errors, so no extra try/catch is needed here.
      void fire();
    }
  }, [manualstart, fire]);

  return (
    <ConfettiContext.Provider value={api}>
      <canvas ref={canvasRef} {...rest} />
      {children}
    </ConfettiContext.Provider>
  );
});

// Set display name immediately
ConfettiComponent.displayName = "Confetti";

// Export as Confetti
export const Confetti = ConfettiComponent;

interface ConfettiButtonProps extends React.ComponentProps<"button"> {
  // Options spread into the `confetti()` call; `origin` is always overridden
  // with the button's centre.
  options?: ConfettiOptions &
    ConfettiGlobalOptions & { canvas?: HTMLCanvasElement };
}

/**
 * Button that launches confetti from its own centre when clicked.
 *
 * A caller-supplied `onClick` runs first. Confetti is then fired unless that
 * handler called `event.preventDefault()`. Previously a caller's `onClick`
 * silently replaced the confetti handler because props were spread after it.
 * All other props are forwarded to `Button`.
 */
const ConfettiButtonComponent = ({
  options,
  children,
  onClick,
  ...props
}: ConfettiButtonProps) => {
  const handleClick = (event: React.MouseEvent<HTMLButtonElement>) => {
    // Called outside the async section so an exception thrown by the caller's
    // handler propagates normally instead of becoming an unhandled rejection.
    onClick?.(event);
    if (event.defaultPrevented) {
      return;
    }

    // Read geometry synchronously, before any await.
    const rect = event.currentTarget.getBoundingClientRect();
    const origin = {
      x: (rect.left + rect.width / 2) / window.innerWidth,
      y: (rect.top + rect.height / 2) / window.innerHeight,
    };

    void (async () => {
      try {
        await confetti({
          ...options,
          origin,
        });
      } catch (error) {
        console.error("Confetti button error:", error);
      }
    })();
  };

  return (
    <Button onClick={handleClick} {...props}>
      {children}
    </Button>
  );
};

ConfettiButtonComponent.displayName = "ConfettiButton";

export const ConfettiButton = ConfettiButtonComponent;


================================================
FILE: studio/frontend/src/components/ui/context-menu.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { ContextMenu as ContextMenuPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { ArrowRight01Icon, Tick02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Root of a context menu; forwards all props to the Radix root.
 */
function ContextMenu(
  rootProps: React.ComponentProps<typeof ContextMenuPrimitive.Root>,
) {
  return <ContextMenuPrimitive.Root data-slot="context-menu" {...rootProps} />;
}

/**
 * Context-menu trigger area. Adds `select-none` ahead of the caller's
 * `className`.
 */
function ContextMenuTrigger(
  triggerProps: React.ComponentProps<typeof ContextMenuPrimitive.Trigger>,
) {
  const { className, ...rest } = triggerProps;
  const triggerClassName = cn("select-none", className);

  return (
    <ContextMenuPrimitive.Trigger
      data-slot="context-menu-trigger"
      className={triggerClassName}
      {...rest}
    />
  );
}

/**
 * Pass-through wrapper around the Radix group primitive that adds a
 * `data-slot` marker.
 */
function ContextMenuGroup(
  groupProps: React.ComponentProps<typeof ContextMenuPrimitive.Group>,
) {
  return (
    <ContextMenuPrimitive.Group data-slot="context-menu-group" {...groupProps} />
  );
}

/**
 * Pass-through wrapper around the Radix portal primitive that adds a
 * `data-slot` marker.
 */
function ContextMenuPortal(
  portalProps: React.ComponentProps<typeof ContextMenuPrimitive.Portal>,
) {
  return (
    <ContextMenuPrimitive.Portal
      data-slot="context-menu-portal"
      {...portalProps}
    />
  );
}

/**
 * Root of a nested submenu; forwards all props to the Radix `Sub` primitive.
 */
function ContextMenuSub(
  subProps: React.ComponentProps<typeof ContextMenuPrimitive.Sub>,
) {
  return <ContextMenuPrimitive.Sub data-slot="context-menu-sub" {...subProps} />;
}

/**
 * Pass-through wrapper around the Radix radio-group primitive that adds a
 * `data-slot` marker.
 */
function ContextMenuRadioGroup(
  radioGroupProps: React.ComponentProps<typeof ContextMenuPrimitive.RadioGroup>,
) {
  return (
    <ContextMenuPrimitive.RadioGroup
      data-slot="context-menu-radio-group"
      {...radioGroupProps}
    />
  );
}

/**
 * Floating panel of the context menu, rendered inside its own portal with
 * open/close animations. The caller's `className` is merged after the base
 * classes; other props are forwarded to the Radix content primitive.
 */
function ContextMenuContent(
  contentProps: React.ComponentProps<typeof ContextMenuPrimitive.Content> & {
    side?: "top" | "right" | "bottom" | "left";
  },
) {
  const { className, ...rest } = contentProps;
  const contentClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/5 bg-popover text-popover-foreground min-w-48 rounded-2xl p-1 shadow-2xl ring-1 duration-100 z-50 max-h-(--radix-context-menu-content-available-height) origin-(--radix-context-menu-content-transform-origin) overflow-x-hidden overflow-y-auto",
    className,
  );

  return (
    <ContextMenuPrimitive.Portal>
      <ContextMenuPrimitive.Content
        data-slot="context-menu-content"
        className={contentClassName}
        {...rest}
      />
    </ContextMenuPrimitive.Portal>
  );
}

/**
 * Actionable context-menu row.
 *
 * `inset` and `variant` (default `"default"`) are exposed as `data-inset` and
 * `data-variant` attributes, which the class list keys off for extra left
 * padding and destructive colouring respectively.
 */
function ContextMenuItem({
  className,
  inset,
  variant = "default",
  ...itemProps
}: React.ComponentProps<typeof ContextMenuPrimitive.Item> & {
  inset?: boolean;
  variant?: "default" | "destructive";
}) {
  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:text-destructive focus:*:[svg]:text-accent-foreground gap-2.5 rounded-xl px-3 py-2 text-sm [&_svg:not([class*='size-'])]:size-4 group/context-menu-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  return (
    <ContextMenuPrimitive.Item
      data-slot="context-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={itemClassName}
      {...itemProps}
    />
  );
}

/**
 * Row that opens a submenu. Renders the children followed by a right-arrow
 * icon pushed to the far edge; `inset` is exposed as `data-inset`.
 */
function ContextMenuSubTrigger(
  subTriggerProps: React.ComponentProps<
    typeof ContextMenuPrimitive.SubTrigger
  > & {
    inset?: boolean;
  },
) {
  const { className, inset, children, ...rest } = subTriggerProps;
  const subTriggerClassName = cn(
    "focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground rounded-xl px-3 py-2 text-sm [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );
  const arrowIcon = (
    <HugeiconsIcon
      icon={ArrowRight01Icon}
      strokeWidth={2}
      className="ml-auto"
    />
  );

  return (
    <ContextMenuPrimitive.SubTrigger
      data-slot="context-menu-sub-trigger"
      data-inset={inset}
      className={subTriggerClassName}
      {...rest}
    >
      {children}
      {arrowIcon}
    </ContextMenuPrimitive.SubTrigger>
  );
}

/**
 * Floating panel of a nested submenu, with the same open/close animations as
 * the top-level content.
 */
function ContextMenuSubContent(
  subContentProps: React.ComponentProps<typeof ContextMenuPrimitive.SubContent>,
) {
  const { className, ...rest } = subContentProps;
  const subContentClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 bg-popover text-popover-foreground min-w-32 rounded-md border p-1 shadow-lg duration-100 z-50 origin-(--radix-context-menu-content-transform-origin) overflow-hidden",
    className,
  );

  return (
    <ContextMenuPrimitive.SubContent
      data-slot="context-menu-sub-content"
      className={subContentClassName}
      {...rest}
    />
  );
}

/**
 * Toggleable context-menu row. The item indicator (a tick icon) sits in an
 * absolutely positioned slot at the right edge, ahead of the children in DOM
 * order.
 */
function ContextMenuCheckboxItem(
  checkboxProps: React.ComponentProps<typeof ContextMenuPrimitive.CheckboxItem>,
) {
  const { className, children, checked, ...rest } = checkboxProps;
  const checkboxClassName = cn(
    "focus:bg-accent focus:text-accent-foreground gap-2 rounded-xl py-2 pr-8 pl-3 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );
  const indicatorSlot = (
    <span className="absolute right-2 pointer-events-none">
      <ContextMenuPrimitive.ItemIndicator>
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </ContextMenuPrimitive.ItemIndicator>
    </span>
  );

  return (
    <ContextMenuPrimitive.CheckboxItem
      data-slot="context-menu-checkbox-item"
      className={checkboxClassName}
      checked={checked}
      {...rest}
    >
      {indicatorSlot}
      {children}
    </ContextMenuPrimitive.CheckboxItem>
  );
}

/**
 * Single-choice context-menu row. The item indicator (a tick icon) sits in an
 * absolutely positioned slot at the right edge, ahead of the children in DOM
 * order.
 */
function ContextMenuRadioItem(
  radioProps: React.ComponentProps<typeof ContextMenuPrimitive.RadioItem>,
) {
  const { className, children, ...rest } = radioProps;
  const radioClassName = cn(
    "focus:bg-accent focus:text-accent-foreground gap-2 rounded-sm py-1.5 pr-8 pl-2 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );
  const indicatorSlot = (
    <span className="absolute right-2 pointer-events-none">
      <ContextMenuPrimitive.ItemIndicator>
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </ContextMenuPrimitive.ItemIndicator>
    </span>
  );

  return (
    <ContextMenuPrimitive.RadioItem
      data-slot="context-menu-radio-item"
      className={radioClassName}
      {...rest}
    >
      {indicatorSlot}
      {children}
    </ContextMenuPrimitive.RadioItem>
  );
}

/**
 * Non-interactive heading inside a context menu. `inset` is exposed as
 * `data-inset`, which the class list uses to add left padding.
 */
function ContextMenuLabel(
  labelProps: React.ComponentProps<typeof ContextMenuPrimitive.Label> & {
    inset?: boolean;
  },
) {
  const { className, inset, ...rest } = labelProps;
  const labelClassName = cn(
    "text-muted-foreground px-3 py-2.5 text-xs data-[inset]:pl-8",
    className,
  );

  return (
    <ContextMenuPrimitive.Label
      data-slot="context-menu-label"
      data-inset={inset}
      className={labelClassName}
      {...rest}
    />
  );
}

/**
 * Thin horizontal divider between context-menu sections.
 */
function ContextMenuSeparator(
  separatorProps: React.ComponentProps<typeof ContextMenuPrimitive.Separator>,
) {
  const { className, ...rest } = separatorProps;
  const separatorClassName = cn("bg-border/50 -mx-1 my-1 h-px", className);

  return (
    <ContextMenuPrimitive.Separator
      data-slot="context-menu-separator"
      className={separatorClassName}
      {...rest}
    />
  );
}

/**
 * Right-aligned keyboard-shortcut hint for a context-menu item, rendered as
 * a plain `<span>`.
 */
function ContextMenuShortcut(shortcutProps: React.ComponentProps<"span">) {
  const { className, ...rest } = shortcutProps;
  const shortcutClassName = cn(
    "text-muted-foreground group-focus/context-menu-item:text-accent-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return (
    <span
      data-slot="context-menu-shortcut"
      className={shortcutClassName}
      {...rest}
    />
  );
}

export {
  ContextMenu,
  ContextMenuTrigger,
  ContextMenuContent,
  ContextMenuItem,
  ContextMenuCheckboxItem,
  ContextMenuRadioItem,
  ContextMenuLabel,
  ContextMenuSeparator,
  ContextMenuShortcut,
  ContextMenuGroup,
  ContextMenuPortal,
  ContextMenuSub,
  ContextMenuSubContent,
  ContextMenuSubTrigger,
  ContextMenuRadioGroup,
};


================================================
FILE: studio/frontend/src/components/ui/data-table.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  type ColumnDef,
  type SortingState,
  flexRender,
  getCoreRowModel,
  getSortedRowModel,
  useReactTable,
} from "@tanstack/react-table";
import { useState } from "react";

import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { cn } from "@/lib/utils";

// Props accepted by <DataTable>.
interface DataTableProps<TData, TValue> {
  // TanStack Table column definitions.
  columns: ColumnDef<TData, TValue>[];
  // Row data handed to the table instance.
  data: TData[];
  // Extra classes for the wrapper <div> around the table.
  className?: string;
  // Invoked on row click with the original row object, the row's index in the
  // rendered row model, and the table row id.
  onRowClick?: (row: TData, rowIndex: number, rowId: string) => void;
  // Optional per-row class hook; receives the same arguments as `onRowClick`
  // and its result is merged into the row's class list.
  getRowClassName?: (
    row: TData,
    rowIndex: number,
    rowId: string,
  ) => string | undefined;
}

/**
 * Generic table built on TanStack Table and the local `Table` primitives.
 *
 * Keeps sorting state internally, renders a sticky header, zebra-striped
 * body rows, and a single "No results." row when the row model is empty.
 * `onRowClick` and `getRowClassName` both receive the original row object,
 * the row's index in the rendered row model, and the table row id.
 */
export function DataTable<TData, TValue>(
  tableProps: DataTableProps<TData, TValue>,
) {
  const { columns, data, className, onRowClick, getRowClassName } = tableProps;
  const [sorting, setSorting] = useState<SortingState>([]);

  // eslint-disable-next-line react-hooks/incompatible-library
  const table = useReactTable({
    data,
    columns,
    getCoreRowModel: getCoreRowModel(),
    getSortedRowModel: getSortedRowModel(),
    onSortingChange: setSorting,
    state: { sorting },
  });

  const headerRows = table.getHeaderGroups().map((headerGroup) => (
    <TableRow
      key={headerGroup.id}
      className="bg-muted/60 hover:bg-muted/60 border-b border-border/60"
    >
      {headerGroup.headers.map((header) => {
        const size = header.getSize();
        // A size of exactly 150 is treated as "no explicit width" —
        // presumably TanStack Table's default column size; confirm.
        const width = size !== 150 ? size : undefined;
        const headerContent = header.isPlaceholder
          ? null
          : flexRender(header.column.columnDef.header, header.getContext());

        return (
          <TableHead
            key={header.id}
            className="border-r border-border/40 last:border-r-0 h-11 px-4 text-xs"
            style={{ width }}
          >
            {headerContent}
          </TableHead>
        );
      })}
    </TableRow>
  ));

  const rows = table.getRowModel().rows;

  const emptyRow = (
    <TableRow>
      <TableCell
        colSpan={columns.length}
        className="h-32 text-center text-muted-foreground text-sm"
      >
        No results.
      </TableCell>
    </TableRow>
  );

  const bodyRows = rows.length
    ? rows.map((row, idx) => {
        const stripeClass = idx % 2 === 0 ? "bg-background" : "bg-muted/20";

        return (
          <TableRow
            key={row.id}
            data-state={row.getIsSelected() ? "selected" : undefined}
            className={cn(
              "transition-colors border-b border-border/30",
              stripeClass,
              "hover:bg-primary/[0.03]",
              getRowClassName?.(row.original, idx, row.id),
            )}
            onClick={() => onRowClick?.(row.original, idx, row.id)}
          >
            {row.getVisibleCells().map((cell) => (
              <TableCell
                key={cell.id}
                className="border-r border-border/20 last:border-r-0 text-[13px] py-3 px-4 align-top whitespace-normal"
              >
                {flexRender(cell.column.columnDef.cell, cell.getContext())}
              </TableCell>
            ))}
          </TableRow>
        );
      })
    : emptyRow;

  return (
    <div className={cn("w-full", className)}>
      <Table>
        <TableHeader className="sticky top-0 z-10">{headerRows}</TableHeader>
        <TableBody>{bodyRows}</TableBody>
      </Table>
    </div>
  );
}


================================================
FILE: studio/frontend/src/components/ui/dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Dialog as DialogPrimitive } from "radix-ui";
import type * as React from "react";
import { createContext, useContext } from "react";

import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
import { Cancel01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

// Carries the portal container chosen by the nearest <DialogContent>; `null`
// when none was provided or when outside a dialog.
const DialogPortalContainerContext = createContext<HTMLElement | null>(null);

/**
 * Returns the portal container published by the nearest `DialogContent`, or
 * `null` if there is none.
 */
export function useDialogPortalContainer(): HTMLElement | null {
  const portalContainer = useContext(DialogPortalContainerContext);
  return portalContainer;
}

/**
 * Dialog root; forwards all props to the Radix root primitive.
 */
function Dialog(rootProps: React.ComponentProps<typeof DialogPrimitive.Root>) {
  return <DialogPrimitive.Root data-slot="dialog" {...rootProps} />;
}

/**
 * Pass-through wrapper around the Radix dialog trigger that adds a
 * `data-slot` marker.
 */
function DialogTrigger(
  triggerProps: React.ComponentProps<typeof DialogPrimitive.Trigger>,
) {
  return <DialogPrimitive.Trigger data-slot="dialog-trigger" {...triggerProps} />;
}

/**
 * Pass-through wrapper around the Radix dialog portal that adds a
 * `data-slot` marker.
 */
function DialogPortal(
  portalProps: React.ComponentProps<typeof DialogPrimitive.Portal>,
) {
  return <DialogPrimitive.Portal data-slot="dialog-portal" {...portalProps} />;
}

/**
 * Pass-through wrapper around the Radix dialog close primitive that adds a
 * `data-slot` marker.
 */
function DialogClose(
  closeProps: React.ComponentProps<typeof DialogPrimitive.Close>,
) {
  return <DialogPrimitive.Close data-slot="dialog-close" {...closeProps} />;
}

/**
 * Dimmed backdrop behind the dialog. `position` selects `fixed` (default) or
 * `absolute` positioning; any value other than `"fixed"` yields `absolute`.
 */
function DialogOverlay({
  className,
  position = "fixed",
  ...overlayProps
}: React.ComponentProps<typeof DialogPrimitive.Overlay> & {
  position?: "fixed" | "absolute";
}) {
  const positionClass = position === "fixed" ? "fixed" : "absolute";
  const overlayClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/80 duration-100  inset-0 isolate z-50",
    positionClass,
    className,
  );

  return (
    <DialogPrimitive.Overlay
      data-slot="dialog-overlay"
      className={overlayClassName}
      {...overlayProps}
    />
  );
}

/**
 * Dialog body: renders the overlay and the content panel inside a portal and
 * publishes the portal container through `DialogPortalContainerContext`.
 *
 * @param showCloseButton  When truthy (default `true`), appends a ghost icon
 *                         button wrapped in `DialogPrimitive.Close`.
 * @param container        Element handed to the portal; `null`/`undefined`
 *                         is passed to the portal as `undefined`.
 * @param position         Positioning class for the panel (`"fixed"` default).
 * @param overlayClassName Extra classes for the overlay.
 * @param overlayPosition  Positioning for the overlay; falls back to
 *                         `position` when not provided.
 * Remaining props are forwarded to `DialogPrimitive.Content`.
 */
function DialogContent(
  props: React.ComponentProps<typeof DialogPrimitive.Content> & {
    showCloseButton?: boolean;
    container?: HTMLElement | null;
    position?: "fixed" | "absolute";
    overlayClassName?: string;
    overlayPosition?: "fixed" | "absolute";
  },
) {
  const {
    className,
    children,
    showCloseButton = true,
    container,
    position = "fixed",
    overlayClassName,
    overlayPosition,
    ...contentProps
  } = props;

  // The context always carries an explicit null rather than undefined.
  const portalTarget = container ?? null;

  const panelClassName = cn(
    "bg-background data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 ring-foreground/5 grid max-w-[calc(100%-2rem)] gap-6 rounded-4xl p-6 text-sm ring-1 duration-100 sm:max-w-md top-1/2 left-1/2 z-50 w-full -translate-x-1/2 -translate-y-1/2",
    position === "fixed" ? "fixed" : "absolute",
    className,
  );

  const closeButton = showCloseButton && (
    <DialogPrimitive.Close data-slot="dialog-close" asChild>
      <Button
        variant="ghost"
        className="absolute top-4 right-4"
        size="icon-sm"
      >
        <HugeiconsIcon icon={Cancel01Icon} strokeWidth={2} />
        <span className="sr-only">Close</span>
      </Button>
    </DialogPrimitive.Close>
  );

  return (
    <DialogPortalContainerContext.Provider value={portalTarget}>
      <DialogPortal container={portalTarget ?? undefined}>
        <DialogOverlay
          className={overlayClassName}
          position={overlayPosition ?? position}
        />
        <DialogPrimitive.Content
          data-slot="dialog-content"
          className={panelClassName}
          {...contentProps}
        >
          {children}
          {closeButton}
        </DialogPrimitive.Content>
      </DialogPortal>
    </DialogPortalContainerContext.Provider>
  );
}

/**
 * Vertical flex container for the dialog's title area. Caller classes are
 * merged after the defaults; other props go straight to the `<div>`.
 */
function DialogHeader(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const headerClassName = cn("gap-2 flex flex-col", className);

  return (
    <div data-slot="dialog-header" className={headerClassName} {...divProps} />
  );
}

/**
 * Footer row for dialog actions.
 *
 * @param showCloseButton When truthy (default `false`), an outline "Close"
 *                        button wrapped in `DialogPrimitive.Close` is
 *                        rendered after `children`.
 * Remaining props are forwarded to the wrapping `<div>`.
 */
function DialogFooter(
  props: React.ComponentProps<"div"> & {
    showCloseButton?: boolean;
  },
) {
  const { className, showCloseButton = false, children, ...divProps } = props;

  const footerClassName = cn(
    "flex flex-col-reverse gap-2 sm:flex-row sm:justify-end",
    className,
  );

  const closeAction = showCloseButton && (
    <DialogPrimitive.Close asChild>
      <Button variant="outline">Close</Button>
    </DialogPrimitive.Close>
  );

  return (
    <div data-slot="dialog-footer" className={footerClassName} {...divProps}>
      {children}
      {closeAction}
    </div>
  );
}

/**
 * Renders `DialogPrimitive.Title` with the default title typography; caller
 * classes are merged last and other props are forwarded.
 */
function DialogTitle(
  props: React.ComponentProps<typeof DialogPrimitive.Title>,
) {
  const { className, ...titleProps } = props;
  const titleClassName = cn("text-base leading-none font-medium", className);

  return (
    <DialogPrimitive.Title
      data-slot="dialog-title"
      className={titleClassName}
      {...titleProps}
    />
  );
}

/**
 * Renders `DialogPrimitive.Description` with muted text styling (including
 * underline rules for direct `<a>` children); other props are forwarded.
 */
function DialogDescription(
  props: React.ComponentProps<typeof DialogPrimitive.Description>,
) {
  const { className, ...descriptionProps } = props;
  const descriptionClassName = cn(
    "text-muted-foreground *:[a]:hover:text-foreground text-sm *:[a]:underline *:[a]:underline-offset-3",
    className,
  );

  return (
    <DialogPrimitive.Description
      data-slot="dialog-description"
      className={descriptionClassName}
      {...descriptionProps}
    />
  );
}

export {
  Dialog,
  DialogClose,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogOverlay,
  DialogPortal,
  DialogPortalContainerContext,
  DialogTitle,
  DialogTrigger,
};


================================================
FILE: studio/frontend/src/components/ui/dropdown-menu.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { DropdownMenu as DropdownMenuPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { ArrowRight01Icon, Tick02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Dropdown root: renders `DropdownMenuPrimitive.Root` with
 * `data-slot="dropdown-menu"` and forwards every prop.
 */
function DropdownMenu(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Root>,
) {
  return <DropdownMenuPrimitive.Root data-slot="dropdown-menu" {...props} />;
}

/**
 * Renders `DropdownMenuPrimitive.Portal` tagged with
 * `data-slot="dropdown-menu-portal"`, forwarding all props.
 */
function DropdownMenuPortal(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Portal>,
) {
  return (
    <DropdownMenuPrimitive.Portal data-slot="dropdown-menu-portal" {...props} />
  );
}

/**
 * Renders `DropdownMenuPrimitive.Trigger` tagged with
 * `data-slot="dropdown-menu-trigger"`, forwarding all props.
 */
function DropdownMenuTrigger(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Trigger>,
) {
  return (
    <DropdownMenuPrimitive.Trigger
      data-slot="dropdown-menu-trigger"
      {...props}
    />
  );
}

/**
 * Menu panel rendered inside its own `DropdownMenuPrimitive.Portal`.
 *
 * @param align      Forwarded alignment; defaults to `"start"`.
 * @param sideOffset Forwarded offset; defaults to `4`.
 * @param className  Extra classes merged after the built-in styling.
 * Remaining props are forwarded to `DropdownMenuPrimitive.Content`.
 */
function DropdownMenuContent(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Content>,
) {
  const { className, align = "start", sideOffset = 4, ...contentProps } = props;

  const contentClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 shadow-border ring-1 ring-border bg-popover text-popover-foreground min-w-48 rounded-lg p-1 duration-100 z-50 max-h-(--radix-dropdown-menu-content-available-height) w-(--radix-dropdown-menu-trigger-width) origin-(--radix-dropdown-menu-content-transform-origin) overflow-x-hidden overflow-y-auto data-[state=closed]:overflow-hidden",
    className,
  );

  return (
    <DropdownMenuPrimitive.Portal>
      <DropdownMenuPrimitive.Content
        data-slot="dropdown-menu-content"
        sideOffset={sideOffset}
        align={align}
        className={contentClassName}
        {...contentProps}
      />
    </DropdownMenuPrimitive.Portal>
  );
}

/**
 * Renders `DropdownMenuPrimitive.Group` tagged with
 * `data-slot="dropdown-menu-group"`, forwarding all props.
 */
function DropdownMenuGroup(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Group>,
) {
  return (
    <DropdownMenuPrimitive.Group data-slot="dropdown-menu-group" {...props} />
  );
}

/**
 * Single menu entry.
 *
 * @param inset   Emitted as `data-inset`; the styles add left padding when
 *                the attribute is present.
 * @param variant Emitted as `data-variant`; `"default"` unless overridden,
 *                `"destructive"` switches to the destructive colour rules.
 * Remaining props are forwarded to `DropdownMenuPrimitive.Item`.
 */
function DropdownMenuItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
) {
  const { className, inset, variant = "default", ...itemProps } = props;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:text-destructive not-data-[variant=destructive]:focus:**:text-accent-foreground gap-2.5 rounded-lg px-3 py-2 text-sm [&_svg:not([class*='size-'])]:size-4 group/dropdown-menu-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  return (
    <DropdownMenuPrimitive.Item
      data-slot="dropdown-menu-item"
      data-inset={inset}
      data-variant={variant}
      className={itemClassName}
      {...itemProps}
    />
  );
}

/**
 * Checkable menu entry. A tick icon inside
 * `DropdownMenuPrimitive.ItemIndicator` is placed at the right edge, ahead
 * of `children` in the markup.
 *
 * `checked` is passed explicitly; all other props are forwarded to
 * `DropdownMenuPrimitive.CheckboxItem`.
 *
 * NOTE(review): the indicator's class string lists `pointer-events-none`
 * twice; it is reproduced unchanged here.
 */
function DropdownMenuCheckboxItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.CheckboxItem>,
) {
  const { className, children, checked, ...itemProps } = props;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-2.5 rounded-lg py-2 pr-8 pl-3 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  const indicator = (
    <span
      className="pointer-events-none absolute right-2 flex items-center justify-center pointer-events-none"
      data-slot="dropdown-menu-checkbox-item-indicator"
    >
      <DropdownMenuPrimitive.ItemIndicator>
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </DropdownMenuPrimitive.ItemIndicator>
    </span>
  );

  return (
    <DropdownMenuPrimitive.CheckboxItem
      data-slot="dropdown-menu-checkbox-item"
      className={itemClassName}
      checked={checked}
      {...itemProps}
    >
      {indicator}
      {children}
    </DropdownMenuPrimitive.CheckboxItem>
  );
}

/**
 * Renders `DropdownMenuPrimitive.RadioGroup` tagged with
 * `data-slot="dropdown-menu-radio-group"`, forwarding all props.
 */
function DropdownMenuRadioGroup(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.RadioGroup>,
) {
  return (
    <DropdownMenuPrimitive.RadioGroup
      data-slot="dropdown-menu-radio-group"
      {...props}
    />
  );
}

/**
 * Radio-style menu entry. A tick icon inside
 * `DropdownMenuPrimitive.ItemIndicator` is placed at the right edge, ahead
 * of `children` in the markup. All props other than `className` and
 * `children` are forwarded to `DropdownMenuPrimitive.RadioItem`.
 *
 * NOTE(review): the indicator's class string lists `pointer-events-none`
 * twice; it is reproduced unchanged here.
 */
function DropdownMenuRadioItem(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.RadioItem>,
) {
  const { className, children, ...itemProps } = props;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-2.5 rounded-lg py-2 pr-8 pl-3 text-sm [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  const indicator = (
    <span
      className="pointer-events-none absolute right-2 flex items-center justify-center pointer-events-none"
      data-slot="dropdown-menu-radio-item-indicator"
    >
      <DropdownMenuPrimitive.ItemIndicator>
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </DropdownMenuPrimitive.ItemIndicator>
    </span>
  );

  return (
    <DropdownMenuPrimitive.RadioItem
      data-slot="dropdown-menu-radio-item"
      className={itemClassName}
      {...itemProps}
    >
      {indicator}
      {children}
    </DropdownMenuPrimitive.RadioItem>
  );
}

/**
 * Non-interactive heading inside a menu.
 *
 * @param inset Emitted as `data-inset`; the styles add left padding when the
 *              attribute is present.
 * Remaining props are forwarded to `DropdownMenuPrimitive.Label`.
 */
function DropdownMenuLabel(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Label> & {
    inset?: boolean;
  },
) {
  const { className, inset, ...labelProps } = props;
  const labelClassName = cn(
    "text-muted-foreground px-3 py-2.5 text-xs data-[inset]:pl-8",
    className,
  );

  return (
    <DropdownMenuPrimitive.Label
      data-slot="dropdown-menu-label"
      data-inset={inset}
      className={labelClassName}
      {...labelProps}
    />
  );
}

/**
 * One-pixel divider between menu sections; renders
 * `DropdownMenuPrimitive.Separator` and forwards remaining props.
 */
function DropdownMenuSeparator(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Separator>,
) {
  const { className, ...separatorProps } = props;
  const separatorClassName = cn("bg-border/50 -mx-1 my-1 h-px", className);

  return (
    <DropdownMenuPrimitive.Separator
      data-slot="dropdown-menu-separator"
      className={separatorClassName}
      {...separatorProps}
    />
  );
}

/**
 * Right-aligned `<span>` (via `ml-auto`) for keyboard-shortcut hints. Its
 * colour follows focus on an ancestor in the `dropdown-menu-item` group.
 */
function DropdownMenuShortcut(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const shortcutClassName = cn(
    "text-muted-foreground group-focus/dropdown-menu-item:text-accent-foreground ml-auto text-xs tracking-widest",
    className,
  );

  return (
    <span
      data-slot="dropdown-menu-shortcut"
      className={shortcutClassName}
      {...spanProps}
    />
  );
}

/**
 * Renders `DropdownMenuPrimitive.Sub` tagged with
 * `data-slot="dropdown-menu-sub"`, forwarding all props.
 */
function DropdownMenuSub(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.Sub>,
) {
  return <DropdownMenuPrimitive.Sub data-slot="dropdown-menu-sub" {...props} />;
}

/**
 * Menu entry that opens a sub-menu. `children` are followed by a
 * right-arrow icon pushed to the far edge with `ml-auto`.
 *
 * @param inset Emitted as `data-inset`; the styles add left padding when the
 *              attribute is present.
 * Remaining props are forwarded to `DropdownMenuPrimitive.SubTrigger`.
 */
function DropdownMenuSubTrigger(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.SubTrigger> & {
    inset?: boolean;
  },
) {
  const { className, inset, children, ...triggerProps } = props;

  const triggerClassName = cn(
    "focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground not-data-[variant=destructive]:focus:**:text-accent-foreground gap-2 rounded-lg px-3 py-2 text-sm [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-hidden select-none data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  return (
    <DropdownMenuPrimitive.SubTrigger
      data-slot="dropdown-menu-sub-trigger"
      data-inset={inset}
      className={triggerClassName}
      {...triggerProps}
    >
      {children}
      <HugeiconsIcon
        icon={ArrowRight01Icon}
        strokeWidth={2}
        className="ml-auto"
      />
    </DropdownMenuPrimitive.SubTrigger>
  );
}

/**
 * Panel for a nested menu; renders `DropdownMenuPrimitive.SubContent` with
 * popover styling. Unlike `DropdownMenuContent`, no portal is added here.
 */
function DropdownMenuSubContent(
  props: React.ComponentProps<typeof DropdownMenuPrimitive.SubContent>,
) {
  const { className, ...subContentProps } = props;

  const subContentClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 shadow-border ring-1 ring-border bg-popover text-popover-foreground min-w-36 rounded-lg p-1 duration-100 z-50 origin-(--radix-dropdown-menu-content-transform-origin) overflow-hidden",
    className,
  );

  return (
    <DropdownMenuPrimitive.SubContent
      data-slot="dropdown-menu-sub-content"
      className={subContentClassName}
      {...subContentProps}
    />
  );
}

export {
  DropdownMenu,
  DropdownMenuPortal,
  DropdownMenuTrigger,
  DropdownMenuContent,
  DropdownMenuGroup,
  DropdownMenuLabel,
  DropdownMenuItem,
  DropdownMenuCheckboxItem,
  DropdownMenuRadioGroup,
  DropdownMenuRadioItem,
  DropdownMenuSeparator,
  DropdownMenuShortcut,
  DropdownMenuSub,
  DropdownMenuSubTrigger,
  DropdownMenuSubContent,
};


================================================
FILE: studio/frontend/src/components/ui/empty.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cva, type VariantProps } from "class-variance-authority"

import { cn } from "@/lib/utils"

/**
 * Outer container for an empty-state placeholder: a centred flex column.
 * Caller classes are merged last; other props go to the `<div>`.
 */
function Empty(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props
  const emptyClassName = cn(
    "gap-4 rounded-lg border-dashed p-12 flex w-full min-w-0 flex-1 flex-col items-center justify-center text-center text-balance",
    className
  )

  return <div data-slot="empty" className={emptyClassName} {...divProps} />
}

/**
 * Width-capped, centred column that groups the empty state's heading
 * elements. Other props go to the `<div>`.
 */
function EmptyHeader(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props
  const headerClassName = cn(
    "gap-2 flex max-w-sm flex-col items-center",
    className
  )

  return (
    <div data-slot="empty-header" className={headerClassName} {...divProps} />
  )
}

// Class-name generator for EmptyMedia. The first argument holds the base
// classes shared by every variant; `variant` selects additional classes.
const emptyMediaVariants = cva(
  "mb-2 flex shrink-0 items-center justify-center [&_svg]:pointer-events-none [&_svg]:shrink-0",
  {
    variants: {
      variant: {
        // No background; the media is shown as-is.
        default: "bg-transparent",
        // Muted square tile (size-10) sizing unsized SVG children to size-6.
        icon: "bg-muted text-foreground flex size-10 shrink-0 items-center justify-center rounded-lg [&_svg:not([class*='size-'])]:size-6",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  }
)

/**
 * Media/icon slot of the empty state.
 *
 * @param variant Key into `emptyMediaVariants` (`"default"` unless
 *                overridden); also emitted as `data-variant`.
 * Note that the element is tagged `data-slot="empty-icon"`.
 */
function EmptyMedia(
  props: React.ComponentProps<"div"> & VariantProps<typeof emptyMediaVariants>
) {
  const { className, variant = "default", ...divProps } = props
  const mediaClassName = cn(emptyMediaVariants({ variant, className }))

  return (
    <div
      data-slot="empty-icon"
      data-variant={variant}
      className={mediaClassName}
      {...divProps}
    />
  )
}

/**
 * Heading text of the empty state, rendered as a `<div>` with title
 * typography. Other props are forwarded.
 */
function EmptyTitle(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props
  const titleClassName = cn("text-lg font-medium tracking-tight", className)

  return <div data-slot="empty-title" className={titleClassName} {...divProps} />
}

/**
 * Supporting text of the empty state, rendered as a `<div>` with muted
 * styling and underline rules for direct `<a>` children.
 *
 * The props are typed for `"div"` so that the declared ref and event-handler
 * element types match the element that is actually rendered.
 */
function EmptyDescription({ className, ...props }: React.ComponentProps<"div">) {
  return (
    <div
      data-slot="empty-description"
      className={cn(
        "text-sm/relaxed text-muted-foreground [&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4",
        className
      )}
      {...props}
    />
  )
}

/**
 * Width-capped, centred column for the empty state's body (e.g. actions).
 * Other props go to the `<div>`.
 */
function EmptyContent(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props
  const contentClassName = cn(
    "gap-4 text-sm flex w-full max-w-sm min-w-0 flex-col items-center text-balance",
    className
  )

  return (
    <div data-slot="empty-content" className={contentClassName} {...divProps} />
  )
}

export {
  Empty,
  EmptyHeader,
  EmptyTitle,
  EmptyDescription,
  EmptyContent,
  EmptyMedia,
}


================================================
FILE: studio/frontend/src/components/ui/field.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type VariantProps, cva } from "class-variance-authority";
import { useMemo } from "react";

import { Label } from "@/components/ui/label";
import { Separator } from "@/components/ui/separator";
import { cn } from "@/lib/utils";

/**
 * `<fieldset>` laid out as a flex column. The gap shrinks when a direct
 * child carries `data-slot=checkbox-group` or `data-slot=radio-group`.
 */
function FieldSet(props: React.ComponentProps<"fieldset">) {
  const { className, ...fieldsetProps } = props;
  const fieldSetClassName = cn(
    "gap-6 has-[>[data-slot=checkbox-group]]:gap-3 has-[>[data-slot=radio-group]]:gap-3 flex flex-col",
    className,
  );

  return (
    <fieldset
      data-slot="field-set"
      className={fieldSetClassName}
      {...fieldsetProps}
    />
  );
}

/**
 * `<legend>` for a field set.
 *
 * @param variant Emitted as `data-variant` (`"legend"` by default); the
 *                styles pick `text-base` for `"legend"` and `text-sm` for
 *                `"label"`.
 */
function FieldLegend(
  props: React.ComponentProps<"legend"> & { variant?: "legend" | "label" },
) {
  const { className, variant = "legend", ...legendProps } = props;
  const legendClassName = cn(
    "mb-3 font-medium data-[variant=label]:text-sm data-[variant=legend]:text-base",
    className,
  );

  return (
    <legend
      data-slot="field-legend"
      data-variant={variant}
      className={legendClassName}
      {...legendProps}
    />
  );
}

/**
 * Column container for several fields. It declares the `field-group` group
 * and container-query names that `fieldVariants`' responsive classes target.
 */
function FieldGroup(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const groupClassName = cn(
    "gap-7 data-[slot=checkbox-group]:gap-3 [&>[data-slot=field-group]]:gap-4 group/field-group @container/field-group flex w-full flex-col",
    className,
  );

  return (
    <div data-slot="field-group" className={groupClassName} {...divProps} />
  );
}

// Class-name generator for Field. The base classes apply to every field;
// `orientation` selects the layout-specific classes below.
const fieldVariants = cva(
  "data-[invalid=true]:text-destructive gap-3 group/field flex w-full",
  {
    variants: {
      orientation: {
        // Stacked: children are full width (except .sr-only elements).
        vertical: "flex-col [&>*]:w-full [&>.sr-only]:w-auto",
        // Side by side: the label flexes; top-aligned when a field-content
        // child is present.
        horizontal:
          "flex-row items-center [&>[data-slot=field-label]]:flex-auto has-[>[data-slot=field-content]]:items-start has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px",
        // Stacked by default; switches to the horizontal rules at the `@md`
        // breakpoint of the `field-group` container.
        responsive:
          "flex-col [&>*]:w-full [&>.sr-only]:w-auto @md/field-group:flex-row @md/field-group:items-center @md/field-group:[&>*]:w-auto @md/field-group:[&>[data-slot=field-label]]:flex-auto @md/field-group:has-[>[data-slot=field-content]]:items-start @md/field-group:has-[>[data-slot=field-content]]:[&>[role=checkbox],[role=radio]]:mt-px",
      },
    },
    defaultVariants: {
      orientation: "vertical",
    },
  },
);

/**
 * A single form field wrapper rendered as `<div role="group">`.
 *
 * @param orientation Layout key for `fieldVariants` (`"vertical"` unless
 *                    overridden); also emitted as `data-orientation`.
 * Remaining props are forwarded to the `<div>`.
 */
function Field(
  props: React.ComponentProps<"div"> & VariantProps<typeof fieldVariants>,
) {
  const { className, orientation = "vertical", ...divProps } = props;
  const fieldClassName = cn(fieldVariants({ orientation }), className);

  return (
    <div
      role="group"
      data-slot="field"
      data-orientation={orientation}
      className={fieldClassName}
      {...divProps}
    />
  );
}

/**
 * Flexible column (`flex-1`) inside a field. Other props are forwarded to
 * the `<div>`.
 */
function FieldContent(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const contentClassName = cn(
    "gap-1 group/field-content flex flex-1 flex-col leading-snug",
    className,
  );

  return (
    <div data-slot="field-content" className={contentClassName} {...divProps} />
  );
}

/**
 * `Label` styled for use inside a field. When it directly contains an
 * element with `data-slot=field`, the classes turn it into a bordered,
 * full-width column. Other props are forwarded to `Label`.
 */
function FieldLabel(props: React.ComponentProps<typeof Label>) {
  const { className, ...labelProps } = props;
  const labelClassName = cn(
    "has-data-checked:bg-primary/5 has-data-checked:border-primary/50 dark:has-data-checked:bg-primary/10 gap-2 group-data-[disabled=true]/field:opacity-50 has-[>[data-slot=field]]:rounded-xl has-[>[data-slot=field]]:border [&>*]:data-[slot=field]:p-4 group/field-label peer/field-label flex w-fit leading-snug",
    "has-[>[data-slot=field]]:w-full has-[>[data-slot=field]]:flex-col",
    className,
  );

  return (
    <Label data-slot="field-label" className={labelClassName} {...labelProps} />
  );
}

/**
 * Title text for a field, rendered as a plain `<div>`. It is tagged with
 * the same `data-slot="field-label"` value that `FieldLabel` uses.
 */
function FieldTitle(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;
  const titleClassName = cn(
    "gap-2 text-sm font-medium group-data-[disabled=true]/field:opacity-50 flex w-fit items-center leading-snug",
    className,
  );

  return (
    <div data-slot="field-label" className={titleClassName} {...divProps} />
  );
}

/**
 * Helper text for a field, rendered as a `<p>` with muted styling and
 * underline rules for direct `<a>` children. Other props are forwarded.
 */
function FieldDescription(props: React.ComponentProps<"p">) {
  const { className, ...paragraphProps } = props;
  const descriptionClassName = cn(
    "text-muted-foreground text-left text-sm [[data-variant=legend]+&]:-mt-1.5 leading-normal font-normal group-has-[[data-orientation=horizontal]]/field:text-balance",
    "last:mt-0 nth-last-2:-mt-1",
    "[&>a:hover]:text-primary [&>a]:underline [&>a]:underline-offset-4",
    className,
  );

  return (
    <p
      data-slot="field-description"
      className={descriptionClassName}
      {...paragraphProps}
    />
  );
}

/**
 * Horizontal rule between fields with an optional caption.
 *
 * A `Separator` is positioned across the middle of the wrapper. When
 * `children` is truthy it is rendered in a centred `<span>` with a
 * background, and `data-content` is set to `true` (otherwise `false`).
 */
function FieldSeparator(
  props: React.ComponentProps<"div"> & {
    children?: React.ReactNode;
  },
) {
  const { children, className, ...divProps } = props;

  const wrapperClassName = cn(
    "-my-2 h-5 text-sm group-data-[variant=outline]/field-group:-mb-2 relative",
    className,
  );

  const caption = children && (
    <span
      className="text-muted-foreground px-2 bg-background relative mx-auto block w-fit"
      data-slot="field-separator-content"
    >
      {children}
    </span>
  );

  return (
    <div
      data-slot="field-separator"
      data-content={Boolean(children)}
      className={wrapperClassName}
      {...divProps}
    >
      <Separator className="absolute inset-0 top-1/2" />
      {caption}
    </div>
  );
}

/**
 * Validation message area for a field, rendered as `<div role="alert">`.
 *
 * Content is resolved in this order:
 *  1. `children`, when truthy, is rendered as-is.
 *  2. Otherwise the non-empty `message` values from `errors` are collected
 *     and de-duplicated (first-occurrence order):
 *       - none      -> the component renders nothing;
 *       - exactly 1 -> the message is rendered as plain text;
 *       - several   -> a bulleted list with one item per message.
 *
 * @param errors Error objects (entries may be `undefined` or lack a
 *               `message`; such entries are ignored).
 * Remaining props are forwarded to the wrapping `<div>`.
 */
function FieldError({
  className,
  children,
  errors,
  ...props
}: React.ComponentProps<"div"> & {
  errors?: Array<{ message?: string } | undefined>;
}) {
  const content = useMemo(() => {
    if (children) {
      return children;
    }

    if (!errors?.length) {
      return null;
    }

    // Drop entries without a usable message BEFORE de-duplicating, so that
    // message-less entries neither force list rendering for a single real
    // message nor produce an empty alert list.
    const messages = [
      ...new Set(
        errors
          .map((error) => error?.message)
          .filter((message): message is string => Boolean(message)),
      ),
    ];

    if (messages.length === 0) {
      return null;
    }

    if (messages.length === 1) {
      return messages[0];
    }

    return (
      <ul className="ml-4 flex list-disc flex-col gap-1">
        {/* Messages are unique after de-duplication, so they are stable keys. */}
        {messages.map((message) => (
          <li key={message}>{message}</li>
        ))}
      </ul>
    );
  }, [children, errors]);

  if (!content) {
    return null;
  }

  return (
    <div
      role="alert"
      data-slot="field-error"
      className={cn("text-destructive text-sm font-normal", className)}
      {...props}
    >
      {content}
    </div>
  );
}

export {
  Field,
  FieldLabel,
  FieldDescription,
  FieldError,
  FieldGroup,
  FieldLegend,
  FieldSeparator,
  FieldSet,
  FieldContent,
  FieldTitle,
};


================================================
FILE: studio/frontend/src/components/ui/hover-card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { HoverCard as HoverCardPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Hover-card root: renders `HoverCardPrimitive.Root` with
 * `data-slot="hover-card"` and forwards every prop.
 */
function HoverCard(
  props: React.ComponentProps<typeof HoverCardPrimitive.Root>,
) {
  return <HoverCardPrimitive.Root data-slot="hover-card" {...props} />;
}

/**
 * Renders `HoverCardPrimitive.Trigger` tagged with
 * `data-slot="hover-card-trigger"`, forwarding all props.
 */
function HoverCardTrigger(
  props: React.ComponentProps<typeof HoverCardPrimitive.Trigger>,
) {
  return (
    <HoverCardPrimitive.Trigger data-slot="hover-card-trigger" {...props} />
  );
}

/**
 * Hover-card panel rendered inside its own `HoverCardPrimitive.Portal`.
 *
 * @param align      Forwarded alignment; defaults to `"center"`.
 * @param sideOffset Forwarded offset; defaults to `4`.
 * @param className  Extra classes merged after the built-in styling.
 * Remaining props are forwarded to `HoverCardPrimitive.Content`.
 */
function HoverCardContent(
  props: React.ComponentProps<typeof HoverCardPrimitive.Content>,
) {
  const { className, align = "center", sideOffset = 4, ...contentProps } = props;

  const contentClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/5 bg-popover text-popover-foreground w-72 rounded-2xl p-4 text-sm shadow-2xl ring-1 duration-100 z-50 origin-(--radix-hover-card-content-transform-origin) outline-hidden",
    className,
  );

  return (
    <HoverCardPrimitive.Portal data-slot="hover-card-portal">
      <HoverCardPrimitive.Content
        data-slot="hover-card-content"
        align={align}
        sideOffset={sideOffset}
        className={contentClassName}
        {...contentProps}
      />
    </HoverCardPrimitive.Portal>
  );
}

export { HoverCard, HoverCardTrigger, HoverCardContent };


================================================
FILE: studio/frontend/src/components/ui/input-group.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type VariantProps, cva } from "class-variance-authority";
import type * as React from "react";

import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";
import { cn } from "@/lib/utils";

/**
 * Bordered wrapper (`<div role="group">`) that visually joins a control with
 * its addons. Focus and invalid styling are driven by `:has()` selectors
 * that look at descendants by `data-slot`. Other props are forwarded.
 */
function InputGroup(props: React.ComponentProps<"div">) {
  const { className, ...divProps } = props;

  const groupClassName = cn(
    "border-input bg-input/30 has-[[data-slot=input-group-control]:focus-visible]:border-ring has-[[data-slot=input-group-control]:focus-visible]:ring-ring/50 has-[[data-slot][aria-invalid=true]]:ring-destructive/20 has-[[data-slot][aria-invalid=true]]:border-destructive dark:has-[[data-slot][aria-invalid=true]]:ring-destructive/40 h-9 rounded-4xl border transition-colors has-data-[align=block-end]:rounded-2xl has-data-[align=block-start]:rounded-2xl has-[[data-slot=input-group-control]:focus-visible]:ring-[3px] has-[[data-slot][aria-invalid=true]]:ring-[3px] has-[textarea]:rounded-xl has-[>[data-align=block-end]]:h-auto has-[>[data-align=block-end]]:flex-col has-[>[data-align=block-start]]:h-auto has-[>[data-align=block-start]]:flex-col has-[>[data-align=block-end]]:[&>input]:pt-3 has-[>[data-align=block-start]]:[&>input]:pb-3 has-[>[data-align=inline-end]]:[&>input]:pr-1.5 has-[>[data-align=inline-start]]:[&>input]:pl-1.5 [[data-slot=combobox-content]_&]:focus-within:border-inherit [[data-slot=combobox-content]_&]:focus-within:ring-0 group/input-group relative flex w-full min-w-0 items-center outline-none has-[>textarea]:h-auto",
    className,
  );

  return (
    <div
      data-slot="input-group"
      role="group"
      className={groupClassName}
      {...divProps}
    />
  );
}

// Class-name generator for InputGroupAddon. The base classes apply to every
// addon; `align` selects where the addon sits relative to the control.
const inputGroupAddonVariants = cva(
  "text-muted-foreground **:data-[slot=kbd]:bg-muted-foreground/10 h-auto gap-2 py-2 text-sm font-medium group-data-[disabled=true]/input-group:opacity-50 **:data-[slot=kbd]:rounded-4xl **:data-[slot=kbd]:px-1.5 [&>svg:not([class*='size-'])]:size-4 flex cursor-text items-center justify-center select-none",
  {
    variants: {
      align: {
        // Inline variants order the addon first/last within the row.
        "inline-start":
          "pl-3 has-[>button]:ml-[-0.25rem] has-[>kbd]:ml-[-0.15rem] order-first",
        "inline-end":
          "pr-3 has-[>button]:mr-[-0.25rem] has-[>kbd]:mr-[-0.15rem] order-last",
        // Block variants span the full width above/below the control.
        "block-start":
          "px-3 pt-3 group-has-[>input]/input-group:pt-3 [.border-b]:pb-3 order-first w-full justify-start",
        "block-end":
          "px-3 pb-3 group-has-[>input]/input-group:pb-3 [.border-t]:pt-3 order-last w-full justify-start",
      },
    },
    defaultVariants: {
      align: "inline-start",
    },
  },
);

/**
 * Decorative or interactive addon placed next to an input-group control.
 *
 * Clicking the addon focuses the first `<input>` or `<textarea>` found
 * inside the addon's parent element, so the addon behaves like part of the
 * control. Clicks that originate inside a `<button>` are left alone.
 *
 * @param align Key into `inputGroupAddonVariants` (`"inline-start"` unless
 *              overridden); also emitted as `data-align`.
 * Remaining props are forwarded to the `<div>`; because they are spread
 * last, a caller-supplied `onClick` replaces the focus behaviour.
 */
function InputGroupAddon({
  className,
  align = "inline-start",
  ...props
}: React.ComponentProps<"div"> & VariantProps<typeof inputGroupAddonVariants>) {
  return (
    <div
      role="group"
      data-slot="input-group-addon"
      data-align={align}
      className={cn(inputGroupAddonVariants({ align }), className)}
      onClick={(e) => {
        // Let nested buttons handle their own click.
        if ((e.target as HTMLElement).closest("button")) {
          return;
        }
        // Match textarea as well as input so groups built with
        // InputGroupTextarea also receive focus.
        e.currentTarget.parentElement
          ?.querySelector<HTMLInputElement | HTMLTextAreaElement>(
            "input, textarea",
          )
          ?.focus();
      }}
      {...props}
    />
  );
}

// Class-name generator for InputGroupButton. The base classes apply to every
// button; `size` selects the dimension-specific classes.
const inputGroupButtonVariants = cva(
  "gap-2 rounded-4xl text-sm shadow-none flex items-center",
  {
    variants: {
      size: {
        xs: "h-6 gap-1 px-1.5 [&>svg:not([class*='size-'])]:size-3.5",
        // Adds no classes of its own.
        sm: "",
        // Square, padding-free sizes intended for icon-only content.
        "icon-xs": "size-6 p-0 has-[>svg]:p-0",
        "icon-sm": "size-8 p-0 has-[>svg]:p-0",
      },
    },
    defaultVariants: {
      size: "xs",
    },
  },
);

/**
 * `Button` sized for use inside an input group.
 *
 * @param type    Button type; defaults to `"button"`.
 * @param variant Forwarded to `Button`; defaults to `"ghost"`.
 * @param size    Key into `inputGroupButtonVariants` (`"xs"` by default);
 *                emitted as `data-size` and NOT passed as `Button`'s `size`.
 * Remaining props are forwarded to `Button`.
 */
function InputGroupButton(
  props: Omit<React.ComponentProps<typeof Button>, "size"> &
    VariantProps<typeof inputGroupButtonVariants>,
) {
  const {
    className,
    type = "button",
    variant = "ghost",
    size = "xs",
    ...buttonProps
  } = props;

  const buttonClassName = cn(inputGroupButtonVariants({ size }), className);

  return (
    <Button
      type={type}
      data-size={size}
      variant={variant}
      className={buttonClassName}
      {...buttonProps}
    />
  );
}

/**
 * Muted inline text (optionally with icons) for use inside an input group.
 * Rendered as a `<span>`; other props are forwarded.
 */
function InputGroupText(props: React.ComponentProps<"span">) {
  const { className, ...spanProps } = props;
  const textClassName = cn(
    "text-muted-foreground gap-2 text-sm [&_svg:not([class*='size-'])]:size-4 flex items-center [&_svg]:pointer-events-none",
    className,
  );

  return <span className={textClassName} {...spanProps} />;
}

/**
 * `Input` with its own border, background, shadow and ring removed, tagged
 * `data-slot="input-group-control"`. Other props are forwarded to `Input`.
 */
function InputGroupInput(props: React.ComponentProps<"input">) {
  const { className, ...inputProps } = props;
  const controlClassName = cn(
    "rounded-none border-0 bg-transparent shadow-none ring-0 focus-visible:ring-0 aria-invalid:ring-0 dark:bg-transparent flex-1",
    className,
  );

  return (
    <Input
      data-slot="input-group-control"
      className={controlClassName}
      {...inputProps}
    />
  );
}

/**
 * `Textarea` with its own border, background, shadow and ring removed and
 * resizing disabled, tagged `data-slot="input-group-control"`. Other props
 * are forwarded to `Textarea`.
 */
function InputGroupTextarea(props: React.ComponentProps<"textarea">) {
  const { className, ...textareaProps } = props;
  const controlClassName = cn(
    "rounded-none border-0 bg-transparent py-2 shadow-none ring-0 focus-visible:ring-0 aria-invalid:ring-0 dark:bg-transparent flex-1 resize-none",
    className,
  );

  return (
    <Textarea
      data-slot="input-group-control"
      className={controlClassName}
      {...textareaProps}
    />
  );
}

export {
  InputGroup,
  InputGroupAddon,
  InputGroupButton,
  InputGroupText,
  InputGroupInput,
  InputGroupTextarea,
};


================================================
FILE: studio/frontend/src/components/ui/input.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Styled native `<input>`. `type` is passed through explicitly; caller
 * classes are merged after the defaults and all other props are forwarded.
 */
function Input(props: React.ComponentProps<"input">) {
  const { className, type, ...inputProps } = props;

  const inputClassName = cn(
    "bg-input/30 border-input focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 h-9 rounded-4xl border px-3 py-1 text-base transition-colors file:h-7 file:text-sm file:font-medium focus-visible:ring-[3px] aria-invalid:ring-[3px] md:text-sm file:text-foreground placeholder:text-muted-foreground w-full min-w-0 outline-none file:inline-flex file:border-0 file:bg-transparent disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  return (
    <input
      type={type}
      data-slot="input"
      className={inputClassName}
      {...inputProps}
    />
  );
}

export { Input };


================================================
FILE: studio/frontend/src/components/ui/label.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Label as LabelPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Styled `LabelPrimitive.Root`. The classes dim the label when a `group`
 * ancestor has `data-disabled=true` or a `peer` sibling is disabled. Other
 * props are forwarded.
 */
function Label(props: React.ComponentProps<typeof LabelPrimitive.Root>) {
  const { className, ...labelProps } = props;

  const labelClassName = cn(
    "gap-2 text-sm leading-none font-medium group-data-[disabled=true]:opacity-50 peer-disabled:opacity-50 flex items-center select-none group-data-[disabled=true]:pointer-events-none peer-disabled:cursor-not-allowed",
    className,
  );

  return (
    <LabelPrimitive.Root
      data-slot="label"
      className={labelClassName}
      {...labelProps}
    />
  );
}

export { Label };


================================================
FILE: studio/frontend/src/components/ui/light-rays.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { motion } from "motion/react";
import { type CSSProperties, useState } from "react";

import { cn } from "@/lib/utils";

/** Props accepted by `LightRays`, on top of the standard `<div>` attributes. */
interface LightRaysProps extends React.HTMLAttributes<HTMLDivElement> {
  /** Ref attached to the root `<div>`. */
  ref?: React.Ref<HTMLDivElement>;
  /** Number of rays to generate (defaults to 7 in `LightRays`). */
  count?: number;
  /** CSS color written to the `--light-rays-color` custom property. */
  color?: string;
  /** Blur radius in pixels, written to `--light-rays-blur`. */
  blur?: number;
  /** Base animation cycle passed to `createRays`; `LightRays` floors it at 0.1. */
  speed?: number;
  /** CSS length written to `--light-rays-length` (ray height). */
  length?: string;
}

/** Randomized parameters describing a single animated ray. */
type LightRay = {
  /** React key: `<index>-<left * 10, rounded>`. */
  id: string;
  /** Horizontal position, emitted as a percentage (`--ray-left`). */
  left: number;
  /** Base rotation the ray swings around. */
  rotate: number;
  /** Ray width, emitted in pixels (`--ray-width`). */
  width: number;
  /** Amount added to / subtracted from `rotate` during the animation. */
  swing: number;
  /** Delay passed to the animation transition. */
  delay: number;
  /** Duration passed to the animation transition. */
  duration: number;
  /** Peak opacity reached in the middle of the animation. */
  intensity: number;
};

/**
 * Builds `count` rays with randomized placement and timing.
 *
 * Returns an empty array when `count` is zero or negative. `cycle` scales the
 * random `delay` (0..cycle) and `duration` (0.75..1.25 × cycle) of each ray.
 */
const createRays = (count: number, cycle: number): LightRay[] => {
  if (count <= 0) return [];

  // Random value in [base, base + span).
  const pick = (base: number, span: number): number =>
    base + Math.random() * span;

  const buildRay = (_: unknown, index: number): LightRay => {
    const left = pick(8, 84);
    const geometryAndTiming = {
      left,
      rotate: pick(-28, 56),
      width: pick(160, 160),
      swing: pick(0.8, 1.8),
      delay: Math.random() * cycle,
      duration: cycle * pick(0.75, 0.5),
      intensity: pick(0.6, 0.5),
    };

    // The id combines the index with the rounded position so keys stay unique.
    return { id: `${index}-${Math.round(left * 10)}`, ...geometryAndTiming };
  };

  return Array.from({ length: count }, buildRay);
};

/**
 * Renders one animated ray.
 *
 * Position and width are exposed as CSS custom properties consumed by the
 * class names; opacity pulses 0 → `intensity` → 0 while the rotation swings
 * between `rotate - swing` and `rotate + swing`, repeating forever.
 */
const Ray = (ray: LightRay) => {
  const { left, rotate, width, swing, delay, duration, intensity } = ray;

  const rayVars = {
    "--ray-left": `${left}%`,
    "--ray-width": `${width}px`,
  } as CSSProperties;

  const minAngle = rotate - swing;
  const maxAngle = rotate + swing;

  return (
    <motion.div
      className="pointer-events-none absolute -top-[12%] left-[var(--ray-left)] h-[var(--light-rays-length)] w-[var(--ray-width)] origin-top -translate-x-1/2 rounded-full bg-gradient-to-b from-[color-mix(in_srgb,var(--light-rays-color)_70%,transparent)] to-transparent opacity-0 mix-blend-screen blur-[var(--light-rays-blur)]"
      style={rayVars}
      initial={{ rotate }}
      animate={{
        opacity: [0, intensity, 0],
        rotate: [minAngle, maxAngle, minAngle],
      }}
      transition={{
        duration,
        repeat: Number.POSITIVE_INFINITY,
        ease: "easeInOut",
        delay,
        // Short pause between repetitions, proportional to the ray's duration.
        repeatDelay: duration * 0.1,
      }}
    />
  );
};

/**
 * Decorative overlay of animated light rays on top of two radial glows.
 *
 * `color`, `blur` and `length` are published as CSS custom properties on the
 * root element; a caller-supplied `style` is spread after them. The rays are
 * generated once by the lazy `useState` initializer, so later changes to
 * `count` or `speed` do not regenerate them for a mounted instance.
 */
export function LightRays(lightRaysProps: LightRaysProps) {
  const {
    className,
    style,
    count = 7,
    color = "rgba(160, 210, 255, 0.2)",
    blur = 36,
    speed = 14,
    length = "70vh",
    ref,
    ...rest
  } = lightRaysProps;

  // The cycle is floored at 0.1 before being handed to createRays.
  const [rays] = useState(() => createRays(count, Math.max(speed, 0.1)));

  const rootClasses = cn(
    "pointer-events-none absolute inset-0 isolate overflow-hidden rounded-[inherit]",
    className,
  );
  const rootStyle = {
    "--light-rays-color": color,
    "--light-rays-blur": `${blur}px`,
    "--light-rays-length": length,
    ...style,
  } as CSSProperties;

  const upperLeftGlow = {
    background:
      "radial-gradient(circle at 20% 15%, color-mix(in srgb, var(--light-rays-color) 45%, transparent), transparent 70%)",
  } as CSSProperties;
  const upperRightGlow = {
    background:
      "radial-gradient(circle at 80% 10%, color-mix(in srgb, var(--light-rays-color) 35%, transparent), transparent 75%)",
  } as CSSProperties;

  return (
    <div ref={ref} className={rootClasses} style={rootStyle} {...rest}>
      <div className="absolute inset-0 overflow-hidden">
        <div
          aria-hidden
          className="absolute inset-0 opacity-60"
          style={upperLeftGlow}
        />
        <div
          aria-hidden
          className="absolute inset-0 opacity-60"
          style={upperRightGlow}
        />
        {rays.map((ray) => (
          <Ray key={ray.id} {...ray} />
        ))}
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/components/ui/menubar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Menubar as MenubarPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { ArrowRight01Icon, Tick02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Styled wrapper around `MenubarPrimitive.Root`.
 *
 * Adds `data-slot="menubar"` and the base bar classes; remaining props are
 * spread onto the primitive last.
 */
function Menubar(
  menubarProps: React.ComponentProps<typeof MenubarPrimitive.Root>,
) {
  const { className, ...rest } = menubarProps;
  const classes = cn(
    "bg-background h-9 rounded-2xl border p-1 flex items-center",
    className,
  );

  return (
    <MenubarPrimitive.Root data-slot="menubar" className={classes} {...rest} />
  );
}

/** Pass-through for `MenubarPrimitive.Menu` tagged with `data-slot="menubar-menu"`. */
function MenubarMenu(
  menuProps: React.ComponentProps<typeof MenubarPrimitive.Menu>,
) {
  return <MenubarPrimitive.Menu data-slot="menubar-menu" {...menuProps} />;
}

/** Pass-through for `MenubarPrimitive.Group` tagged with `data-slot="menubar-group"`. */
function MenubarGroup(
  groupProps: React.ComponentProps<typeof MenubarPrimitive.Group>,
) {
  return <MenubarPrimitive.Group data-slot="menubar-group" {...groupProps} />;
}

/** Pass-through for `MenubarPrimitive.Portal` tagged with `data-slot="menubar-portal"`. */
function MenubarPortal(
  portalProps: React.ComponentProps<typeof MenubarPrimitive.Portal>,
) {
  return <MenubarPrimitive.Portal data-slot="menubar-portal" {...portalProps} />;
}

/** Pass-through for `MenubarPrimitive.RadioGroup` tagged with `data-slot="menubar-radio-group"`. */
function MenubarRadioGroup(
  radioGroupProps: React.ComponentProps<typeof MenubarPrimitive.RadioGroup>,
) {
  const Group = MenubarPrimitive.RadioGroup;
  return <Group data-slot="menubar-radio-group" {...radioGroupProps} />;
}

/**
 * Styled wrapper around `MenubarPrimitive.Trigger`.
 *
 * Adds `data-slot="menubar-trigger"` and the trigger classes; remaining props
 * are spread onto the primitive last.
 */
function MenubarTrigger(
  triggerProps: React.ComponentProps<typeof MenubarPrimitive.Trigger>,
) {
  const { className, ...rest } = triggerProps;
  const classes = cn(
    "hover:bg-muted aria-expanded:bg-muted rounded-xl px-2.5 py-1 text-sm font-medium flex items-center outline-hidden select-none",
    className,
  );

  return (
    <MenubarPrimitive.Trigger
      data-slot="menubar-trigger"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Menubar dropdown content, rendered inside `MenubarPortal`.
 *
 * Defaults: `align="start"`, `alignOffset={-4}`, `sideOffset={8}`. Remaining
 * props are spread onto `MenubarPrimitive.Content` last.
 */
function MenubarContent(
  contentProps: React.ComponentProps<typeof MenubarPrimitive.Content>,
) {
  const {
    className,
    align = "start",
    alignOffset = -4,
    sideOffset = 8,
    ...rest
  } = contentProps;
  const classes = cn(
    "bg-popover text-popover-foreground data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/5 min-w-48 rounded-2xl p-1 shadow-2xl ring-1 duration-100 z-50 origin-(--radix-menubar-content-transform-origin) overflow-hidden",
    className,
  );

  return (
    <MenubarPortal>
      <MenubarPrimitive.Content
        data-slot="menubar-content"
        align={align}
        alignOffset={alignOffset}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </MenubarPortal>
  );
}

/**
 * Styled wrapper around `MenubarPrimitive.Item`.
 *
 * `inset` and `variant` (default `"default"`) are emitted as `data-inset` /
 * `data-variant` attributes, which the class names key off.
 */
function MenubarItem(
  itemProps: React.ComponentProps<typeof MenubarPrimitive.Item> & {
    inset?: boolean;
    variant?: "default" | "destructive";
  },
) {
  const { className, inset, variant = "default", ...rest } = itemProps;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/20 data-[variant=destructive]:focus:text-destructive data-[variant=destructive]:*:[svg]:!text-destructive not-data-[variant=destructive]:focus:**:text-accent-foreground gap-2.5 rounded-xl px-3 py-2 text-sm data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg:not([class*='size-'])]:size-4 group/menubar-item relative flex cursor-default items-center outline-hidden select-none data-[disabled]:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  return (
    <MenubarPrimitive.Item
      data-slot="menubar-item"
      data-inset={inset}
      data-variant={variant}
      className={classes}
      {...rest}
    />
  );
}

/**
 * Checkbox menu item with a tick icon shown through
 * `MenubarPrimitive.ItemIndicator`.
 *
 * `checked` is passed through explicitly; `children` render after the
 * absolutely-positioned indicator slot.
 */
function MenubarCheckboxItem(
  checkboxProps: React.ComponentProps<typeof MenubarPrimitive.CheckboxItem>,
) {
  const { className, children, checked, ...rest } = checkboxProps;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-2.5 rounded-xl py-2 pr-3 pl-8 text-sm data-disabled:opacity-50 relative flex cursor-default items-center outline-hidden select-none data-disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  const indicator = (
    <span className="left-2 size-4 [&_svg:not([class*='size-'])]:size-4 pointer-events-none absolute flex items-center justify-center">
      <MenubarPrimitive.ItemIndicator>
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </MenubarPrimitive.ItemIndicator>
    </span>
  );

  return (
    <MenubarPrimitive.CheckboxItem
      data-slot="menubar-checkbox-item"
      className={classes}
      checked={checked}
      {...rest}
    >
      {indicator}
      {children}
    </MenubarPrimitive.CheckboxItem>
  );
}

/**
 * Radio menu item with a tick icon shown through
 * `MenubarPrimitive.ItemIndicator`.
 *
 * `children` render after the absolutely-positioned indicator slot.
 */
function MenubarRadioItem(
  radioProps: React.ComponentProps<typeof MenubarPrimitive.RadioItem>,
) {
  const { className, children, ...rest } = radioProps;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground focus:**:text-accent-foreground gap-2.5 rounded-xl py-2 pr-3 pl-8 text-sm data-disabled:opacity-50 [&_svg:not([class*='size-'])]:size-4 relative flex cursor-default items-center outline-hidden select-none data-disabled:pointer-events-none [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  const indicator = (
    <span className="left-2 size-4 [&_svg:not([class*='size-'])]:size-4 pointer-events-none absolute flex items-center justify-center">
      <MenubarPrimitive.ItemIndicator>
        <HugeiconsIcon icon={Tick02Icon} strokeWidth={2} />
      </MenubarPrimitive.ItemIndicator>
    </span>
  );

  return (
    <MenubarPrimitive.RadioItem
      data-slot="menubar-radio-item"
      className={classes}
      {...rest}
    >
      {indicator}
      {children}
    </MenubarPrimitive.RadioItem>
  );
}

/**
 * Styled wrapper around `MenubarPrimitive.Label`.
 *
 * `inset` is emitted as `data-inset`, which the class names use to add left
 * padding.
 */
function MenubarLabel(
  labelProps: React.ComponentProps<typeof MenubarPrimitive.Label> & {
    inset?: boolean;
  },
) {
  const { className, inset, ...rest } = labelProps;
  const classes = cn(
    "text-muted-foreground px-3.5 py-2.5 text-xs data-[inset]:pl-8",
    className,
  );

  return (
    <MenubarPrimitive.Label
      data-slot="menubar-label"
      data-inset={inset}
      className={classes}
      {...rest}
    />
  );
}

/** Styled wrapper around `MenubarPrimitive.Separator` (`data-slot="menubar-separator"`). */
function MenubarSeparator(
  separatorProps: React.ComponentProps<typeof MenubarPrimitive.Separator>,
) {
  const { className, ...rest } = separatorProps;
  const classes = cn("bg-border/50 -mx-1 my-1 h-px", className);

  return (
    <MenubarPrimitive.Separator
      data-slot="menubar-separator"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Right-aligned `<span>` for a keyboard-shortcut hint inside a menubar item.
 *
 * Remaining props are spread onto the span last.
 */
function MenubarShortcut(shortcutProps: React.ComponentProps<"span">) {
  const { className, ...rest } = shortcutProps;
  const classes = cn(
    "text-muted-foreground group-focus/menubar-item:text-accent-foreground text-xs tracking-widest ml-auto",
    className,
  );

  return <span data-slot="menubar-shortcut" className={classes} {...rest} />;
}

/** Pass-through for `MenubarPrimitive.Sub` tagged with `data-slot="menubar-sub"`. */
function MenubarSub(
  subProps: React.ComponentProps<typeof MenubarPrimitive.Sub>,
) {
  return <MenubarPrimitive.Sub data-slot="menubar-sub" {...subProps} />;
}

/**
 * Styled wrapper around `MenubarPrimitive.SubTrigger`.
 *
 * Renders `children` followed by a right-arrow icon pushed to the end of the
 * row. `inset` is emitted as `data-inset`.
 */
function MenubarSubTrigger(
  subTriggerProps: React.ComponentProps<typeof MenubarPrimitive.SubTrigger> & {
    inset?: boolean;
  },
) {
  const { className, inset, children, ...rest } = subTriggerProps;
  const classes = cn(
    "focus:bg-accent focus:text-accent-foreground data-open:bg-accent data-open:text-accent-foreground gap-2 rounded-xl px-3 py-2 text-sm data-[inset]:pl-8 [&_svg:not([class*='size-'])]:size-4 flex cursor-default items-center outline-none select-none",
    className,
  );

  const arrow = (
    <HugeiconsIcon
      icon={ArrowRight01Icon}
      strokeWidth={2}
      className="ml-auto size-4"
    />
  );

  return (
    <MenubarPrimitive.SubTrigger
      data-slot="menubar-sub-trigger"
      data-inset={inset}
      className={classes}
      {...rest}
    >
      {children}
      {arrow}
    </MenubarPrimitive.SubTrigger>
  );
}

/**
 * Styled wrapper around `MenubarPrimitive.SubContent`.
 *
 * Adds `data-slot="menubar-sub-content"` and the popover classes; remaining
 * props are spread onto the primitive last.
 */
function MenubarSubContent(
  subContentProps: React.ComponentProps<typeof MenubarPrimitive.SubContent>,
) {
  const { className, ...rest } = subContentProps;
  const classes = cn(
    "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 ring-foreground/5 min-w-32 rounded-2xl p-1 shadow-2xl ring-1 duration-100 z-50 origin-(--radix-menubar-content-transform-origin) overflow-hidden",
    className,
  );

  return (
    <MenubarPrimitive.SubContent
      data-slot="menubar-sub-content"
      className={classes}
      {...rest}
    />
  );
}

export {
  Menubar,
  MenubarPortal,
  MenubarMenu,
  MenubarTrigger,
  MenubarContent,
  MenubarGroup,
  MenubarSeparator,
  MenubarLabel,
  MenubarItem,
  MenubarShortcut,
  MenubarCheckboxItem,
  MenubarRadioGroup,
  MenubarRadioItem,
  MenubarSub,
  MenubarSubTrigger,
  MenubarSubContent,
};


================================================
FILE: studio/frontend/src/components/ui/navigation-menu.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/* eslint-disable react-refresh/only-export-components */

import { cva } from "class-variance-authority";
import { NavigationMenu as NavigationMenuPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { ArrowDown01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * Root of the navigation menu.
 *
 * Wraps `NavigationMenuPrimitive.Root`, tags it with
 * `data-slot="navigation-menu"` and mirrors `viewport` into `data-viewport`
 * so descendant class names can key off it. When `viewport` is true (the
 * default) a `NavigationMenuViewport` is rendered after `children`.
 *
 * @param viewport Whether to render the shared viewport element. Defaults to `true`.
 */
export function NavigationMenu({
  className,
  children,
  viewport = true,
  ...props
}: React.ComponentProps<typeof NavigationMenuPrimitive.Root> & {
  viewport?: boolean;
}): React.ReactElement {
  return (
    <NavigationMenuPrimitive.Root
      data-slot="navigation-menu"
      data-viewport={viewport}
      className={cn(
        // `max-w-max` was listed twice; the redundant first occurrence is dropped.
        "group/navigation-menu relative flex max-w-max flex-1 items-center justify-center",
        className,
      )}
      {...props}
    >
      {children}
      {viewport && <NavigationMenuViewport />}
    </NavigationMenuPrimitive.Root>
  );
}

/**
 * Styled wrapper around `NavigationMenuPrimitive.List`.
 *
 * Adds `data-slot="navigation-menu-list"`; remaining props are spread last.
 */
export function NavigationMenuList(
  listProps: React.ComponentProps<typeof NavigationMenuPrimitive.List>,
): React.ReactElement {
  const { className, ...rest } = listProps;
  const classes = cn(
    "gap-0 group flex flex-1 list-none items-center justify-center",
    className,
  );

  return (
    <NavigationMenuPrimitive.List
      data-slot="navigation-menu-list"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Styled wrapper around `NavigationMenuPrimitive.Item`.
 *
 * Adds `data-slot="navigation-menu-item"` and `relative` positioning.
 */
export function NavigationMenuItem(
  itemProps: React.ComponentProps<typeof NavigationMenuPrimitive.Item>,
): React.ReactElement {
  const { className, ...rest } = itemProps;
  const classes = cn("relative", className);

  return (
    <NavigationMenuPrimitive.Item
      data-slot="navigation-menu-item"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Class-name factory (built with `cva`) for navigation-menu triggers.
 *
 * Exported so other elements can reuse the trigger look; `NavigationMenuTrigger`
 * calls it with no arguments.
 */
export const navigationMenuTriggerStyle = cva(
  "bg-background hover:bg-muted focus:bg-muted data-open:hover:bg-muted data-open:focus:bg-muted data-open:bg-muted/50 focus-visible:ring-ring/50 data-popup-open:bg-muted/50 data-popup-open:hover:bg-muted rounded-2xl px-4.5 py-2.5 text-sm font-medium transition-all focus-visible:ring-[3px] focus-visible:outline-1 disabled:opacity-50 group/navigation-menu-trigger inline-flex h-9 w-max items-center justify-center disabled:pointer-events-none outline-none",
);

/**
 * Styled wrapper around `NavigationMenuPrimitive.Trigger`.
 *
 * Renders `children`, a single space, and a down-arrow icon whose classes
 * rotate it 180° while the trigger is open.
 */
export function NavigationMenuTrigger(
  triggerProps: React.ComponentProps<typeof NavigationMenuPrimitive.Trigger>,
): React.ReactElement {
  const { className, children, ...rest } = triggerProps;
  const classes = cn(navigationMenuTriggerStyle(), "group", className);

  const chevron = (
    <HugeiconsIcon
      icon={ArrowDown01Icon}
      strokeWidth={2}
      className="relative top-[1px] ml-1 size-3 transition duration-300 group-data-open/navigation-menu-trigger:rotate-180 group-data-popup-open/navigation-menu-trigger:rotate-180"
      aria-hidden="true"
    />
  );

  return (
    <NavigationMenuPrimitive.Trigger
      data-slot="navigation-menu-trigger"
      className={classes}
      {...rest}
    >
      {children}{" "}
      {chevron}
    </NavigationMenuPrimitive.Trigger>
  );
}

/**
 * Styled wrapper around `NavigationMenuPrimitive.Content`.
 *
 * Adds `data-slot="navigation-menu-content"`; many of the classes only apply
 * when the enclosing menu carries `data-viewport=false`.
 */
export function NavigationMenuContent(
  contentProps: React.ComponentProps<typeof NavigationMenuPrimitive.Content>,
): React.ReactElement {
  const { className, ...rest } = contentProps;
  const classes = cn(
    "data-[motion^=from-]:animate-in data-[motion^=to-]:animate-out data-[motion^=from-]:fade-in data-[motion^=to-]:fade-out data-[motion=from-end]:slide-in-from-right-52 data-[motion=from-start]:slide-in-from-left-52 data-[motion=to-end]:slide-out-to-right-52 data-[motion=to-start]:slide-out-to-left-52 group-data-[viewport=false]/navigation-menu:bg-popover group-data-[viewport=false]/navigation-menu:text-popover-foreground group-data-[viewport=false]/navigation-menu:data-open:animate-in group-data-[viewport=false]/navigation-menu:data-closed:animate-out group-data-[viewport=false]/navigation-menu:data-closed:zoom-out-95 group-data-[viewport=false]/navigation-menu:data-open:zoom-in-95 group-data-[viewport=false]/navigation-menu:data-open:fade-in-0 group-data-[viewport=false]/navigation-menu:data-closed:fade-out-0 group-data-[viewport=false]/navigation-menu:ring-foreground/5 p-2.5 pr-3 ease-[cubic-bezier(0.22,1,0.36,1)] group-data-[viewport=false]/navigation-menu:rounded-2xl group-data-[viewport=false]/navigation-menu:shadow-2xl group-data-[viewport=false]/navigation-menu:ring-1 group-data-[viewport=false]/navigation-menu:duration-300 top-0 left-0 w-full group-data-[viewport=false]/navigation-menu:top-full group-data-[viewport=false]/navigation-menu:mt-1.5 group-data-[viewport=false]/navigation-menu:overflow-hidden **:data-[slot=navigation-menu-link]:focus:ring-0 **:data-[slot=navigation-menu-link]:focus:outline-none md:absolute md:w-auto",
    className,
  );

  return (
    <NavigationMenuPrimitive.Content
      data-slot="navigation-menu-content"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Viewport for navigation-menu content, wrapped in an absolutely positioned
 * `<div>` placed below the menu.
 *
 * `className` and the remaining props apply to the inner
 * `NavigationMenuPrimitive.Viewport`, not to the positioning wrapper.
 */
export function NavigationMenuViewport(
  viewportProps: React.ComponentProps<typeof NavigationMenuPrimitive.Viewport>,
): React.ReactElement {
  const { className, ...rest } = viewportProps;
  const wrapperClasses = cn(
    "absolute top-full left-0 isolate z-50 flex justify-center",
  );
  const viewportClasses = cn(
    "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:zoom-out-95 data-open:zoom-in-90 ring-foreground/5 rounded-2xl shadow-2xl ring-1 duration-100 origin-top-center relative mt-1.5 h-[var(--radix-navigation-menu-viewport-height)] w-full overflow-hidden md:w-[var(--radix-navigation-menu-viewport-width)]",
    className,
  );

  return (
    <div className={wrapperClasses}>
      <NavigationMenuPrimitive.Viewport
        data-slot="navigation-menu-viewport"
        className={viewportClasses}
        {...rest}
      />
    </div>
  );
}

/**
 * Styled wrapper around `NavigationMenuPrimitive.Link`.
 *
 * Adds `data-slot="navigation-menu-link"`; remaining props are spread last.
 */
export function NavigationMenuLink(
  linkProps: React.ComponentProps<typeof NavigationMenuPrimitive.Link>,
): React.ReactElement {
  const { className, ...rest } = linkProps;
  const classes = cn(
    "data-[active=true]:focus:bg-muted data-[active=true]:hover:bg-muted data-[active=true]:bg-muted/50 focus-visible:ring-ring/50 hover:bg-muted focus:bg-muted flex items-center gap-1.5 rounded-xl p-3 text-sm transition-all outline-none focus-visible:ring-[3px] focus-visible:outline-1 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <NavigationMenuPrimitive.Link
      data-slot="navigation-menu-link"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Styled wrapper around `NavigationMenuPrimitive.Indicator`.
 *
 * Always renders a small rotated square as its only child (the arrow shape).
 */
export function NavigationMenuIndicator(
  indicatorProps: React.ComponentProps<typeof NavigationMenuPrimitive.Indicator>,
): React.ReactElement {
  const { className, ...rest } = indicatorProps;
  const classes = cn(
    "data-[state=visible]:animate-in data-[state=hidden]:animate-out data-[state=hidden]:fade-out data-[state=visible]:fade-in top-full z-[1] flex h-1.5 items-end justify-center overflow-hidden",
    className,
  );

  const arrow = (
    <div className="bg-border rounded-tl-sm shadow-md relative top-[60%] h-2 w-2 rotate-45" />
  );

  return (
    <NavigationMenuPrimitive.Indicator
      data-slot="navigation-menu-indicator"
      className={classes}
      {...rest}
    >
      {arrow}
    </NavigationMenuPrimitive.Indicator>
  );
}


================================================
FILE: studio/frontend/src/components/ui/pagination.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
import {
  ArrowLeft01Icon,
  ArrowRight01Icon,
  MoreHorizontalCircle01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/**
 * `<nav>` landmark for pagination controls.
 *
 * Sets `aria-label="pagination"` and `data-slot="pagination"`; remaining props
 * are spread last and can override them.
 */
function Pagination(navProps: React.ComponentProps<"nav">) {
  const { className, ...rest } = navProps;
  const classes = cn("mx-auto flex w-full justify-center", className);

  return (
    <nav
      aria-label="pagination"
      data-slot="pagination"
      className={classes}
      {...rest}
    />
  );
}

/** `<ul>` that lays pagination items out in a row (`data-slot="pagination-content"`). */
function PaginationContent(listProps: React.ComponentProps<"ul">) {
  const { className, ...rest } = listProps;
  const classes = cn("gap-1 flex items-center", className);

  return <ul data-slot="pagination-content" className={classes} {...rest} />;
}

/** `<li>` pass-through tagged with `data-slot="pagination-item"`. */
function PaginationItem(itemProps: React.ComponentProps<"li">) {
  return <li data-slot="pagination-item" {...itemProps} />;
}

/**
 * Props for `PaginationLink`: the native `<a>` props, the `size` prop borrowed
 * from `Button`, and an `isActive` flag.
 */
type PaginationLinkProps = {
  /** Marks the link as the current page (`aria-current="page"`, outline variant). */
  isActive?: boolean;
} & Pick<React.ComponentProps<typeof Button>, "size"> &
  React.ComponentProps<"a">;

/**
 * Anchor rendered through `Button` with `asChild`.
 *
 * The active link uses the `outline` variant and carries
 * `aria-current="page"`; other links use `ghost`. `size` defaults to `"icon"`.
 * Remaining props are spread onto the `<a>` last.
 */
function PaginationLink(linkProps: PaginationLinkProps) {
  const { className, isActive, size = "icon", ...rest } = linkProps;
  const variant = isActive ? "outline" : "ghost";
  const ariaCurrent = isActive ? "page" : undefined;

  return (
    <Button asChild variant={variant} size={size} className={cn(className)}>
      <a
        aria-current={ariaCurrent}
        data-slot="pagination-link"
        data-active={isActive}
        {...rest}
      />
    </Button>
  );
}

/**
 * "Previous page" link: a left-arrow icon followed by a "Previous" label that
 * is hidden below the `sm` breakpoint.
 *
 * Remaining props are spread last and can override the default `aria-label`
 * and `size`.
 */
function PaginationPrevious(
  previousProps: React.ComponentProps<typeof PaginationLink>,
) {
  const { className, ...rest } = previousProps;
  const classes = cn("pl-2!", className);

  return (
    <PaginationLink
      aria-label="Go to previous page"
      size="default"
      className={classes}
      {...rest}
    >
      <HugeiconsIcon
        icon={ArrowLeft01Icon}
        strokeWidth={2}
        data-icon="inline-start"
      />
      <span className="hidden sm:block">Previous</span>
    </PaginationLink>
  );
}

/**
 * "Next page" link: a "Next" label (hidden below the `sm` breakpoint) followed
 * by a right-arrow icon.
 *
 * Remaining props are spread last and can override the default `aria-label`
 * and `size`.
 */
function PaginationNext(
  nextProps: React.ComponentProps<typeof PaginationLink>,
) {
  const { className, ...rest } = nextProps;
  const classes = cn("pr-2!", className);

  return (
    <PaginationLink
      aria-label="Go to next page"
      size="default"
      className={classes}
      {...rest}
    >
      <span className="hidden sm:block">Next</span>
      <HugeiconsIcon
        icon={ArrowRight01Icon}
        strokeWidth={2}
        data-icon="inline-end"
      />
    </PaginationLink>
  );
}

/**
 * Ellipsis placeholder indicating skipped pages.
 *
 * Renders a "more" icon plus a visually hidden "More pages" label inside a
 * fixed-size, centered `<span>`. Remaining props are spread onto the span
 * last.
 *
 * NOTE(review): the wrapper is `aria-hidden`, so the `sr-only` label is
 * presumably not announced by assistive technology — confirm this is intended.
 */
function PaginationEllipsis({
  className,
  ...props
}: React.ComponentProps<"span">) {
  return (
    <span
      aria-hidden
      data-slot="pagination-ellipsis"
      className={cn(
        // `items-center justify-center` was listed twice; one copy is enough.
        "size-9 [&_svg:not([class*='size-'])]:size-4 flex items-center justify-center",
        className,
      )}
      {...props}
    >
      <HugeiconsIcon icon={MoreHorizontalCircle01Icon} strokeWidth={2} />
      <span className="sr-only">More pages</span>
    </span>
  );
}

export {
  Pagination,
  PaginationContent,
  PaginationEllipsis,
  PaginationItem,
  PaginationLink,
  PaginationNext,
  PaginationPrevious,
};


================================================
FILE: studio/frontend/src/components/ui/popover.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Popover as PopoverPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/** Pass-through for `PopoverPrimitive.Root` tagged with `data-slot="popover"`. */
function Popover(
  popoverProps: React.ComponentProps<typeof PopoverPrimitive.Root>,
) {
  return <PopoverPrimitive.Root data-slot="popover" {...popoverProps} />;
}

/** Pass-through for `PopoverPrimitive.Trigger` tagged with `data-slot="popover-trigger"`. */
function PopoverTrigger(
  triggerProps: React.ComponentProps<typeof PopoverPrimitive.Trigger>,
) {
  return <PopoverPrimitive.Trigger data-slot="popover-trigger" {...triggerProps} />;
}

/**
 * Popover panel, rendered inside `PopoverPrimitive.Portal`.
 *
 * Defaults: `align="center"`, `sideOffset={4}`. Remaining props are spread
 * onto `PopoverPrimitive.Content` last.
 */
function PopoverContent(
  contentProps: React.ComponentProps<typeof PopoverPrimitive.Content>,
) {
  const { className, align = "center", sideOffset = 4, ...rest } = contentProps;
  const classes = cn(
    "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 shadow-border ring-1 ring-border flex flex-col gap-4 rounded-lg p-4 text-sm duration-100 z-50 w-72 origin-(--radix-popover-content-transform-origin) outline-hidden",
    className,
  );

  return (
    <PopoverPrimitive.Portal>
      <PopoverPrimitive.Content
        data-slot="popover-content"
        align={align}
        sideOffset={sideOffset}
        className={classes}
        {...rest}
      />
    </PopoverPrimitive.Portal>
  );
}

/** Pass-through for `PopoverPrimitive.Anchor` tagged with `data-slot="popover-anchor"`. */
function PopoverAnchor(
  anchorProps: React.ComponentProps<typeof PopoverPrimitive.Anchor>,
) {
  return <PopoverPrimitive.Anchor data-slot="popover-anchor" {...anchorProps} />;
}

/** Column-layout `<div>` for a popover's title and description (`data-slot="popover-header"`). */
function PopoverHeader(headerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = headerProps;
  const classes = cn("flex flex-col gap-1 text-sm", className);

  return <div data-slot="popover-header" className={classes} {...rest} />;
}

/**
 * Title of a popover.
 *
 * Rendered as an `<h2>` so the element matches the declared prop type
 * (`React.ComponentProps<"h2">`): a forwarded `ref` then really points at an
 * `HTMLHeadingElement`, and the title is exposed as a heading. Visual size and
 * weight come from the classes; remaining props are spread last.
 */
function PopoverTitle({ className, ...props }: React.ComponentProps<"h2">) {
  return (
    <h2
      data-slot="popover-title"
      className={cn("text-base font-medium", className)}
      {...props}
    />
  );
}

/** Muted `<p>` for a popover's descriptive text (`data-slot="popover-description"`). */
function PopoverDescription(descriptionProps: React.ComponentProps<"p">) {
  const { className, ...rest } = descriptionProps;
  const classes = cn("text-muted-foreground", className);

  return <p data-slot="popover-description" className={classes} {...rest} />;
}

export {
  Popover,
  PopoverAnchor,
  PopoverContent,
  PopoverDescription,
  PopoverHeader,
  PopoverTitle,
  PopoverTrigger,
};


================================================
FILE: studio/frontend/src/components/ui/progress.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Progress as ProgressPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Horizontal progress bar built on `ProgressPrimitive`.
 *
 * `value` is forwarded to `ProgressPrimitive.Root` so the primitive knows the
 * current progress (previously it was swallowed here and the root never
 * received it). The indicator is shifted left by the unfilled percentage; for
 * that visual calculation the value is clamped to 0..100 and anything that is
 * not a finite number is treated as 0, so the transform is always valid CSS.
 *
 * @param value Current progress on a 0–100 scale; `null`/`undefined` renders an empty bar.
 */
function Progress({
  className,
  value,
  ...props
}: React.ComponentProps<typeof ProgressPrimitive.Root>) {
  // Clamp only the visual percentage; the raw value is forwarded to the
  // primitive untouched so it can apply its own validation.
  const percent =
    typeof value === "number" && Number.isFinite(value)
      ? Math.min(100, Math.max(0, value))
      : 0;

  return (
    <ProgressPrimitive.Root
      data-slot="progress"
      className={cn(
        "bg-foreground/[0.06] h-3 rounded-4xl relative flex w-full items-center overflow-x-hidden",
        className,
      )}
      value={value}
      {...props}
    >
      <ProgressPrimitive.Indicator
        data-slot="progress-indicator"
        className="bg-primary size-full flex-1 transition-all"
        style={{ transform: `translateX(-${100 - percent}%)` }}
      />
    </ProgressPrimitive.Root>
  );
}

export { Progress };


================================================
FILE: studio/frontend/src/components/ui/radio-group.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { RadioGroup as RadioGroupPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";
import { CircleIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/** Grid-layout wrapper around `RadioGroupPrimitive.Root` (`data-slot="radio-group"`). */
function RadioGroup(
  groupProps: React.ComponentProps<typeof RadioGroupPrimitive.Root>,
) {
  const { className, ...rest } = groupProps;
  const classes = cn("grid gap-3 w-full", className);

  return (
    <RadioGroupPrimitive.Root
      data-slot="radio-group"
      className={classes}
      {...rest}
    />
  );
}

/**
 * Single radio button built on `RadioGroupPrimitive.Item`.
 *
 * The indicator holds a small filled circle icon centered inside the item.
 * Remaining props are spread onto the item last.
 */
function RadioGroupItem(
  itemProps: React.ComponentProps<typeof RadioGroupPrimitive.Item>,
) {
  const { className, ...rest } = itemProps;
  const classes = cn(
    "border-input text-primary dark:bg-input/30 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 data-checked:bg-primary data-checked:border-primary flex size-4 rounded-full transition-none focus-visible:ring-[3px] aria-invalid:ring-[3px] group/radio-group-item peer relative aspect-square shrink-0 border outline-none after:absolute after:-inset-x-3 after:-inset-y-2 disabled:cursor-not-allowed disabled:opacity-50",
    className,
  );

  const dot = (
    <HugeiconsIcon
      icon={CircleIcon}
      strokeWidth={2}
      className="absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2 fill-current"
    />
  );

  return (
    <RadioGroupPrimitive.Item
      data-slot="radio-group-item"
      className={classes}
      {...rest}
    >
      <RadioGroupPrimitive.Indicator
        data-slot="radio-group-indicator"
        className="group-aria-invalid/radio-group-item:text-destructive flex size-4 items-center justify-center text-white"
      >
        {dot}
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  );
}

export { RadioGroup, RadioGroupItem };


================================================
FILE: studio/frontend/src/components/ui/resizable.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";
import * as ResizablePrimitive from "react-resizable-panels";

import { cn } from "@/lib/utils";

/**
 * Styled wrapper around `ResizablePrimitive.Group`.
 *
 * Fills its container; the classes switch to a column layout when the element
 * carries `data-panel-group-direction=vertical`.
 */
function ResizablePanelGroup(
  groupProps: React.ComponentProps<typeof ResizablePrimitive.Group>,
): React.ReactElement {
  const { className, ...rest } = groupProps;
  const classes = cn(
    "flex h-full w-full data-[panel-group-direction=vertical]:flex-col",
    className,
  );

  return (
    <ResizablePrimitive.Group
      data-slot="resizable-panel-group"
      className={classes}
      {...rest}
    />
  );
}

/** Pass-through for `ResizablePrimitive.Panel` tagged with `data-slot="resizable-panel"`. */
function ResizablePanel(
  panelProps: React.ComponentProps<typeof ResizablePrimitive.Panel>,
): React.ReactElement {
  return <ResizablePrimitive.Panel data-slot="resizable-panel" {...panelProps} />;
}

/**
 * Drag handle between resizable panels, built on
 * `ResizablePrimitive.Separator`.
 *
 * @param withHandle When truthy, renders a small visible grip inside the separator.
 */
function ResizableHandle(
  handleProps: React.ComponentProps<typeof ResizablePrimitive.Separator> & {
    withHandle?: boolean;
  },
): React.ReactElement {
  const { withHandle, className, ...rest } = handleProps;
  const classes = cn(
    "bg-border focus-visible:ring-ring relative flex w-px items-center justify-center after:absolute after:inset-y-0 after:left-1/2 after:w-1 after:-translate-x-1/2 focus-visible:ring-1 focus-visible:ring-offset-1 focus-visible:outline-hidden data-[panel-group-direction=vertical]:h-px data-[panel-group-direction=vertical]:w-full data-[panel-group-direction=vertical]:after:left-0 data-[panel-group-direction=vertical]:after:h-1 data-[panel-group-direction=vertical]:after:w-full data-[panel-group-direction=vertical]:after:translate-x-0 data-[panel-group-direction=vertical]:after:-translate-y-1/2 [&[data-panel-group-direction=vertical]>div]:rotate-90",
    className,
  );

  return (
    <ResizablePrimitive.Separator
      data-slot="resizable-handle"
      className={classes}
      {...rest}
    >
      {withHandle && (
        <div className="bg-border h-6 w-1 rounded-lg z-10 flex shrink-0" />
      )}
    </ResizablePrimitive.Separator>
  );
}

export { ResizablePanelGroup, ResizablePanel, ResizableHandle };


================================================
FILE: studio/frontend/src/components/ui/scroll-area.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { ScrollArea as ScrollAreaPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Scrollable container built on the Radix `ScrollArea` primitive.
 *
 * Renders the root, a viewport holding `children`, a default `ScrollBar`
 * and the corner element. `className` is merged onto the root; remaining
 * props are forwarded to the root.
 */
function ScrollArea(
  areaProps: React.ComponentProps<typeof ScrollAreaPrimitive.Root>,
) {
  const { className, children, ...rest } = areaProps;
  const rootClassName = cn("relative", className);

  return (
    <ScrollAreaPrimitive.Root
      data-slot="scroll-area"
      className={rootClassName}
      {...rest}
    >
      <ScrollAreaPrimitive.Viewport
        data-slot="scroll-area-viewport"
        className="focus-visible:ring-ring/50 size-full rounded-[inherit] transition-[color,box-shadow] outline-none focus-visible:ring-[3px] focus-visible:outline-1"
      >
        {children}
      </ScrollAreaPrimitive.Viewport>
      <ScrollBar />
      <ScrollAreaPrimitive.Corner />
    </ScrollAreaPrimitive.Root>
  );
}

/**
 * Scrollbar track plus thumb for `ScrollArea`.
 *
 * @param orientation - defaults to `"vertical"`; passed to the primitive and
 *   also mirrored as a `data-orientation` attribute.
 * @param className - extra classes merged after the default track styles.
 */
function ScrollBar(
  barProps: React.ComponentProps<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>,
) {
  const { className, orientation = "vertical", ...rest } = barProps;

  const trackClassName = cn(
    "data-horizontal:h-2.5 data-horizontal:flex-col data-horizontal:border-t data-horizontal:border-t-transparent data-vertical:h-full data-vertical:w-2.5 data-vertical:border-l data-vertical:border-l-transparent flex touch-none p-px transition-colors select-none",
    className,
  );

  return (
    <ScrollAreaPrimitive.ScrollAreaScrollbar
      data-slot="scroll-area-scrollbar"
      data-orientation={orientation}
      orientation={orientation}
      className={trackClassName}
      {...rest}
    >
      <ScrollAreaPrimitive.ScrollAreaThumb
        data-slot="scroll-area-thumb"
        className="rounded-full bg-border relative flex-1"
      />
    </ScrollAreaPrimitive.ScrollAreaScrollbar>
  );
}

export { ScrollArea, ScrollBar };


================================================
FILE: studio/frontend/src/components/ui/select.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import { Select as SelectPrimitive } from "radix-ui";
import type * as React from "react";
import { createContext, useContext, useState } from "react";

import { cn } from "@/lib/utils";
import { useDialogPortalContainer } from "@/components/ui/dialog";
import {
  ArrowDown01Icon,
  ArrowUp01Icon,
  Tick02Icon,
  UnfoldMoreIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

// Carries the open/closed state of the enclosing `Select` down to
// `SelectTrigger`, which reads it to adjust its inline border-radius style.
// Defaults to `false` when no `Select` provider is present.
const SelectOpenContext = createContext(false);

/**
 * Root of the select component.
 *
 * Wraps `SelectPrimitive.Root` and publishes the current open state through
 * `SelectOpenContext` so that `SelectTrigger` can style itself while the
 * list is open.
 *
 * The published state follows the same rules as the primitive:
 * - controlled usage (`open` prop supplied): the `open` prop is the source
 *   of truth;
 * - uncontrolled usage: a local mirror is kept, seeded from `defaultOpen`
 *   and updated on every `onOpenChange`.
 *
 * @param onOpenChange - optional caller callback; invoked after the local
 *   mirror is updated.
 * Remaining props are forwarded to `SelectPrimitive.Root`.
 */
function Select({
  onOpenChange,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Root>) {
  // Seed from `defaultOpen` so a select that starts open is reported as open.
  const [uncontrolledOpen, setUncontrolledOpen] = useState(
    props.defaultOpen ?? false,
  );
  // A controlled `open` prop always wins over the local mirror.
  const isOpen = props.open ?? uncontrolledOpen;

  return (
    <SelectOpenContext.Provider value={isOpen}>
      <SelectPrimitive.Root
        data-slot="select"
        onOpenChange={(open) => {
          setUncontrolledOpen(open);
          onOpenChange?.(open);
        }}
        {...props}
      />
    </SelectOpenContext.Provider>
  );
}

/**
 * Wrapper around `SelectPrimitive.Group` that adds a `data-slot` and merges
 * default padding/scroll-margin classes with the caller's `className`.
 */
function SelectGroup(
  groupProps: React.ComponentProps<typeof SelectPrimitive.Group>,
) {
  const { className, ...rest } = groupProps;
  const groupClassName = cn("scroll-my-1 p-1", className);

  return (
    <SelectPrimitive.Group
      data-slot="select-group"
      className={groupClassName}
      {...rest}
    />
  );
}

/**
 * Pass-through wrapper around `SelectPrimitive.Value` that only adds
 * `data-slot="select-value"`.
 */
function SelectValue(
  valueProps: React.ComponentProps<typeof SelectPrimitive.Value>,
) {
  return <SelectPrimitive.Value data-slot="select-value" {...valueProps} />;
}

/**
 * Button that opens the select.
 *
 * Reads the open state from `SelectOpenContext`: while open, the border
 * radius is forced to 12px with no transition; once closed, the inline radius
 * is cleared and the change back is animated over 150ms.
 *
 * @param size - `"sm"` or `"default"` (default); exposed as `data-size`.
 * @param className - extra classes merged after the default trigger styles.
 * @param children - content rendered before the trailing chevron icon.
 */
function SelectTrigger(
  triggerProps: React.ComponentProps<typeof SelectPrimitive.Trigger> & {
    size?: "sm" | "default";
  },
) {
  const { className, size = "default", children, ...rest } = triggerProps;
  const isOpen = useContext(SelectOpenContext);

  const openStyle = {
    borderRadius: "12px",
    transition: "border-radius 0ms",
  };
  const closedStyle = {
    borderRadius: undefined,
    transition: "border-radius 150ms cubic-bezier(0.645, 0.045, 0.355, 1)",
  };
  const triggerStyle = isOpen ? openStyle : closedStyle;

  const triggerClassName = cn(
    "border-input data-[placeholder]:text-muted-foreground bg-input/30 dark:hover:bg-input/50 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 gap-1.5 rounded-4xl border px-3 py-2 text-sm transition-colors focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=default]:h-9 data-[size=sm]:h-8 *:data-[slot=select-value]:flex *:data-[slot=select-value]:gap-1.5 [&_svg:not([class*='size-'])]:size-4 flex w-fit items-center justify-between whitespace-nowrap outline-none disabled:cursor-not-allowed disabled:opacity-50 *:data-[slot=select-value]:line-clamp-1 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center [&_svg]:pointer-events-none [&_svg]:shrink-0 cursor-pointer",
    className,
  );

  return (
    <SelectPrimitive.Trigger
      data-slot="select-trigger"
      data-size={size}
      style={triggerStyle}
      className={triggerClassName}
      {...rest}
    >
      {children}
      <SelectPrimitive.Icon asChild>
        <HugeiconsIcon
          icon={UnfoldMoreIcon}
          strokeWidth={2}
          className="text-muted-foreground size-4 pointer-events-none"
        />
      </SelectPrimitive.Icon>
    </SelectPrimitive.Trigger>
  );
}

/**
 * Popup list of the select, rendered through a portal.
 *
 * @param position - `"item-aligned"` (default) or `"popper"`; forwarded to the
 *   primitive, exposed on the viewport as `data-position`, and used to add
 *   side-dependent offset classes in popper mode.
 * @param align - alignment forwarded to the primitive; defaults to `"center"`.
 * @param container - explicit portal container. When omitted, the container
 *   returned by `useDialogPortalContainer()` is used, and if that is also
 *   nullish the portal falls back to the primitive's default.
 * @param className - extra classes merged after the default popup styles.
 * @param children - items rendered inside the viewport, between the scroll
 *   up/down buttons.
 */
function SelectContent({
  className,
  children,
  position = "item-aligned",
  align = "center",
  container,
  ...props
}: React.ComponentProps<typeof SelectPrimitive.Content> & {
  container?: HTMLElement | null;
}) {
  const dialogContainer = useDialogPortalContainer();
  return (
    <SelectPrimitive.Portal container={container ?? dialogContainer ?? undefined}>
      <SelectPrimitive.Content
        data-slot="select-content"
        data-align-trigger={position === "item-aligned"}
        className={cn(
          "bg-popover text-popover-foreground data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 data-closed:zoom-out-95 data-open:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 shadow-border ring-1 ring-border min-w-36 rounded-xl p-1 corner-squircle duration-100 relative z-50 max-h-(--radix-select-content-available-height) origin-(--radix-select-content-transform-origin) overflow-x-hidden overflow-y-auto ",
          position === "popper" &&
            "data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
          className,
        )}
        position={position}
        align={align}
        {...props}
      >
        <SelectScrollUpButton />
        {/* Popper sizing is driven by the `data-position` attribute selectors,
            so no position-dependent class argument is needed here. */}
        <SelectPrimitive.Viewport
          data-position={position}
          className={cn(
            "data-[position=popper]:h-[var(--radix-select-trigger-height)] data-[position=popper]:w-full data-[position=popper]:min-w-[var(--radix-select-trigger-width)]",
          )}
        >
          {children}
        </SelectPrimitive.Viewport>
        <SelectScrollDownButton />
      </SelectPrimitive.Content>
    </SelectPrimitive.Portal>
  );
}

/**
 * Wrapper around `SelectPrimitive.Label` with muted, small-text default
 * styling; caller `className` is merged last.
 */
function SelectLabel(
  labelProps: React.ComponentProps<typeof SelectPrimitive.Label>,
) {
  const { className, ...rest } = labelProps;
  const labelClassName = cn(
    "text-muted-foreground px-3 py-2.5 text-xs",
    className,
  );

  return (
    <SelectPrimitive.Label
      data-slot="select-label"
      className={labelClassName}
      {...rest}
    />
  );
}

/**
 * Single option inside the select list.
 *
 * Renders an absolutely positioned tick on the right (shown through
 * `SelectPrimitive.ItemIndicator`) followed by the item text wrapping
 * `children`. `className` is merged after the default item styles.
 */
function SelectItem(
  itemProps: React.ComponentProps<typeof SelectPrimitive.Item>,
) {
  const { className, children, ...rest } = itemProps;

  const itemClassName = cn(
    "focus:bg-accent focus:text-accent-foreground not-data-[variant=destructive]:focus:**:text-accent-foreground gap-2.5 rounded-xl corner-squircle py-2 pr-8 pl-3 text-sm [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2 relative flex w-full cursor-pointer items-center outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    className,
  );

  const selectedMark = (
    <span className="pointer-events-none absolute right-2 flex size-4 items-center justify-center">
      <SelectPrimitive.ItemIndicator>
        <HugeiconsIcon
          icon={Tick02Icon}
          strokeWidth={2}
          className="pointer-events-none"
        />
      </SelectPrimitive.ItemIndicator>
    </span>
  );

  return (
    <SelectPrimitive.Item
      data-slot="select-item"
      className={itemClassName}
      {...rest}
    >
      {selectedMark}
      <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
    </SelectPrimitive.Item>
  );
}

/**
 * Thin horizontal rule between select groups; wraps
 * `SelectPrimitive.Separator` and merges the caller's `className` last.
 */
function SelectSeparator(
  separatorProps: React.ComponentProps<typeof SelectPrimitive.Separator>,
) {
  const { className, ...rest } = separatorProps;
  const separatorClassName = cn(
    "bg-border/50 -mx-1 my-1 h-px pointer-events-none",
    className,
  );

  return (
    <SelectPrimitive.Separator
      data-slot="select-separator"
      className={separatorClassName}
      {...rest}
    />
  );
}

/**
 * Scroll-up affordance for the select list; wraps
 * `SelectPrimitive.ScrollUpButton` and renders an up-arrow icon.
 */
function SelectScrollUpButton(
  buttonProps: React.ComponentProps<typeof SelectPrimitive.ScrollUpButton>,
) {
  const { className, ...rest } = buttonProps;
  const buttonClassName = cn(
    "bg-popover z-10 flex cursor-default items-center justify-center py-1 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <SelectPrimitive.ScrollUpButton
      data-slot="select-scroll-up-button"
      className={buttonClassName}
      {...rest}
    >
      <HugeiconsIcon icon={ArrowUp01Icon} strokeWidth={2} />
    </SelectPrimitive.ScrollUpButton>
  );
}

/**
 * Scroll-down affordance for the select list; wraps
 * `SelectPrimitive.ScrollDownButton` and renders a down-arrow icon.
 */
function SelectScrollDownButton(
  buttonProps: React.ComponentProps<typeof SelectPrimitive.ScrollDownButton>,
) {
  const { className, ...rest } = buttonProps;
  const buttonClassName = cn(
    "bg-popover z-10 flex cursor-default items-center justify-center py-1 [&_svg:not([class*='size-'])]:size-4",
    className,
  );

  return (
    <SelectPrimitive.ScrollDownButton
      data-slot="select-scroll-down-button"
      className={buttonClassName}
      {...rest}
    >
      <HugeiconsIcon icon={ArrowDown01Icon} strokeWidth={2} />
    </SelectPrimitive.ScrollDownButton>
  );
}

export {
  Select,
  SelectContent,
  SelectGroup,
  SelectItem,
  SelectLabel,
  SelectScrollDownButton,
  SelectScrollUpButton,
  SelectSeparator,
  SelectTrigger,
  SelectValue,
};


================================================
FILE: studio/frontend/src/components/ui/separator.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Separator as SeparatorPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Visual divider based on the Radix `Separator` primitive.
 *
 * @param orientation - defaults to `"horizontal"`.
 * @param decorative - defaults to `true`.
 * @param className - extra classes merged after the default sizing styles.
 */
function Separator(
  separatorProps: React.ComponentProps<typeof SeparatorPrimitive.Root>,
) {
  const {
    className,
    orientation = "horizontal",
    decorative = true,
    ...rest
  } = separatorProps;

  const separatorClassName = cn(
    "bg-border shrink-0 data-[orientation=horizontal]:h-px data-[orientation=horizontal]:w-full data-[orientation=vertical]:w-px data-[orientation=vertical]:self-stretch",
    className,
  );

  return (
    <SeparatorPrimitive.Root
      data-slot="separator"
      decorative={decorative}
      orientation={orientation}
      className={separatorClassName}
      {...rest}
    />
  );
}

export { Separator };


================================================
FILE: studio/frontend/src/components/ui/sheet.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Dialog as SheetPrimitive } from "radix-ui";
import type * as React from "react";
import { useState } from "react";

import { Button } from "@/components/ui/button";
import { DialogPortalContainerContext } from "@/components/ui/dialog";
import { cn } from "@/lib/utils";
import { Cancel01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";

/** Sheet root: `SheetPrimitive.Root` with `data-slot="sheet"` added. */
function Sheet(sheetProps: React.ComponentProps<typeof SheetPrimitive.Root>) {
  return <SheetPrimitive.Root data-slot="sheet" {...sheetProps} />;
}

/** `SheetPrimitive.Trigger` with `data-slot="sheet-trigger"` added. */
function SheetTrigger(
  triggerProps: React.ComponentProps<typeof SheetPrimitive.Trigger>,
) {
  return <SheetPrimitive.Trigger data-slot="sheet-trigger" {...triggerProps} />;
}

/** `SheetPrimitive.Close` with `data-slot="sheet-close"` added. */
function SheetClose(
  closeProps: React.ComponentProps<typeof SheetPrimitive.Close>,
) {
  return <SheetPrimitive.Close data-slot="sheet-close" {...closeProps} />;
}

/** `SheetPrimitive.Portal` with `data-slot="sheet-portal"` added. */
function SheetPortal(
  portalProps: React.ComponentProps<typeof SheetPrimitive.Portal>,
) {
  return <SheetPrimitive.Portal data-slot="sheet-portal" {...portalProps} />;
}

/**
 * Backdrop rendered behind the sheet content.
 *
 * @param position - `"fixed"` (default) or `"absolute"`; any value other than
 *   `"fixed"` results in the `absolute` class.
 * @param className - extra classes merged after the position class.
 */
function SheetOverlay(
  overlayProps: React.ComponentProps<typeof SheetPrimitive.Overlay> & {
    position?: "fixed" | "absolute";
  },
) {
  const { className, position = "fixed", ...rest } = overlayProps;

  let positionClass = "absolute";
  if (position === "fixed") {
    positionClass = "fixed";
  }

  const overlayClassName = cn(
    "data-open:animate-in data-closed:animate-out data-closed:fade-out-0 data-open:fade-in-0 bg-black/80 duration-100 data-ending-style:opacity-0 data-starting-style:opacity-0 supports-backdrop-filter:backdrop-blur-xs inset-0 z-50",
    positionClass,
    className,
  );

  return (
    <SheetPrimitive.Overlay
      data-slot="sheet-overlay"
      className={overlayClassName}
      {...rest}
    />
  );
}

/**
 * Sheet panel together with its overlay, rendered inside a portal.
 *
 * The content element is captured in state through a callback ref and provided
 * to descendants via `DialogPortalContainerContext`.
 *
 * @param side - edge the sheet is attached to; defaults to `"right"`.
 * @param showCloseButton - render the top-right close button (default `true`).
 * @param container - optional portal container; nullish values are passed to
 *   the portal as `undefined`.
 * @param position - `"fixed"` (default) or `"absolute"` positioning of the
 *   content.
 * @param overlayClassName - extra classes for the overlay.
 * @param overlayPosition - overlay positioning; falls back to `position`.
 * @param className - extra classes merged after the position class.
 */
function SheetContent(
  contentProps: React.ComponentProps<typeof SheetPrimitive.Content> & {
    side?: "top" | "right" | "bottom" | "left";
    showCloseButton?: boolean;
    container?: HTMLElement | null;
    position?: "fixed" | "absolute";
    overlayClassName?: string;
    overlayPosition?: "fixed" | "absolute";
  },
) {
  const {
    className,
    children,
    side = "right",
    showCloseButton = true,
    container,
    position = "fixed",
    overlayClassName,
    overlayPosition,
    ...rest
  } = contentProps;

  const [contentEl, setContentEl] = useState<HTMLDivElement | null>(null);

  const resolvedOverlayPosition = overlayPosition ?? position;
  const positionClass = position === "fixed" ? "fixed" : "absolute";

  const contentClassName = cn(
    "bg-background data-open:animate-in data-closed:animate-out data-[side=right]:data-closed:slide-out-to-right-10 data-[side=right]:data-open:slide-in-from-right-10 data-[side=left]:data-closed:slide-out-to-left-10 data-[side=left]:data-open:slide-in-from-left-10 data-[side=top]:data-closed:slide-out-to-top-10 data-[side=top]:data-open:slide-in-from-top-10 data-closed:fade-out-0 data-open:fade-in-0 data-[side=bottom]:data-closed:slide-out-to-bottom-10 data-[side=bottom]:data-open:slide-in-from-bottom-10 z-50 flex flex-col bg-clip-padding text-sm shadow-lg transition duration-200 ease-in-out data-[side=bottom]:inset-x-0 data-[side=bottom]:bottom-0 data-[side=bottom]:h-auto data-[side=bottom]:border-t data-[side=left]:inset-y-0 data-[side=left]:left-0 data-[side=left]:h-full data-[side=left]:w-3/4 data-[side=left]:border-r data-[side=right]:inset-y-0 data-[side=right]:right-0 data-[side=right]:h-full data-[side=right]:w-3/4 data-[side=right]:border-l data-[side=top]:inset-x-0 data-[side=top]:top-0 data-[side=top]:h-auto data-[side=top]:border-b data-[side=left]:sm:max-w-sm data-[side=right]:sm:max-w-sm",
    positionClass,
    className,
  );

  const closeButton = showCloseButton && (
    <SheetPrimitive.Close data-slot="sheet-close" asChild>
      <Button variant="ghost" className="absolute top-4 right-4" size="icon-sm">
        <HugeiconsIcon icon={Cancel01Icon} strokeWidth={2} />
        <span className="sr-only">Close</span>
      </Button>
    </SheetPrimitive.Close>
  );

  return (
    <SheetPortal container={container ?? undefined}>
      <SheetOverlay
        className={overlayClassName}
        position={resolvedOverlayPosition}
      />
      <SheetPrimitive.Content
        ref={setContentEl}
        data-slot="sheet-content"
        data-side={side}
        className={contentClassName}
        {...rest}
      >
        <DialogPortalContainerContext.Provider value={contentEl}>
          {children}
        </DialogPortalContainerContext.Provider>
        {closeButton}
      </SheetPrimitive.Content>
    </SheetPortal>
  );
}

/** Header region of a sheet: a padded flex column `<div>`. */
function SheetHeader(headerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = headerProps;
  const headerClassName = cn("gap-1.5 p-6 flex flex-col", className);

  return <div data-slot="sheet-header" className={headerClassName} {...rest} />;
}

/** Footer region of a sheet: a padded flex column `<div>` with `mt-auto`. */
function SheetFooter(footerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = footerProps;
  const footerClassName = cn("gap-2 p-6 mt-auto flex flex-col", className);

  return <div data-slot="sheet-footer" className={footerClassName} {...rest} />;
}

/**
 * Sheet title; wraps `SheetPrimitive.Title` with default text styling and
 * merges the caller's `className` last.
 */
function SheetTitle(
  titleProps: React.ComponentProps<typeof SheetPrimitive.Title>,
) {
  const { className, ...rest } = titleProps;
  const titleClassName = cn("text-foreground text-base font-medium", className);

  return (
    <SheetPrimitive.Title
      data-slot="sheet-title"
      className={titleClassName}
      {...rest}
    />
  );
}

/**
 * Sheet description; wraps `SheetPrimitive.Description` with muted small-text
 * styling and merges the caller's `className` last.
 */
function SheetDescription(
  descriptionProps: React.ComponentProps<typeof SheetPrimitive.Description>,
) {
  const { className, ...rest } = descriptionProps;
  const descriptionClassName = cn("text-muted-foreground text-sm", className);

  return (
    <SheetPrimitive.Description
      data-slot="sheet-description"
      className={descriptionClassName}
      {...rest}
    />
  );
}

export {
  Sheet,
  SheetTrigger,
  SheetClose,
  SheetContent,
  SheetHeader,
  SheetFooter,
  SheetTitle,
  SheetDescription,
};


================================================
FILE: studio/frontend/src/components/ui/shine-border.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import * as React from "react"

import { cn } from "@/lib/utils"

/**
 * Props accepted by `ShineBorder`: every standard `<div>` HTML attribute plus
 * the three animation settings below.
 */
interface ShineBorderProps extends React.HTMLAttributes<HTMLDivElement> {
  /**
   * Width of the border in pixels
   * @default 1
   */
  borderWidth?: number
  /**
   * Duration of the animation in seconds
   * @default 14
   */
  duration?: number
  /**
   * Color of the border, can be a single color or an array of colors
   * (an array is joined with commas into the gradient's color stops)
   * @default "#000000"
   */
  shineColor?: string | string[]
}

/**
 * Shine Border
 *
 * Absolutely positioned overlay `<div>` that paints an animated radial
 * gradient and masks out everything except a border-width ring.
 *
 * @param borderWidth - ring thickness in pixels (default `1`); exposed as the
 *   `--border-width` CSS variable and used as padding.
 * @param duration - animation duration in seconds (default `14`); exposed as
 *   the `--duration` CSS variable.
 * @param shineColor - one color or a list of colors (default `"#000000"`).
 * @param style - caller styles, spread last so they override the computed ones.
 * @param className - extra classes merged after the defaults.
 */
export function ShineBorder(shineProps: ShineBorderProps) {
  const {
    borderWidth = 1,
    duration = 14,
    shineColor = "#000000",
    className,
    style,
    ...rest
  } = shineProps

  // A color list becomes comma-separated gradient stops; a single color is
  // used as-is.
  const colorStops = Array.isArray(shineColor)
    ? shineColor.join(",")
    : shineColor

  // Two stacked mask layers; combined with the composite modes below so that
  // only the padding (border) area stays visible.
  const maskLayers = `linear-gradient(#fff 0 0) content-box, linear-gradient(#fff 0 0)`

  const shineStyle = {
    "--border-width": `${borderWidth}px`,
    "--duration": `${duration}s`,
    backgroundImage: `radial-gradient(transparent,transparent, ${colorStops},transparent,transparent)`,
    backgroundSize: "300% 300%",
    mask: maskLayers,
    WebkitMask: maskLayers,
    WebkitMaskComposite: "xor",
    maskComposite: "exclude",
    padding: "var(--border-width)",
    ...style,
  } as React.CSSProperties

  const shineClassName = cn(
    "motion-safe:animate-shine pointer-events-none absolute inset-0 size-full rounded-[inherit] will-change-[background-position]",
    className
  )

  return <div style={shineStyle} className={shineClassName} {...rest} />
}


================================================
FILE: studio/frontend/src/components/ui/sidebar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client"

import * as React from "react"
import { cva, type VariantProps } from "class-variance-authority"
import { Slot } from "radix-ui"

import { cn } from "@/lib/utils"
import { Button } from "@/components/ui/button"
import { Input } from "@/components/ui/input"
import { Separator } from "@/components/ui/separator"
import {
  Sheet,
  SheetContent,
  SheetDescription,
  SheetHeader,
  SheetTitle,
} from "@/components/ui/sheet"
import { Skeleton } from "@/components/ui/skeleton"
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip"
import { useIsMobile } from "@/hooks/use-mobile"
import { HugeiconsIcon } from "@hugeicons/react"
import { SidebarLeftIcon } from "@hugeicons/core-free-icons"

// Name of the cookie that stores the desktop sidebar's open/closed state.
const SIDEBAR_COOKIE_NAME = "sidebar_state"
// Value written to the cookie's `max-age` attribute: 60 * 60 * 24 * 7 (seven days in seconds).
const SIDEBAR_COOKIE_MAX_AGE = 60 * 60 * 24 * 7
// Value of the `--sidebar-width` CSS variable set on the provider wrapper.
const SIDEBAR_WIDTH = "16rem"
// `--sidebar-width` value applied to the mobile sheet instead.
const SIDEBAR_WIDTH_MOBILE = "18rem"
// Value of the `--sidebar-width-icon` CSS variable set on the provider wrapper.
const SIDEBAR_WIDTH_ICON = "3rem"
// Key that toggles the sidebar when pressed together with Ctrl or Meta.
const SIDEBAR_KEYBOARD_SHORTCUT = "b"

// Shape of the value shared by `SidebarProvider` with all sidebar components.
type SidebarContextProps = {
  // Derived from `open`: "expanded" when open, "collapsed" otherwise.
  state: "expanded" | "collapsed"
  // Desktop open state.
  open: boolean
  // Setter for the desktop open state.
  setOpen: (open: boolean) => void
  // Open state of the mobile sheet, tracked separately from `open`.
  openMobile: boolean
  // Setter for the mobile sheet's open state.
  setOpenMobile: (open: boolean) => void
  // Result of the `useIsMobile()` hook.
  isMobile: boolean
  // Flips `openMobile` on mobile and `open` otherwise.
  toggleSidebar: () => void
}

// `null` by default so `useSidebar` can detect a missing provider.
const SidebarContext = React.createContext<SidebarContextProps | null>(null)

/**
 * Returns the sidebar context value.
 *
 * @throws Error when called outside of a `SidebarProvider` (the context value
 *   is falsy).
 */
function useSidebar() {
  const sidebar = React.useContext(SidebarContext)

  if (sidebar) {
    return sidebar
  }

  throw new Error("useSidebar must be used within a SidebarProvider.")
}

/**
 * Owns the sidebar state and exposes it through `SidebarContext`.
 *
 * Renders a wrapper `<div>` that defines the `--sidebar-width` and
 * `--sidebar-width-icon` CSS variables, and registers a global keydown
 * listener that toggles the sidebar on Ctrl/Meta + `SIDEBAR_KEYBOARD_SHORTCUT`.
 *
 * @param defaultOpen - initial desktop open state when uncontrolled (default `true`).
 * @param open - when provided (non-nullish), the desktop open state is read from it.
 * @param onOpenChange - when provided, state changes are reported to it instead
 *   of being stored internally.
 * @param className - extra classes merged onto the wrapper.
 * @param style - extra inline styles; spread after the CSS variables so they
 *   can override them.
 * Remaining props are forwarded to the wrapper `<div>`.
 */
function SidebarProvider({
  defaultOpen = true,
  open: openProp,
  onOpenChange: setOpenProp,
  className,
  style,
  children,
  ...props
}: React.ComponentProps<"div"> & {
  defaultOpen?: boolean
  open?: boolean
  onOpenChange?: (open: boolean) => void
}) {
  const isMobile = useIsMobile()
  // The mobile sheet has its own open flag, independent of the desktop state.
  const [openMobile, setOpenMobile] = React.useState(false)

  // This is the internal state of the sidebar.
  // We use openProp and setOpenProp for control from outside the component.
  const [_open, _setOpen] = React.useState(defaultOpen)
  const open = openProp ?? _open
  const setOpen = React.useCallback(
    (value: boolean | ((value: boolean) => boolean)) => {
      // Updater functions are resolved against the `open` value captured by
      // this callback, which is why `open` is listed in the dependencies.
      const openState = typeof value === "function" ? value(open) : value
      if (setOpenProp) {
        setOpenProp(openState)
      } else {
        _setOpen(openState)
      }

      // This sets the cookie to keep the sidebar state.
      document.cookie = `${SIDEBAR_COOKIE_NAME}=${openState}; path=/; max-age=${SIDEBAR_COOKIE_MAX_AGE}`
    },
    [setOpenProp, open]
  )

  // Helper to toggle the sidebar.
  const toggleSidebar = React.useCallback(() => {
    return isMobile ? setOpenMobile((open) => !open) : setOpen((open) => !open)
  }, [isMobile, setOpen, setOpenMobile])

  // Adds a keyboard shortcut to toggle the sidebar.
  React.useEffect(() => {
    const handleKeyDown = (event: KeyboardEvent) => {
      if (
        event.key === SIDEBAR_KEYBOARD_SHORTCUT &&
        (event.metaKey || event.ctrlKey)
      ) {
        event.preventDefault()
        toggleSidebar()
      }
    }

    window.addEventListener("keydown", handleKeyDown)
    // Cleanup removes the listener; the effect re-runs whenever
    // `toggleSidebar` changes identity.
    return () => window.removeEventListener("keydown", handleKeyDown)
  }, [toggleSidebar])

  // We add a state so that we can do data-state="expanded" or "collapsed".
  // This makes it easier to style the sidebar with Tailwind classes.
  const state = open ? "expanded" : "collapsed"

  // Memoized so the context value object only changes identity when one of
  // its fields does.
  const contextValue = React.useMemo<SidebarContextProps>(
    () => ({
      state,
      open,
      setOpen,
      isMobile,
      openMobile,
      setOpenMobile,
      toggleSidebar,
    }),
    [state, open, setOpen, isMobile, openMobile, setOpenMobile, toggleSidebar]
  )

  return (
    <SidebarContext.Provider value={contextValue}>
      <div
        data-slot="sidebar-wrapper"
        style={
          {
            "--sidebar-width": SIDEBAR_WIDTH,
            "--sidebar-width-icon": SIDEBAR_WIDTH_ICON,
            ...style,
          } as React.CSSProperties
        }
        className={cn(
          "group/sidebar-wrapper has-data-[variant=inset]:bg-sidebar flex min-h-svh w-full",
          className
        )}
        {...props}
      >
        {children}
      </div>
    </SidebarContext.Provider>
  )
}

/**
 * The sidebar itself. Renders one of three layouts:
 *
 * 1. `collapsible="none"`: a plain fixed-width column `<div>`.
 * 2. mobile (`isMobile` from context): a `Sheet` whose open state is the
 *    context's `openMobile`.
 * 3. desktop: a width-reserving gap element plus a fixed-position container
 *    whose data attributes drive the collapse styling.
 *
 * @param side - `"left"` (default) or `"right"`.
 * @param variant - `"sidebar"` (default), `"floating"` or `"inset"`.
 * @param collapsible - `"offcanvas"` (default), `"icon"` or `"none"`.
 * @param dir - only forwarded to the mobile `SheetContent`.
 * @param className - applied in the non-collapsible and desktop layouts.
 */
function Sidebar(
  sidebarProps: React.ComponentProps<"div"> & {
    side?: "left" | "right"
    variant?: "sidebar" | "floating" | "inset"
    collapsible?: "offcanvas" | "icon" | "none"
  }
) {
  const {
    side = "left",
    variant = "sidebar",
    collapsible = "offcanvas",
    className,
    children,
    dir,
    ...rest
  } = sidebarProps
  const { isMobile, state, openMobile, setOpenMobile } = useSidebar()

  // Layout 1: never collapses.
  if (collapsible === "none") {
    const staticClassName = cn(
      "bg-sidebar text-sidebar-foreground flex h-full w-(--sidebar-width) flex-col",
      className
    )

    return (
      <div data-slot="sidebar" className={staticClassName} {...rest}>
        {children}
      </div>
    )
  }

  // Layout 2: mobile sheet.
  if (isMobile) {
    const mobileStyle = {
      "--sidebar-width": SIDEBAR_WIDTH_MOBILE,
    } as React.CSSProperties

    return (
      <Sheet open={openMobile} onOpenChange={setOpenMobile} {...rest}>
        <SheetContent
          dir={dir}
          data-sidebar="sidebar"
          data-slot="sidebar"
          data-mobile="true"
          className="bg-sidebar text-sidebar-foreground w-(--sidebar-width) p-0 [&>button]:hidden"
          style={mobileStyle}
          side={side}
        >
          <SheetHeader className="sr-only">
            <SheetTitle>Sidebar</SheetTitle>
            <SheetDescription>Displays the mobile sidebar.</SheetDescription>
          </SheetHeader>
          <div className="flex h-full w-full flex-col">{children}</div>
        </SheetContent>
      </Sheet>
    )
  }

  // Layout 3: desktop.
  // Floating and inset variants get extra padding, so their icon-collapsed
  // widths are computed differently from the plain variant.
  const isPadded = variant === "floating" || variant === "inset"
  const collapsibleAttr = state === "collapsed" ? collapsible : ""

  const gapClassName = cn(
    "transition-[width] duration-200 ease-linear relative w-(--sidebar-width) bg-transparent",
    "group-data-[collapsible=offcanvas]:w-0",
    "group-data-[side=right]:rotate-180",
    isPadded
      ? "group-data-[collapsible=icon]:w-[calc(var(--sidebar-width-icon)+(--spacing(4)))]"
      : "group-data-[collapsible=icon]:w-(--sidebar-width-icon)"
  )

  const containerClassName = cn(
    "fixed inset-y-0 z-10 hidden h-svh w-(--sidebar-width) transition-[left,right,width] duration-200 ease-linear data-[side=left]:left-0 data-[side=left]:group-data-[collapsible=offcanvas]:left-[calc(var(--sidebar-width)*-1)] data-[side=right]:right-0 data-[side=right]:group-data-[collapsible=offcanvas]:right-[calc(var(--sidebar-width)*-1)] md:flex",
    isPadded
      ? "p-2 group-data-[collapsible=icon]:w-[calc(var(--sidebar-width-icon)+(--spacing(4))+2px)]"
      : "group-data-[collapsible=icon]:w-(--sidebar-width-icon) group-data-[side=left]:border-r group-data-[side=right]:border-l",
    className
  )

  return (
    <div
      className="group peer text-sidebar-foreground hidden md:block"
      data-state={state}
      data-collapsible={collapsibleAttr}
      data-variant={variant}
      data-side={side}
      data-slot="sidebar"
    >
      {/* Reserves horizontal space in the page flow for the fixed container */}
      <div data-slot="sidebar-gap" className={gapClassName} />
      <div
        data-slot="sidebar-container"
        data-side={side}
        className={containerClassName}
        {...rest}
      >
        <div
          data-sidebar="sidebar"
          data-slot="sidebar-inner"
          className="bg-sidebar group-data-[variant=floating]:ring-sidebar-border group-data-[variant=floating]:rounded-lg group-data-[variant=floating]:shadow-sm group-data-[variant=floating]:ring-1 flex size-full flex-col"
        >
          {children}
        </div>
      </div>
    </div>
  )
}

/**
 * Ghost icon button that toggles the sidebar.
 *
 * A caller-supplied `onClick` runs first, then the sidebar is toggled.
 * Remaining props are forwarded to `Button` and are spread last.
 */
function SidebarTrigger(triggerProps: React.ComponentProps<typeof Button>) {
  const { className, onClick, ...rest } = triggerProps
  const { toggleSidebar } = useSidebar()

  const handleClick: NonNullable<
    React.ComponentProps<typeof Button>["onClick"]
  > = (event) => {
    onClick?.(event)
    toggleSidebar()
  }

  return (
    <Button
      data-sidebar="trigger"
      data-slot="sidebar-trigger"
      variant="ghost"
      size="icon-sm"
      className={cn(className)}
      onClick={handleClick}
      {...rest}
    >
      <HugeiconsIcon icon={SidebarLeftIcon} strokeWidth={2} />
      <span className="sr-only">Toggle Sidebar</span>
    </Button>
  )
}

/**
 * Narrow strip along the sidebar edge that toggles the sidebar on click.
 *
 * It is removed from the tab order (`tabIndex={-1}`). Caller props are spread
 * last, so a caller-supplied `onClick` replaces the toggle handler.
 */
function SidebarRail(railProps: React.ComponentProps<"button">) {
  const { className, ...rest } = railProps
  const { toggleSidebar } = useSidebar()

  const railClassName = cn(
    "hover:after:bg-sidebar-border absolute inset-y-0 z-20 hidden w-4 transition-all ease-linear group-data-[side=left]:-right-4 group-data-[side=right]:left-0 after:absolute after:inset-y-0 after:start-1/2 after:w-[2px] sm:flex ltr:-translate-x-1/2 rtl:-translate-x-1/2",
    "in-data-[side=left]:cursor-w-resize in-data-[side=right]:cursor-e-resize",
    "[[data-side=left][data-state=collapsed]_&]:cursor-e-resize [[data-side=right][data-state=collapsed]_&]:cursor-w-resize",
    "hover:group-data-[collapsible=offcanvas]:bg-sidebar group-data-[collapsible=offcanvas]:translate-x-0 group-data-[collapsible=offcanvas]:after:left-full",
    "[[data-side=left][data-collapsible=offcanvas]_&]:-right-2",
    "[[data-side=right][data-collapsible=offcanvas]_&]:-left-2",
    className
  )

  return (
    <button
      data-sidebar="rail"
      data-slot="sidebar-rail"
      aria-label="Toggle Sidebar"
      tabIndex={-1}
      onClick={toggleSidebar}
      title="Toggle Sidebar"
      className={railClassName}
      {...rest}
    />
  )
}

/**
 * `<main>` element for the page content next to the sidebar; its margin,
 * radius and shadow classes react to a peer sidebar with `variant="inset"`.
 */
function SidebarInset(insetProps: React.ComponentProps<"main">) {
  const { className, ...rest } = insetProps
  const insetClassName = cn(
    "bg-background md:peer-data-[variant=inset]:m-2 md:peer-data-[variant=inset]:ml-0 md:peer-data-[variant=inset]:rounded-xl md:peer-data-[variant=inset]:shadow-sm md:peer-data-[variant=inset]:peer-data-[state=collapsed]:ml-2 relative flex w-full flex-1 flex-col",
    className
  )

  return <main data-slot="sidebar-inset" className={insetClassName} {...rest} />
}

/**
 * `Input` preset for use inside the sidebar: full width, 2rem tall, no
 * shadow. Caller `className` is merged last.
 */
function SidebarInput(inputProps: React.ComponentProps<typeof Input>) {
  const { className, ...rest } = inputProps
  const inputClassName = cn("bg-background h-8 w-full shadow-none", className)

  return (
    <Input
      data-slot="sidebar-input"
      data-sidebar="input"
      className={inputClassName}
      {...rest}
    />
  )
}

/** Top region of the sidebar: a padded flex column `<div>`. */
function SidebarHeader(headerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = headerProps
  const headerClassName = cn("gap-2 p-2 flex flex-col", className)

  return (
    <div
      data-slot="sidebar-header"
      data-sidebar="header"
      className={headerClassName}
      {...rest}
    />
  )
}

/** Bottom region of the sidebar: a padded flex column `<div>`. */
function SidebarFooter(footerProps: React.ComponentProps<"div">) {
  const { className, ...rest } = footerProps
  const footerClassName = cn("gap-2 p-2 flex flex-col", className)

  return (
    <div
      data-slot="sidebar-footer"
      data-sidebar="footer"
      className={footerClassName}
      {...rest}
    />
  )
}

/**
 * `Separator` preset for the sidebar: sidebar border color, horizontal
 * margin and automatic width. Caller `className` is merged last.
 */
function SidebarSeparator(
  separatorProps: React.ComponentProps<typeof Separator>
) {
  const { className, ...rest } = separatorProps
  const separatorClassName = cn("bg-sidebar-border mx-2 w-auto", className)

  return (
    <Separator
      data-slot="sidebar-separator"
      data-sidebar="separator"
      className={separatorClassName}
      {...rest}
    />
  )
}

/**
 * Scrollable middle region of the sidebar; overflow is hidden instead when
 * the enclosing group is icon-collapsed.
 */
function SidebarContent(contentProps: React.ComponentProps<"div">) {
  const { className, ...rest } = contentProps
  const contentClassName = cn(
    "no-scrollbar gap-2 flex min-h-0 flex-1 flex-col overflow-auto group-data-[collapsible=icon]:overflow-hidden",
    className
  )

  return (
    <div
      data-slot="sidebar-content"
      data-sidebar="content"
      className={contentClassName}
      {...rest}
    />
  )
}

/** Container for one logical group of sidebar entries. */
function SidebarGroup(groupProps: React.ComponentProps<"div">) {
  const { className, ...rest } = groupProps
  const groupClassName = cn(
    "p-2 relative flex w-full min-w-0 flex-col",
    className
  )

  return (
    <div
      data-slot="sidebar-group"
      data-sidebar="group"
      className={groupClassName}
      {...rest}
    />
  )
}

/**
 * Label row for a sidebar group.
 *
 * Renders a `<div>` by default; with `asChild` it renders `Slot.Root` so the
 * props are applied to the caller's child element instead. The base classes
 * fade the label out when the sidebar is collapsed to icons.
 */
function SidebarGroupLabel({
  className,
  asChild = false,
  ...rest
}: React.ComponentProps<"div"> & { asChild?: boolean }) {
  const Element = asChild ? Slot.Root : "div"
  const classes = cn(
    "text-sidebar-foreground/70 ring-sidebar-ring h-8 rounded-md px-2 text-xs font-medium transition-[margin,opacity] duration-200 ease-linear group-data-[collapsible=icon]:-mt-8 group-data-[collapsible=icon]:opacity-0 focus-visible:ring-2 [&>svg]:size-4 flex shrink-0 items-center outline-hidden [&>svg]:shrink-0",
    className
  )

  return (
    <Element
      data-slot="sidebar-group-label"
      data-sidebar="group-label"
      className={classes}
      {...rest}
    />
  )
}

/**
 * Absolutely-positioned action button for a sidebar group.
 *
 * Renders a `<button>` by default, or `Slot.Root` when `asChild` is set.
 * The base classes hide it when the sidebar is collapsed to icons.
 */
function SidebarGroupAction({
  className,
  asChild = false,
  ...rest
}: React.ComponentProps<"button"> & { asChild?: boolean }) {
  const Element = asChild ? Slot.Root : "button"
  const classes = cn(
    "text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground absolute top-3.5 right-3 w-5 rounded-md p-0 focus-visible:ring-2 [&>svg]:size-4 flex aspect-square items-center justify-center outline-hidden transition-transform group-data-[collapsible=icon]:hidden after:absolute after:-inset-2 md:after:hidden [&>svg]:shrink-0",
    className
  )

  return (
    <Element
      data-slot="sidebar-group-action"
      data-sidebar="group-action"
      className={classes}
      {...rest}
    />
  )
}

/**
 * Full-width `<div>` tagged as the content of a sidebar group.
 *
 * Accepts every `<div>` prop; `className` is merged after the base classes.
 */
function SidebarGroupContent({
  className,
  ...divProps
}: React.ComponentProps<"div">) {
  const classes = cn("text-sm w-full", className)

  return (
    <div
      data-slot="sidebar-group-content"
      data-sidebar="group-content"
      className={classes}
      {...divProps}
    />
  )
}

/**
 * `<ul>` laid out as a vertical flex list and tagged as a sidebar menu.
 *
 * Accepts every `<ul>` prop; `className` is merged after the base classes.
 */
function SidebarMenu({ className, ...listProps }: React.ComponentProps<"ul">) {
  const classes = cn("gap-1 flex w-full min-w-0 flex-col", className)

  return (
    <ul
      data-slot="sidebar-menu"
      data-sidebar="menu"
      className={classes}
      {...listProps}
    />
  )
}

/**
 * `<li>` tagged as a sidebar menu item.
 *
 * Carries the `group/menu-item` class that the `group-*/menu-item` variants
 * in this file's action classes refer to.
 */
function SidebarMenuItem({ className, ...itemProps }: React.ComponentProps<"li">) {
  const classes = cn("group/menu-item relative", className)

  return (
    <li
      data-slot="sidebar-menu-item"
      data-sidebar="menu-item"
      className={classes}
      {...itemProps}
    />
  )
}

// Class-variance-authority recipe for SidebarMenuButton.
// The first argument is the base class list applied to every button; the
// `variants` map adds classes per `variant` / `size`, and `defaultVariants`
// selects "default" for both when the caller passes nothing.
const sidebarMenuButtonVariants = cva(
  "ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground active:bg-sidebar-accent active:text-sidebar-accent-foreground data-active:bg-sidebar-accent data-active:text-sidebar-accent-foreground data-open:hover:bg-sidebar-accent data-open:hover:text-sidebar-accent-foreground gap-2 rounded-lg corner-squircle p-2 text-left text-sm transition-[width,height,padding] group-has-data-[sidebar=menu-action]/menu-item:pr-8 group-data-[collapsible=icon]:size-8! group-data-[collapsible=icon]:p-2! focus-visible:ring-2 data-active:font-medium peer/menu-button flex w-full items-center overflow-hidden outline-hidden group/menu-button disabled:pointer-events-none disabled:opacity-50 aria-disabled:pointer-events-none aria-disabled:opacity-50 [&>span:last-child]:truncate [&_svg]:size-4 [&_svg]:shrink-0",
  {
    variants: {
      // Visual treatment: plain hover highlight, or a box-shadow outline.
      variant: {
        default: "hover:bg-sidebar-accent hover:text-sidebar-accent-foreground",
        outline: "bg-background hover:bg-sidebar-accent hover:text-sidebar-accent-foreground shadow-[0_0_0_1px_hsl(var(--sidebar-border))] hover:shadow-[0_0_0_1px_hsl(var(--sidebar-accent))]",
      },
      // Height and text size; "lg" also drops padding in icon-collapsed mode.
      size: {
        default: "h-9 text-sm",
        sm: "h-8 text-xs",
        lg: "h-12 text-sm group-data-[collapsible=icon]:p-0!",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  }
)

/**
 * Primary clickable entry of a sidebar menu item.
 *
 * - `asChild`: render `Slot.Root` instead of a `<button>`.
 * - `isActive`: mirrored to the `data-active` attribute.
 * - `variant` / `size`: forwarded to `sidebarMenuButtonVariants`; `size` is
 *   also mirrored to `data-size`.
 * - `tooltip`: a string (used as the tooltip's children) or a full set of
 *   `TooltipContent` props. When provided, the button is wrapped in a
 *   `Tooltip` whose content is hidden unless the sidebar state is
 *   "collapsed" and the viewport is not mobile.
 */
function SidebarMenuButton({
  asChild = false,
  isActive = false,
  variant = "default",
  size = "default",
  tooltip,
  className,
  ...props
}: React.ComponentProps<"button"> & {
  asChild?: boolean
  isActive?: boolean
  tooltip?: string | React.ComponentProps<typeof TooltipContent>
} & VariantProps<typeof sidebarMenuButtonVariants>) {
  const Element = asChild ? Slot.Root : "button"
  const { isMobile, state } = useSidebar()

  const trigger = (
    <Element
      data-slot="sidebar-menu-button"
      data-sidebar="menu-button"
      data-size={size}
      data-active={isActive}
      className={cn(sidebarMenuButtonVariants({ variant, size }), className)}
      {...props}
    />
  )

  // Without a tooltip the bare button is the whole component.
  if (!tooltip) {
    return trigger
  }

  // Normalise the string shorthand into TooltipContent props.
  const tooltipProps =
    typeof tooltip === "string" ? { children: tooltip } : tooltip

  return (
    <Tooltip>
      <TooltipTrigger asChild>{trigger}</TooltipTrigger>
      <TooltipContent
        side="right"
        align="center"
        hidden={state !== "collapsed" || isMobile}
        {...tooltipProps}
      />
    </Tooltip>
  )
}

/**
 * Secondary action button positioned at the right edge of a menu item.
 *
 * - `asChild`: render `Slot.Root` instead of a `<button>`.
 * - `showOnHover`: adds classes that keep the action transparent at the `md`
 *   breakpoint until the menu item is hovered, focused within, or open.
 */
function SidebarMenuAction({
  className,
  asChild = false,
  showOnHover = false,
  ...rest
}: React.ComponentProps<"button"> & {
  asChild?: boolean
  showOnHover?: boolean
}) {
  const Element = asChild ? Slot.Root : "button"
  const classes = cn(
    "text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground peer-hover/menu-button:text-sidebar-accent-foreground absolute top-1.5 right-1 aspect-square w-5 rounded-md p-0 peer-data-[size=default]/menu-button:top-2 peer-data-[size=lg]/menu-button:top-2.5 peer-data-[size=sm]/menu-button:top-1 focus-visible:ring-2 [&>svg]:size-4 flex items-center justify-center outline-hidden transition-transform group-data-[collapsible=icon]:hidden after:absolute after:-inset-2 md:after:hidden [&>svg]:shrink-0",
    showOnHover &&
      "peer-data-active/menu-button:text-sidebar-accent-foreground group-focus-within/menu-item:opacity-100 group-hover/menu-item:opacity-100 data-open:opacity-100 md:opacity-0",
    className
  )

  return (
    <Element
      data-slot="sidebar-menu-action"
      data-sidebar="menu-action"
      className={classes}
      {...rest}
    />
  )
}

/**
 * Non-interactive badge pinned to the right of a menu item.
 *
 * The base classes set `pointer-events-none` and `select-none`, align the
 * badge vertically according to the sibling menu button's `data-size`, and
 * hide it when the sidebar is collapsed to icons.
 */
function SidebarMenuBadge({
  className,
  ...divProps
}: React.ComponentProps<"div">) {
  const classes = cn(
    "text-sidebar-foreground peer-hover/menu-button:text-sidebar-accent-foreground peer-data-active/menu-button:text-sidebar-accent-foreground pointer-events-none absolute right-1 flex h-5 min-w-5 rounded-md px-1 text-xs font-medium peer-data-[size=default]/menu-button:top-1.5 peer-data-[size=lg]/menu-button:top-2.5 peer-data-[size=sm]/menu-button:top-1 flex items-center justify-center tabular-nums select-none group-data-[collapsible=icon]:hidden",
    className
  )

  return (
    <div
      data-slot="sidebar-menu-badge"
      data-sidebar="menu-badge"
      className={classes}
      {...divProps}
    />
  )
}

/**
 * Loading placeholder shaped like a menu row.
 *
 * Shows an optional square icon skeleton (`showIcon`) followed by a text
 * skeleton whose maximum width is a random whole percentage from 50% to 89%.
 * The width is chosen once per mount via a lazy `useState` initializer, so it
 * stays stable across re-renders.
 */
function SidebarMenuSkeleton({
  className,
  showIcon = false,
  ...divProps
}: React.ComponentProps<"div"> & {
  showIcon?: boolean
}) {
  const [skeletonWidth] = React.useState(
    () => `${Math.floor(Math.random() * 40) + 50}%`
  )

  // Exposed as a CSS custom property consumed by `max-w-(--skeleton-width)`.
  const textStyle = {
    "--skeleton-width": skeletonWidth,
  } as React.CSSProperties

  const iconSkeleton = showIcon && (
    <Skeleton
      className="size-4 rounded-md"
      data-sidebar="menu-skeleton-icon"
    />
  )

  return (
    <div
      data-slot="sidebar-menu-skeleton"
      data-sidebar="menu-skeleton"
      className={cn("h-8 gap-2 rounded-md px-2 flex items-center", className)}
      {...divProps}
    >
      {iconSkeleton}
      <Skeleton
        className="h-4 max-w-(--skeleton-width) flex-1"
        data-sidebar="menu-skeleton-text"
        style={textStyle}
      />
    </div>
  )
}

/**
 * Nested `<ul>` for sub-menu entries.
 *
 * The base classes draw a left border, indent the list, and hide it when the
 * sidebar is collapsed to icons.
 */
function SidebarMenuSub({ className, ...listProps }: React.ComponentProps<"ul">) {
  const classes = cn(
    "border-sidebar-border mx-3.5 translate-x-px gap-1 border-l px-2.5 py-0.5 group-data-[collapsible=icon]:hidden flex min-w-0 flex-col",
    className
  )

  return (
    <ul
      data-slot="sidebar-menu-sub"
      data-sidebar="menu-sub"
      className={classes}
      {...listProps}
    />
  )
}

/**
 * `<li>` tagged as a sub-menu item; carries the `group/menu-sub-item` class.
 *
 * Accepts every `<li>` prop; `className` is merged after the base classes.
 */
function SidebarMenuSubItem({
  className,
  ...itemProps
}: React.ComponentProps<"li">) {
  const classes = cn("group/menu-sub-item relative", className)

  return (
    <li
      data-slot="sidebar-menu-sub-item"
      data-sidebar="menu-sub-item"
      className={classes}
      {...itemProps}
    />
  )
}

/**
 * Link-style entry inside a sub-menu.
 *
 * - `asChild`: render `Slot.Root` instead of an `<a>`.
 * - `size`: "sm" or "md" (default); mirrored to `data-size`, which the base
 *   classes use to pick the text size.
 * - `isActive`: mirrored to `data-active`.
 */
function SidebarMenuSubButton({
  asChild = false,
  size = "md",
  isActive = false,
  className,
  ...rest
}: React.ComponentProps<"a"> & {
  asChild?: boolean
  size?: "sm" | "md"
  isActive?: boolean
}) {
  const Element = asChild ? Slot.Root : "a"
  const classes = cn(
    "text-sidebar-foreground ring-sidebar-ring hover:bg-sidebar-accent hover:text-sidebar-accent-foreground active:bg-sidebar-accent active:text-sidebar-accent-foreground [&>svg]:text-sidebar-accent-foreground data-active:bg-sidebar-accent data-active:text-sidebar-accent-foreground h-7 gap-2 rounded-md px-2 focus-visible:ring-2 data-[size=md]:text-sm data-[size=sm]:text-xs [&>svg]:size-4 flex min-w-0 -translate-x-px items-center overflow-hidden outline-hidden group-data-[collapsible=icon]:hidden disabled:pointer-events-none disabled:opacity-50 aria-disabled:pointer-events-none aria-disabled:opacity-50 [&>span:last-child]:truncate [&>svg]:shrink-0",
    className
  )

  return (
    <Element
      data-slot="sidebar-menu-sub-button"
      data-sidebar="menu-sub-button"
      data-size={size}
      data-active={isActive}
      className={classes}
      {...rest}
    />
  )
}

export {
  Sidebar,
  SidebarContent,
  SidebarFooter,
  SidebarGroup,
  SidebarGroupAction,
  SidebarGroupContent,
  SidebarGroupLabel,
  SidebarHeader,
  SidebarInput,
  SidebarInset,
  SidebarMenu,
  SidebarMenuAction,
  SidebarMenuBadge,
  SidebarMenuButton,
  SidebarMenuItem,
  SidebarMenuSkeleton,
  SidebarMenuSub,
  SidebarMenuSubButton,
  SidebarMenuSubItem,
  SidebarProvider,
  SidebarRail,
  SidebarSeparator,
  SidebarTrigger,
  useSidebar,
}


================================================
FILE: studio/frontend/src/components/ui/skeleton.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";

/**
 * Pulsing placeholder block.
 *
 * Renders a `<div>` with the muted background and `animate-pulse`; callers
 * size it through `className`, which is merged after the base classes.
 */
function Skeleton({ className, ...divProps }: React.ComponentProps<"div">) {
  const classes = cn("bg-muted rounded-xl animate-pulse", className);

  return <div data-slot="skeleton" className={classes} {...divProps} />;
}

export { Skeleton };


================================================
FILE: studio/frontend/src/components/ui/slider.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Slider as SliderPrimitive } from "radix-ui";
import * as React from "react";

import { cn } from "@/lib/utils";

// Thumb width in pixels, fed to getThumbInBoundsOffset when computing the
// fill width. NOTE(review): presumably mirrors the thumb's `size-4` class
// (16px at the default Tailwind scale) — keep the two in sync.
const THUMB_SIZE_PX = 16;

/**
 * Pixel offset that keeps a thumb of the given width inside the track.
 *
 * Maps `percent` linearly from `+width/2` at 0% through `0` at 50% to
 * `-width/2` at 100%; values outside 0–100 are clamped to the end offsets.
 */
function getThumbInBoundsOffset(width: number, percent: number) {
  const MIDPOINT_PERCENT = 50;
  const radius = width / 2;

  if (percent <= 0) {
    return radius;
  }
  if (percent >= 100) {
    return -radius;
  }

  const shift = (percent / MIDPOINT_PERCENT) * radius;
  return radius - shift;
}

/**
 * Styled wrapper around the Radix slider.
 *
 * Tracks the current values locally (mirroring `value` when controlled, or
 * following `onValueChange` when uncontrolled) so it can render one thumb
 * per value. For a single-thumb horizontal slider it also draws a custom
 * fill bar whose width follows the thumb's in-bounds position.
 *
 * All Radix `Slider.Root` props are accepted. `onValueChange` is still
 * invoked with every change; remaining props are spread onto the root last.
 */
function Slider({
  className,
  defaultValue,
  value,
  min = 0,
  max = 100,
  orientation = "horizontal",
  onValueChange,
  ...props
}: React.ComponentProps<typeof SliderPrimitive.Root>) {
  const isControlled = Array.isArray(value);
  // When neither `value` nor `defaultValue` is supplied, Radix receives no
  // default and falls back to a single value at `min`. Start the local
  // mirror with the same shape so the number of rendered thumbs matches the
  // number of values Radix actually tracks (previously `[min, max]`, which
  // rendered a second thumb with no backing value until the first change).
  const [uncontrolledValues, setUncontrolledValues] =
    React.useState<number[]>(() =>
      Array.isArray(defaultValue) ? defaultValue : [min],
    );

  const values = isControlled ? value : uncontrolledValues;
  const handleValueChange = React.useCallback(
    (nextValues: number[]) => {
      // Only mirror values locally when the parent does not own them.
      if (!isControlled) {
        setUncontrolledValues(nextValues);
      }
      onValueChange?.(nextValues);
    },
    [isControlled, onValueChange],
  );

  // The custom fill is only drawn for the single-thumb horizontal case;
  // otherwise the Radix Range element is shown as-is.
  const isSingleThumbHorizontal =
    values.length === 1 && orientation === "horizontal";
  // Value position as a percentage of the range, clamped to 0–100.
  // A zero-width range (max === min) is treated as 0%.
  const fillPercent = isSingleThumbHorizontal
    ? Math.min(
        100,
        Math.max(
          0,
          max === min ? 0 : (((values[0] ?? min) - min) / (max - min)) * 100,
        ),
      )
    : null;
  // Extend the fill by the thumb's in-bounds offset so its right edge lines
  // up with the thumb rather than with the raw percentage.
  const fillWidth =
    fillPercent === null
      ? undefined
      : fillPercent <= 0
        ? "0%"
        : `calc(${fillPercent}% + ${getThumbInBoundsOffset(THUMB_SIZE_PX, fillPercent)}px)`;

  return (
    <SliderPrimitive.Root
      data-slot="slider"
      defaultValue={defaultValue}
      value={value}
      min={min}
      max={max}
      orientation={orientation}
      onValueChange={handleValueChange}
      className={cn(
        "data-vertical:min-h-40 relative flex w-full touch-none items-center select-none data-disabled:opacity-50 data-vertical:h-full data-vertical:w-auto data-vertical:flex-col",
        className,
      )}
      {...props}
    >
      <SliderPrimitive.Track
        data-slot="slider-track"
        className="bg-muted rounded-4xl data-horizontal:h-3 data-horizontal:w-full data-vertical:h-full data-vertical:w-3 relative grow overflow-hidden cursor-pointer"
      >
        <SliderPrimitive.Range
          data-slot="slider-range"
          className={cn(
            "bg-primary absolute select-none data-horizontal:h-full data-vertical:w-full",
            // Hidden (not removed) when the custom fill below replaces it.
            isSingleThumbHorizontal && "opacity-0",
          )}
        />
        {isSingleThumbHorizontal && (
          <div
            aria-hidden={true}
            className={cn(
              "absolute inset-y-0 left-0 bg-primary pointer-events-none",
              fillPercent === 100 ? "rounded-4xl" : "rounded-l-4xl",
            )}
            style={{ width: fillWidth }}
          />
        )}
      </SliderPrimitive.Track>
      {Array.from({ length: values.length }, (_, index) => (
        <SliderPrimitive.Thumb
          data-slot="slider-thumb"
          key={index}
          className="border-primary ring-ring/50 relative z-10 size-4 rounded-4xl border bg-white shadow-sm block shrink-0 select-none cursor-pointer disabled:pointer-events-none disabled:opacity-50 transition-transform duration-100 ease-out hover:scale-110 hover:ring-4 active:scale-95 focus-visible:ring-4 focus-visible:outline-hidden"
        />
      ))}
    </SliderPrimitive.Root>
  );
}

export { Slider };


================================================
FILE: studio/frontend/src/components/ui/sonner.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Alert02Icon,
  CheckmarkCircle02Icon,
  InformationCircleIcon,
  Loading03Icon,
  MultiplicationSignCircleIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useTheme } from "next-themes";
import { Toaster as Sonner, type ToasterProps } from "sonner";

/**
 * App-wide toast host built on `sonner`.
 *
 * Follows the theme reported by `next-themes` (falling back to "system"),
 * swaps sonner's icons for Hugeicons glyphs, maps sonner's CSS variables to
 * the popover/border/radius tokens, and sets a 5000 ms duration. Caller
 * props are spread last and may override any of these settings.
 */
const Toaster = ({ ...props }: ToasterProps) => {
  const { theme = "system" } = useTheme();

  // All status icons share the same size and stroke; only the glyph and an
  // optional extra class (the spinner animation) vary.
  const renderIcon = (
    glyph: React.ComponentProps<typeof HugeiconsIcon>["icon"],
    iconClassName = "size-4",
  ) => <HugeiconsIcon icon={glyph} strokeWidth={2} className={iconClassName} />;

  const toasterStyle = {
    "--normal-bg": "var(--popover)",
    "--normal-text": "var(--popover-foreground)",
    "--normal-border": "var(--border)",
    "--border-radius": "var(--radius)",
  } as React.CSSProperties;

  return (
    <Sonner
      theme={theme as ToasterProps["theme"]}
      className="toaster group"
      duration={5000}
      icons={{
        success: renderIcon(CheckmarkCircle02Icon),
        info: renderIcon(InformationCircleIcon),
        warning: renderIcon(Alert02Icon),
        error: renderIcon(MultiplicationSignCircleIcon),
        loading: renderIcon(Loading03Icon, "size-4 animate-spin"),
      }}
      style={toasterStyle}
      toastOptions={{
        classNames: {
          toast: "cn-toast",
          description: "!text-muted-foreground",
          closeButton: "!top-3 !right-3 !translate-y-0",
        },
      }}
      {...props}
    />
  );
};

export { Toaster };


================================================
FILE: studio/frontend/src/components/ui/sparkles-text.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { motion } from "motion/react";
import {
  type CSSProperties,
  type ReactElement,
  type ReactNode,
  useEffect,
  useState,
} from "react";

import { cn } from "@/lib/utils";

// State for a single animated sparkle, as produced by `generateStar` in
// SparklesText.
type SparkleItem = {
  // Built from the position and `Date.now()`; used as the React key.
  id: string;
  // Horizontal position as a percentage string (e.g. "42.1%").
  x: string;
  // Vertical position as a percentage string.
  y: string;
  // Fill colour of the sparkle path (one of the two configured colours).
  color: string;
  // Animation start delay, randomised in [0, 2).
  delay: number;
  // Peak scale of the animation, randomised in [0.3, 1.3).
  scale: number;
  // Remaining life, randomised in [5, 15) and reduced by 0.1 on each update
  // tick; the sparkle is regenerated once this reaches 0 or below.
  lifespan: number;
};

/**
 * One animated four-point star.
 *
 * Positioned absolutely at (`x`, `y`) and looped forever: it fades in and
 * out while scaling up to `scale` and rotating, starting after `delay`.
 */
function Sparkle({ id, x, y, color, delay, scale }: SparkleItem): ReactElement {
  const startState = { opacity: 0, left: x, top: y };
  const keyframes = {
    opacity: [0, 1, 0],
    scale: [0, scale, 0],
    rotate: [75, 120, 150],
  };
  const timing = { duration: 0.8, repeat: Number.POSITIVE_INFINITY, delay };

  return (
    <motion.svg
      key={id}
      className="pointer-events-none absolute z-20"
      initial={startState}
      animate={keyframes}
      transition={timing}
      width="21"
      height="21"
      viewBox="0 0 21 21"
    >
      <title>Sparkle</title>
      <path
        d="M9.82531 0.843845C10.0553 0.215178 10.9446 0.215178 11.1746 0.843845L11.8618 2.72026C12.4006 4.19229 12.3916 6.39157 13.5 7.5C14.6084 8.60843 16.8077 8.59935 18.2797 9.13822L20.1561 9.82534C20.7858 10.0553 20.7858 10.9447 20.1561 11.1747L18.2797 11.8618C16.8077 12.4007 14.6084 12.3916 13.5 13.5C12.3916 14.6084 12.4006 16.8077 11.8618 18.2798L11.1746 20.1562C10.9446 20.7858 10.0553 20.7858 9.82531 20.1562L9.13819 18.2798C8.59932 16.8077 8.60843 14.6084 7.5 13.5C6.39157 12.3916 4.19225 12.4007 2.72023 11.8618L0.843814 11.1747C0.215148 10.9447 0.215148 10.0553 0.843814 9.82534L2.72023 9.13822C4.19225 8.59935 6.39157 8.60843 7.5 7.5C8.60843 6.39157 8.59932 4.19229 9.13819 2.72026L9.82531 0.843845Z"
        fill={color}
      />
    </motion.svg>
  );
}

/** Props accepted by `SparklesText`. */
interface SparklesTextProps {
  /**
   * @default <div />
   * @type ReactElement
   * @description
   * The component to be rendered as the text.
   * NOTE(review): SparklesText always renders a `<div>`; this prop does not
   * currently change the rendered element.
   * */
  as?: ReactElement;

  /**
   * @default ""
   * @type string
   * @description
   * Extra class names, merged after the component's base text classes
   */
  className?: string;

  /**
   * @required
   * @type ReactNode
   * @description
   * The content to be displayed
   * */
  children: ReactNode;

  /**
   * @default 10
   * @type number
   * @description
   * The number of sparkles kept on screen at once
   * */
  sparklesCount?: number;

  /**
   * @default {first: '#9E7AFF', second: '#FE8BBB'}
   * @type {{ first: string; second: string }}
   * @description
   * The two colors each sparkle is randomly chosen from
   * */
  colors?: {
    first: string;
    second: string;
  };
}

/**
 * Bold text decorated with randomly placed, continuously animated sparkles.
 *
 * On mount (and whenever the colours or `sparklesCount` change) it creates
 * `sparklesCount` sparkles with random position, colour, delay, scale and
 * lifespan. Every 100 ms each sparkle's lifespan is reduced by 0.1, and a
 * sparkle whose lifespan has run out is replaced by a fresh one. The
 * interval is cleared on unmount or when the effect re-runs.
 *
 * The two colours are also exposed on the wrapper as the CSS custom
 * properties `--sparkles-first-color` and `--sparkles-second-color`.
 */
export function SparklesText({
  children,
  colors = { first: "#9E7AFF", second: "#FE8BBB" },
  className,
  sparklesCount = 10,
  // `as` is part of the public props but is not rendered. Pull it out of the
  // rest object so a ReactElement is never spread onto the <div> as a DOM
  // attribute.
  as: _as,
  ...props
}: SparklesTextProps): ReactElement {
  const [sparkles, setSparkles] = useState<SparkleItem[]>([]);

  useEffect(() => {
    const generateStar = (): SparkleItem => {
      const starX = `${Math.random() * 100}%`;
      const starY = `${Math.random() * 100}%`;
      const color = Math.random() > 0.5 ? colors.first : colors.second;
      const delay = Math.random() * 2;
      const scale = Math.random() * 1 + 0.3;
      const lifespan = Math.random() * 10 + 5;
      const id = `${starX}-${starY}-${Date.now()}`;
      return { id, x: starX, y: starY, color, delay, scale, lifespan };
    };

    const initializeStars = () => {
      const newSparkles = Array.from({ length: sparklesCount }, generateStar);
      setSparkles(newSparkles);
    };

    // Age every sparkle by one tick and respawn the expired ones.
    const updateStars = () => {
      setSparkles((currentSparkles) =>
        currentSparkles.map((star) => {
          if (star.lifespan <= 0) {
            return generateStar();
          }
          return { ...star, lifespan: star.lifespan - 0.1 };
        }),
      );
    };

    initializeStars();
    const interval = setInterval(updateStars, 100);

    return () => clearInterval(interval);
  }, [colors.first, colors.second, sparklesCount]);

  return (
    <div
      className={cn("text-6xl font-bold", className)}
      {...props}
      style={
        {
          "--sparkles-first-color": `${colors.first}`,
          "--sparkles-second-color": `${colors.second}`,
        } as CSSProperties
      }
    >
      <span className="relative inline-block">
        {sparkles.map((sparkle) => (
          <Sparkle key={sparkle.id} {...sparkle} />
        ))}
        <strong>{children}</strong>
      </span>
    </div>
  );
}


================================================
FILE: studio/frontend/src/components/ui/spinner.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils"
import { HugeiconsIcon } from "@hugeicons/react"
import { Loading03Icon } from "@hugeicons/core-free-icons"

/**
 * Spinning loading indicator.
 *
 * Renders the Hugeicons loading glyph with `role="status"` and an
 * `aria-label` of "Loading"; `className` is merged after the default size
 * and spin classes.
 */
function Spinner({ className }: { className?: string }) {
  const classes = cn("size-4 animate-spin", className)

  return (
    <HugeiconsIcon
      icon={Loading03Icon}
      strokeWidth={2}
      role="status"
      aria-label="Loading"
      className={classes}
    />
  )
}

export { Spinner }


================================================
FILE: studio/frontend/src/components/ui/switch.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Switch as SwitchPrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Styled toggle built on the Radix switch primitive.
 *
 * `size` ("default" or "sm") is mirrored to `data-size`, which the class
 * lists use to pick the track and thumb dimensions. All other Radix
 * `Switch.Root` props are forwarded and spread last.
 */
function Switch({
  className,
  size = "default",
  ...rootProps
}: React.ComponentProps<typeof SwitchPrimitive.Root> & {
  size?: "sm" | "default";
}) {
  const rootClasses = cn(
    "data-checked:bg-primary data-unchecked:bg-input focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 dark:data-unchecked:bg-input/80 shrink-0 rounded-full border border-transparent focus-visible:ring-[3px] aria-invalid:ring-[3px] data-[size=default]:h-[18.4px] data-[size=default]:w-[32px] data-[size=sm]:h-[14px] data-[size=sm]:w-[24px] peer group/switch relative inline-flex items-center transition-all outline-none after:absolute after:-inset-x-3 after:-inset-y-2 data-disabled:cursor-not-allowed data-disabled:opacity-50",
    className,
  );

  return (
    <SwitchPrimitive.Root
      data-slot="switch"
      data-size={size}
      className={rootClasses}
      {...rootProps}
    >
      <SwitchPrimitive.Thumb
        data-slot="switch-thumb"
        className="bg-background dark:data-unchecked:bg-foreground dark:data-checked:bg-primary-foreground rounded-full group-data-[size=default]/switch:size-4 group-data-[size=sm]/switch:size-3 group-data-[size=default]/switch:data-checked:translate-x-[calc(100%-2px)] group-data-[size=sm]/switch:data-checked:translate-x-[calc(100%-2px)] group-data-[size=default]/switch:data-unchecked:translate-x-0 group-data-[size=sm]/switch:data-unchecked:translate-x-0 pointer-events-none block ring-0 transition-transform"
      />
    </SwitchPrimitive.Root>
  );
}

export { Switch };


================================================
FILE: studio/frontend/src/components/ui/table.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * `<table>` wrapped in a horizontally scrollable container.
 *
 * Props and `className` apply to the `<table>` itself, not to the wrapper.
 */
function Table({ className, ...tableProps }: React.ComponentProps<"table">) {
  const tableClasses = cn("w-full caption-bottom text-sm", className);

  return (
    <div
      data-slot="table-container"
      className="relative w-full overflow-x-auto"
    >
      <table data-slot="table" className={tableClasses} {...tableProps} />
    </div>
  );
}

/** `<thead>` whose descendant rows get a bottom border. */
function TableHeader({
  className,
  ...sectionProps
}: React.ComponentProps<"thead">) {
  const classes = cn("[&_tr]:border-b", className);

  return <thead data-slot="table-header" className={classes} {...sectionProps} />;
}

/** `<tbody>` that removes the border from its last row. */
function TableBody({
  className,
  ...sectionProps
}: React.ComponentProps<"tbody">) {
  const classes = cn("[&_tr:last-child]:border-0", className);

  return <tbody data-slot="table-body" className={classes} {...sectionProps} />;
}

/** `<tfoot>` with a muted background, top border and medium font weight. */
function TableFooter({
  className,
  ...sectionProps
}: React.ComponentProps<"tfoot">) {
  const classes = cn(
    "bg-muted/50 border-t font-medium [&>tr]:last:border-b-0",
    className,
  );

  return <tfoot data-slot="table-footer" className={classes} {...sectionProps} />;
}

/**
 * `<tr>` with a bottom border, hover highlight, and a selected style driven
 * by `data-state="selected"`.
 */
function TableRow({ className, ...rowProps }: React.ComponentProps<"tr">) {
  const classes = cn(
    "hover:bg-muted/50 data-[state=selected]:bg-muted border-b transition-colors",
    className,
  );

  return <tr data-slot="table-row" className={classes} {...rowProps} />;
}

/**
 * `<th>` header cell: left-aligned, non-wrapping, with right padding removed
 * when it contains a checkbox-role element.
 */
function TableHead({ className, ...cellProps }: React.ComponentProps<"th">) {
  const classes = cn(
    "text-foreground h-12 px-3 text-left align-middle font-medium whitespace-nowrap [&:has([role=checkbox])]:pr-0",
    className,
  );

  return <th data-slot="table-head" className={classes} {...cellProps} />;
}

/**
 * `<td>` body cell: padded, non-wrapping, with right padding removed when it
 * contains a checkbox-role element.
 */
function TableCell({ className, ...cellProps }: React.ComponentProps<"td">) {
  const classes = cn(
    "p-3 align-middle whitespace-nowrap [&:has([role=checkbox])]:pr-0",
    className,
  );

  return <td data-slot="table-cell" className={classes} {...cellProps} />;
}

/** `<caption>` rendered in small muted text with top margin. */
function TableCaption({
  className,
  ...captionProps
}: React.ComponentProps<"caption">) {
  const classes = cn("text-muted-foreground mt-4 text-sm", className);

  return (
    <caption data-slot="table-caption" className={classes} {...captionProps} />
  );
}

export {
  Table,
  TableHeader,
  TableBody,
  TableFooter,
  TableHead,
  TableRow,
  TableCell,
  TableCaption,
};


================================================
FILE: studio/frontend/src/components/ui/tabs.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

/* eslint-disable react-refresh/only-export-components */

import { type VariantProps, cva } from "class-variance-authority";
import { motion } from "motion/react";
import { Tabs as TabsPrimitive } from "radix-ui";
import * as React from "react";

import { cn } from "@/lib/utils";

// Shares the currently selected tab value and a per-Tabs unique id with
// TabsTrigger, which uses them to decide whether it is active and to build
// the `layoutId` of its animated background. Defaults to an empty id and no
// value when a trigger is rendered outside <Tabs>.
const TabsContext = React.createContext<{ value?: string; id: string }>({
  id: "",
});

/**
 * Root of the tabs component.
 *
 * Wraps the Radix tabs root and additionally publishes the selected value
 * and a unique id through `TabsContext` so triggers can render the animated
 * active background.
 *
 * The selected value is `value` when the caller supplies one, otherwise an
 * internal state seeded from `defaultValue` (or "" when absent). Radix is
 * always driven through `value`, and `onValueChange` is still forwarded to
 * the caller on every change.
 *
 * `orientation` is passed to Radix as well as mirrored to
 * `data-orientation`, so vertical tabs get the matching arrow-key
 * navigation and ARIA orientation rather than only the vertical styling.
 */
export function Tabs({
  className,
  orientation = "horizontal",
  value,
  defaultValue,
  onValueChange,
  ...props
}: React.ComponentProps<typeof TabsPrimitive.Root>): React.ReactElement {
  const [internal, setInternal] = React.useState(defaultValue ?? "");
  const current = value ?? internal;
  // Unique per Tabs instance so sibling tab groups do not share a layoutId.
  const id = React.useId();

  return (
    <TabsContext.Provider value={{ value: current, id }}>
      <TabsPrimitive.Root
        data-slot="tabs"
        data-orientation={orientation}
        orientation={orientation}
        value={current}
        onValueChange={(v) => {
          setInternal(v);
          onValueChange?.(v);
        }}
        className={cn(
          "gap-2 group/tabs flex data-[orientation=horizontal]:flex-col",
          className,
        )}
        {...props}
      />
    </TabsContext.Provider>
  );
}

// Class-variance-authority recipe for TabsList.
// The base classes size and round the list and switch it to a column layout
// for vertical tabs; `variant` picks between a muted pill background
// ("default") and a transparent list with a small gap ("line").
export const tabsListVariants = cva(
  "rounded-4xl p-[3px]  group-data-horizontal/tabs:h-9 group-data-vertical/tabs:rounded-2xl data-[variant=line]:rounded-none group/tabs-list text-muted-foreground inline-flex w-fit items-center justify-center group-data-[orientation=vertical]/tabs:h-fit group-data-[orientation=vertical]/tabs:flex-col",
  {
    variants: {
      variant: {
        default: "bg-muted",
        line: "gap-1 bg-transparent",
      },
    },
    defaultVariants: {
      variant: "default",
    },
  },
);

/**
 * Container for the tab triggers.
 *
 * `variant` ("default" or "line") selects classes from `tabsListVariants`
 * and is mirrored to `data-variant`, which the trigger classes key off.
 */
export function TabsList({
  className,
  variant = "default",
  ...listProps
}: React.ComponentProps<typeof TabsPrimitive.List> &
  VariantProps<typeof tabsListVariants>): React.ReactElement {
  const classes = cn(tabsListVariants({ variant }), className);

  return (
    <TabsPrimitive.List
      data-slot="tabs-list"
      data-variant={variant}
      className={classes}
      {...listProps}
    />
  );
}

/**
 * A single tab button.
 *
 * Compares its `value` with the selected value from `TabsContext`; when it
 * matches, a `motion.span` background is rendered behind the label. The
 * span's `layoutId` is derived from the owning Tabs' id, so the background
 * animates between triggers of the same Tabs. The label is wrapped in a
 * span stacked above that background.
 */
export function TabsTrigger({
  className,
  value,
  children,
  ...triggerProps
}: React.ComponentProps<typeof TabsPrimitive.Trigger>): React.ReactElement {
  const tabs = React.useContext(TabsContext);
  const selected = tabs.value === value;

  const activeBackground = selected && (
    <motion.span
      layoutId={`tab-bg-${tabs.id}`}
      className="absolute inset-0 rounded-xl bg-background dark:bg-input/30 dark:border dark:border-input"
      transition={{
        type: "spring",
        stiffness: 500,
        damping: 35,
        mass: 0.5,
      }}
    />
  );

  const classes = cn(
    "gap-1.5 rounded-xl corner-squircle border border-transparent px-2 py-1 text-sm font-medium group-data-vertical/tabs:px-2.5 group-data-vertical/tabs:py-1.5 [&_svg:not([class*='size-'])]:size-4 focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:outline-ring text-foreground/60 hover:text-foreground dark:text-muted-foreground dark:hover:text-foreground relative inline-flex h-[calc(100%-1px)] flex-1 items-center justify-center whitespace-nowrap transition-colors group-data-[orientation=vertical]/tabs:w-full group-data-[orientation=vertical]/tabs:justify-start focus-visible:ring-[3px] focus-visible:outline-1 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
    "group-data-[variant=line]/tabs-list:bg-transparent group-data-[variant=line]/tabs-list:data-active:bg-transparent dark:group-data-[variant=line]/tabs-list:data-active:border-transparent dark:group-data-[variant=line]/tabs-list:data-active:bg-transparent",
    "data-active:text-foreground dark:data-active:text-foreground",
    "after:bg-foreground after:absolute after:opacity-0 after:transition-opacity group-data-[orientation=horizontal]/tabs:after:inset-x-0 group-data-[orientation=horizontal]/tabs:after:bottom-[-5px] group-data-[orientation=horizontal]/tabs:after:h-0.5 group-data-[orientation=vertical]/tabs:after:inset-y-0 group-data-[orientation=vertical]/tabs:after:-right-1 group-data-[orientation=vertical]/tabs:after:w-0.5 group-data-[variant=line]/tabs-list:data-active:after:opacity-100",
    className,
  );

  return (
    <TabsPrimitive.Trigger
      data-slot="tabs-trigger"
      value={value}
      className={classes}
      {...triggerProps}
    >
      {activeBackground}
      <span className="relative z-10">{children}</span>
    </TabsPrimitive.Trigger>
  );
}

/**
 * Panel wrapper around `TabsPrimitive.Content`.
 *
 * Merges the caller's `className` with the base panel classes and forwards
 * every other prop to the primitive unchanged.
 */
export function TabsContent(
  props: React.ComponentProps<typeof TabsPrimitive.Content>,
): React.ReactElement {
  const { className, ...contentProps } = props;
  const panelClassName = cn("text-sm flex-1 outline-none", className);

  return (
    <TabsPrimitive.Content
      data-slot="tabs-content"
      className={panelClassName}
      {...contentProps}
    />
  );
}


================================================
FILE: studio/frontend/src/components/ui/terminal.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils"
import {
  Children,
  cloneElement,
  isValidElement,
  useEffect,
  useRef,
  useState,
} from "react"
import type { ElementType, ReactElement, ReactNode } from "react"

/** Props accepted by the `Terminal` container. */
type TerminalProps = {
  // Line elements to render; `Terminal` keeps only valid React elements.
  children: ReactNode
  // Extra class names merged onto the container element.
  className?: string
  // When true (the `Terminal` default), lines are activated one after another.
  sequence?: boolean
  // When true (the `Terminal` default), sequencing waits until the container is seen in view.
  startOnView?: boolean
}

/** Props that `Terminal` injects into each child through `cloneElement`. */
type InternalLineProps = {
  // Whether the parent currently allows this line to start.
  __isActive?: boolean
  // Called by the line once its own animation has finished.
  __onDone?: () => void
  // Copy of the parent's `sequence` flag.
  __sequence?: boolean
}

/**
 * Reports whether an element has become visible at least once.
 *
 * Attach the returned `ref` to a `div`. While `enabled` is true, `started`
 * flips to true the first time an IntersectionObserver (threshold 0.2) sees
 * the element intersecting, after which the observer is disconnected.
 * When `enabled` is false, `started` is always true and nothing is observed.
 */
function useStartOnView(enabled: boolean): {
  ref: React.RefObject<HTMLDivElement | null>
  started: boolean
} {
  const ref = useRef<HTMLDivElement | null>(null)
  const [isInView, setIsInView] = useState(false)

  useEffect(() => {
    // Nothing to watch when the feature is off or the element is not mounted.
    const target = enabled ? ref.current : null
    if (!target) {
      return
    }

    const watcher = new IntersectionObserver(
      (entries) => {
        const first = entries[0]
        if (first?.isIntersecting) {
          setIsInView(true)
          // One sighting is enough; stop observing immediately.
          watcher.disconnect()
        }
      },
      { threshold: 0.2 }
    )
    watcher.observe(target)

    return () => {
      watcher.disconnect()
    }
  }, [enabled])

  return { ref, started: isInView || !enabled }
}

/**
 * Terminal-styled container that optionally plays its child lines in order.
 *
 * Each valid child element is cloned with `__sequence`, `__isActive` and
 * `__onDone`. In sequence mode a line becomes active once every earlier line
 * has reported completion (and, with `startOnView`, once the container has
 * been seen in view). Outside sequence mode every line is active immediately.
 */
export function Terminal({
  children,
  className,
  sequence = true,
  startOnView = true,
}: TerminalProps): ReactElement {
  const { ref, started } = useStartOnView(startOnView)
  const lines = Children.toArray(children).filter(isValidElement)
  const [activeIndex, setActiveIndex] = useState(0)

  // Highest line index that is currently allowed to run.
  let visibleIndex: number
  if (!sequence) {
    visibleIndex = Number.MAX_SAFE_INTEGER
  } else if (started) {
    visibleIndex = activeIndex
  } else {
    visibleIndex = -1
  }

  const handleLineDone = (index: number): void => {
    if (!sequence) {
      return
    }

    // Only the line that is currently last-active may advance the cursor.
    setActiveIndex((current) =>
      current === index ? Math.min(index + 1, lines.length) : current
    )
  }

  const renderedLines = lines.map((line, index) => {
    const injected = {
      __sequence: sequence,
      __isActive: !sequence || visibleIndex >= index,
      __onDone: () => handleLineDone(index),
      key: line.key ?? index,
    } as InternalLineProps
    return cloneElement(line, injected)
  })

  return (
    <div
      ref={ref}
      className={cn(
        "w-full rounded-2xl border border-border bg-card px-6 py-5 font-mono text-sm text-foreground shadow-2xl",
        className
      )}
    >
      {renderedLines}
    </div>
  )
}

/** Props for `AnimatedSpan`, including the internal props a parent `Terminal` injects. */
type AnimatedSpanProps = InternalLineProps & {
  // Content revealed when the line becomes visible.
  children: ReactNode
  // Extra class names merged onto the wrapper element.
  className?: string
  // Value passed to `setTimeout` before the line is revealed (`AnimatedSpan` defaults it to 0).
  delay?: number
  // When not sequenced by a parent, wait for the element to be seen in view (`AnimatedSpan` defaults it to false).
  startOnView?: boolean
}

/**
 * A line that fades in after `delay` and then reports completion.
 *
 * When sequenced by a parent (`__sequence`), the line starts once `__isActive`
 * is set; otherwise it starts according to its own `startOnView` tracking.
 * The reveal runs at most once per mounted instance.
 */
export function AnimatedSpan({
  children,
  className,
  delay = 0,
  startOnView = false,
  __isActive,
  __sequence,
  __onDone,
}: AnimatedSpanProps): ReactElement {
  const { ref, started } = useStartOnView(startOnView)
  const [visible, setVisible] = useState(false)
  const doneRef = useRef(false)
  const onDoneRef = useRef(__onDone)
  const shouldStart = __sequence ? __isActive : started

  // Keep the latest callback in a ref so the timer effect need not depend on it.
  useEffect(() => {
    onDoneRef.current = __onDone
  }, [__onDone])

  useEffect(() => {
    const mayRun = shouldStart && !doneRef.current
    if (!mayRun) {
      return
    }

    const reveal = (): void => {
      setVisible(true)
      doneRef.current = true
      onDoneRef.current?.()
    }
    const timerId = window.setTimeout(reveal, delay)

    return () => {
      window.clearTimeout(timerId)
    }
  }, [delay, shouldStart])

  const opacityClass = visible ? "opacity-100" : "opacity-0"

  return (
    <div
      ref={ref}
      className={cn(
        "min-h-5 transition-opacity duration-300",
        opacityClass,
        className
      )}
    >
      {children}
    </div>
  )
}

/** Props for `TypingAnimation`, including the internal props a parent `Terminal` injects. */
type TypingAnimationProps = InternalLineProps & {
  // Text to type out; must be a plain string.
  children: string
  // Extra class names merged onto the rendered text element.
  className?: string
  // Interval passed to `setInterval` between typed steps (`TypingAnimation` defaults it to 60).
  duration?: number
  // Value passed to `setTimeout` before typing begins (`TypingAnimation` defaults it to 0).
  delay?: number
  // Element type used to render the typed text (`TypingAnimation` defaults it to "span").
  as?: ElementType
  // When not sequenced by a parent, wait for the element to be seen in view (`TypingAnimation` defaults it to true).
  startOnView?: boolean
}

/**
 * Types `children` out one character at a time, then reports completion.
 *
 * When sequenced by a parent (`__sequence`), typing starts once `__isActive`
 * is set; otherwise it starts according to its own `startOnView` tracking.
 * After `delay`, one more character is appended every `duration` until the
 * whole string is shown, at which point `__onDone` is invoked once.
 *
 * The text is advanced by Unicode code point rather than UTF-16 code unit, so
 * characters outside the Basic Multilingual Plane (e.g. most emoji) are never
 * rendered as a dangling half of a surrogate pair.
 */
export function TypingAnimation({
  children,
  className,
  duration = 60,
  delay = 0,
  as: Component = "span",
  startOnView = true,
  __isActive,
  __sequence,
  __onDone,
}: TypingAnimationProps): ReactElement {
  const { ref, started } = useStartOnView(startOnView)
  const [typed, setTyped] = useState("")
  const doneRef = useRef(false)
  const onDoneRef = useRef(__onDone)
  const shouldStart = __sequence ? __isActive : started

  // Keep the latest callback in a ref so the typing effect need not depend on it.
  useEffect(() => {
    onDoneRef.current = __onDone
  }, [__onDone])

  useEffect(() => {
    if (!shouldStart || doneRef.current) {
      return
    }

    // Array.from iterates a string by code point, keeping surrogate pairs intact.
    const codePoints = Array.from(children)
    let index = 0
    let intervalId: number | null = null

    const stopInterval = (): void => {
      if (intervalId !== null) {
        window.clearInterval(intervalId)
        intervalId = null
      }
    }

    const startTimer = window.setTimeout(() => {
      intervalId = window.setInterval(() => {
        index += 1
        setTyped(codePoints.slice(0, index).join(""))

        if (index >= codePoints.length) {
          stopInterval()
          doneRef.current = true
          onDoneRef.current?.()
        }
      }, duration)
    }, delay)

    return () => {
      window.clearTimeout(startTimer)
      stopInterval()
    }
  }, [children, delay, duration, shouldStart])

  return (
    <div ref={ref} className="min-h-5">
      <Component className={cn("whitespace-pre-wrap", className)}>{typed}</Component>
    </div>
  )
}


================================================
FILE: studio/frontend/src/components/ui/textarea.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Styled native `<textarea>`.
 *
 * Applies the shared input styling, merges the caller's `className` on top,
 * and forwards all remaining props to the element.
 */
function Textarea(props: React.ComponentProps<"textarea">) {
  const { className, ...textareaProps } = props;
  const baseClasses =
    "border-input bg-input/30 focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:aria-invalid:border-destructive/50 resize-none rounded-xl border px-3 py-3 text-base transition-colors focus-visible:ring-[3px] aria-invalid:ring-[3px] md:text-sm placeholder:text-muted-foreground flex field-sizing-content min-h-16 w-full outline-none disabled:cursor-not-allowed disabled:opacity-50";

  return (
    <textarea
      data-slot="textarea"
      className={cn(baseClasses, className)}
      {...textareaProps}
    />
  );
}

export { Textarea };


================================================
FILE: studio/frontend/src/components/ui/toggle-group.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

import type { VariantProps } from "class-variance-authority";
import { ToggleGroup as ToggleGroupPrimitive } from "radix-ui";
import * as React from "react";

import { toggleVariants } from "@/components/ui/toggle";
import { cn } from "@/lib/utils";

/**
 * Context through which `ToggleGroup` shares its variant, size, spacing and
 * orientation with nested `ToggleGroupItem`s. The object below is the value
 * consumers see when no provider is present.
 */
const ToggleGroupContext = React.createContext<
  VariantProps<typeof toggleVariants> & {
    spacing?: number;
    orientation?: "horizontal" | "vertical";
  }
>({
  size: "default",
  variant: "default",
  spacing: 0,
  orientation: "horizontal",
});

/**
 * Root of a toggle group.
 *
 * Renders `ToggleGroupPrimitive.Root` with data attributes describing the
 * group's variant, size, spacing and orientation, exposes `spacing` as the
 * `--gap` CSS variable, and publishes the same settings to descendants via
 * `ToggleGroupContext`. Remaining props are forwarded to the primitive.
 */
function ToggleGroup({
  className,
  variant,
  size,
  spacing = 0,
  orientation = "horizontal",
  children,
  ...props
}: React.ComponentProps<typeof ToggleGroupPrimitive.Root> &
  VariantProps<typeof toggleVariants> & {
    spacing?: number;
    orientation?: "horizontal" | "vertical";
  }) {
  const gapStyle = { "--gap": spacing } as React.CSSProperties;
  const sharedSettings = { variant, size, spacing, orientation };
  const rootClassName = cn(
    "data-[spacing=0]:data-[variant=outline]:rounded-4xl group/toggle-group flex w-fit flex-row items-center gap-[--spacing(var(--gap))] data-[orientation=vertical]:flex-col data-[orientation=vertical]:items-stretch",
    className,
  );

  return (
    <ToggleGroupPrimitive.Root
      data-slot="toggle-group"
      data-variant={variant}
      data-size={size}
      data-spacing={spacing}
      data-orientation={orientation}
      style={gapStyle}
      className={rootClassName}
      {...props}
    >
      <ToggleGroupContext.Provider value={sharedSettings}>
        {children}
      </ToggleGroupContext.Provider>
    </ToggleGroupPrimitive.Root>
  );
}

/**
 * A single item inside a `ToggleGroup`.
 *
 * The variant and size supplied by the surrounding group (via context) take
 * precedence; the item's own `variant` / `size` props are used only when the
 * group value is falsy. Remaining props are forwarded to the primitive item.
 */
function ToggleGroupItem({
  className,
  children,
  variant = "default",
  size = "default",
  ...props
}: React.ComponentProps<typeof ToggleGroupPrimitive.Item> &
  VariantProps<typeof toggleVariants>) {
  const context = React.useContext(ToggleGroupContext);

  // Group-level settings win over the item's own props.
  const effectiveVariant = context.variant || variant;
  const effectiveSize = context.size || size;

  const itemClassName = cn(
    "data-[state=on]:bg-muted group-data-[spacing=0]/toggle-group:rounded-none group-data-[spacing=0]/toggle-group:px-3 group-data-[spacing=0]/toggle-group:shadow-none group-data-horizontal/toggle-group:data-[spacing=0]:first:rounded-l-4xl group-data-vertical/toggle-group:data-[spacing=0]:first:rounded-t-xl group-data-horizontal/toggle-group:data-[spacing=0]:last:rounded-r-4xl group-data-vertical/toggle-group:data-[spacing=0]:last:rounded-b-xl shrink-0 focus:z-10 focus-visible:z-10 group-data-horizontal/toggle-group:data-[spacing=0]:data-[variant=outline]:border-l-0 group-data-vertical/toggle-group:data-[spacing=0]:data-[variant=outline]:border-t-0 group-data-horizontal/toggle-group:data-[spacing=0]:data-[variant=outline]:first:border-l group-data-vertical/toggle-group:data-[spacing=0]:data-[variant=outline]:first:border-t",
    toggleVariants({
      variant: effectiveVariant,
      size: effectiveSize,
    }),
    className,
  );

  return (
    <ToggleGroupPrimitive.Item
      data-slot="toggle-group-item"
      data-variant={effectiveVariant}
      data-size={effectiveSize}
      data-spacing={context.spacing}
      className={itemClassName}
      {...props}
    >
      {children}
    </ToggleGroupPrimitive.Item>
  );
}

export { ToggleGroup, ToggleGroupItem };


================================================
FILE: studio/frontend/src/components/ui/toggle.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"use client";

/* eslint-disable react-refresh/only-export-components */

import { type VariantProps, cva } from "class-variance-authority";
import { Toggle as TogglePrimitive } from "radix-ui";
import type * as React from "react";

import { cn } from "@/lib/utils";

/**
 * Class-name builder for toggles, created with `cva`.
 *
 * The first argument is the base class list applied to every toggle; the
 * `variants` map adds classes per `variant` ("default" | "outline") and
 * `size` ("default" | "sm" | "lg"). Both fall back to "default".
 */
export const toggleVariants = cva(
  "hover:text-foreground aria-pressed:bg-muted focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive gap-1 rounded-4xl text-sm font-medium transition-colors [&_svg:not([class*='size-'])]:size-4 group/toggle hover:bg-muted inline-flex items-center justify-center whitespace-nowrap outline-none focus-visible:ring-[3px] disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
  {
    variants: {
      variant: {
        default: "bg-transparent",
        outline: "border-input hover:bg-muted border bg-transparent",
      },
      size: {
        default: "h-9 min-w-9 rounded-[min(var(--radius-2xl),12px)] px-2.5",
        sm: "h-8 min-w-8 px-3",
        lg: "h-10 min-w-10 px-2.5",
      },
    },
    defaultVariants: {
      variant: "default",
      size: "default",
    },
  },
);

/**
 * Standalone toggle button built on `TogglePrimitive.Root`.
 *
 * Class names come from `toggleVariants` for the chosen `variant` and `size`
 * (both default to "default") combined with the caller's `className`; all
 * other props are forwarded to the primitive.
 */
export function Toggle(
  props: React.ComponentProps<typeof TogglePrimitive.Root> &
    VariantProps<typeof toggleVariants>,
): React.ReactElement {
  const {
    className,
    variant = "default",
    size = "default",
    ...rootProps
  } = props;
  const toggleClassName = cn(toggleVariants({ variant, size, className }));

  return (
    <TogglePrimitive.Root
      data-slot="toggle"
      className={toggleClassName}
      {...rootProps}
    />
  );
}


================================================
FILE: studio/frontend/src/components/ui/tooltip.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Tooltip as TooltipPrimitive } from "radix-ui";
import { createContext, useCallback, useContext, useState } from "react";
import type * as React from "react";

import { cn } from "@/lib/utils";

/** Zero-argument callback that flips a tooltip's click-driven open state. */
type ToggleFn = () => void;
/** Carries the enclosing `Tooltip`'s toggle callback to `TooltipTrigger`; `null` when there is no enclosing `Tooltip`. */
const TooltipToggleCtx = createContext<ToggleFn | null>(null);

/**
 * Thin wrapper around `TooltipPrimitive.Provider` that defaults
 * `delayDuration` to 400 and forwards every other prop.
 */
function TooltipProvider(
  props: React.ComponentProps<typeof TooltipPrimitive.Provider>,
) {
  const { delayDuration = 400, ...providerProps } = props;

  return (
    <TooltipPrimitive.Provider
      data-slot="tooltip-provider"
      delayDuration={delayDuration}
      {...providerProps}
    />
  );
}

/**
 * Tooltip root that also supports click-to-open.
 *
 * If the caller passes `open`, that value is used as-is. Otherwise the
 * primitive receives `true` while the click-toggled state is on and
 * `undefined` while it is off. Any close notification from the primitive
 * also resets the click state before the caller's `onOpenChange` runs.
 */
function Tooltip({
  open: controlledOpen,
  onOpenChange: controlledOnOpenChange,
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Root>) {
  const [clickOpen, setClickOpen] = useState(false);

  const onOpenChange = useCallback(
    (nextOpen: boolean) => {
      if (nextOpen === false) {
        setClickOpen(false);
      }
      if (controlledOnOpenChange) {
        controlledOnOpenChange(nextOpen);
      }
    },
    [controlledOnOpenChange],
  );

  const toggle = useCallback(() => {
    setClickOpen((wasOpen) => !wasOpen);
  }, []);

  // Resolve the `open` prop handed to the primitive.
  let resolvedOpen: boolean | undefined;
  if (controlledOpen !== undefined) {
    resolvedOpen = controlledOpen;
  } else {
    resolvedOpen = clickOpen ? true : undefined;
  }

  return (
    <TooltipProvider>
      <TooltipToggleCtx.Provider value={toggle}>
        <TooltipPrimitive.Root
          data-slot="tooltip"
          open={resolvedOpen}
          onOpenChange={onOpenChange}
          {...props}
        />
      </TooltipToggleCtx.Provider>
    </TooltipProvider>
  );
}

/**
 * Tooltip trigger that toggles the enclosing `Tooltip` on click.
 *
 * The click handler calls `preventDefault()`, invokes the toggle from
 * `TooltipToggleCtx` (when present), then the caller's own `onClick`.
 */
function TooltipTrigger({
  onClick,
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Trigger>) {
  const toggle = useContext(TooltipToggleCtx);

  const handleClick = useCallback(
    (event: React.MouseEvent<HTMLButtonElement>) => {
      event.preventDefault();
      if (toggle) {
        toggle();
      }
      if (onClick) {
        onClick(event);
      }
    },
    [toggle, onClick],
  );

  return (
    <TooltipPrimitive.Trigger
      data-slot="tooltip-trigger"
      onClick={handleClick}
      {...props}
    />
  );
}

/**
 * Tooltip body rendered inside a portal, followed by an arrow.
 *
 * `sideOffset` defaults to 0; the caller's `className` is merged after the
 * base classes and all other props are forwarded to the primitive content.
 */
function TooltipContent({
  className,
  sideOffset = 0,
  children,
  ...props
}: React.ComponentProps<typeof TooltipPrimitive.Content>) {
  const contentClassName = cn(
    "data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-[state=delayed-open]:animate-in data-[state=delayed-open]:fade-in-0 data-[state=delayed-open]:zoom-in-95 data-closed:animate-out data-closed:fade-out-0 data-closed:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 rounded-2xl corner-squircle px-3 py-1.5 text-xs **:data-[slot=kbd]:rounded-4xl bg-foreground text-background border border-foreground/40 shadow-lg z-[999999] w-fit max-w-xs origin-(--radix-tooltip-content-transform-origin)",
    className,
  );
  const arrowClassName =
    "size-2.5 translate-y-[calc(-50%_-_2px)] rotate-45 rounded-[2px] data-[side=left]:translate-x-[-1.5px] data-[side=right]:translate-x-[1.5px] bg-foreground fill-foreground z-[999999] translate-y-[calc(-50%_-_2px)]";

  return (
    <TooltipPrimitive.Portal>
      <TooltipPrimitive.Content
        data-slot="tooltip-content"
        sideOffset={sideOffset}
        className={contentClassName}
        {...props}
      >
        {children}
        <TooltipPrimitive.Arrow className={arrowClassName} />
      </TooltipPrimitive.Content>
    </TooltipPrimitive.Portal>
  );
}

export { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger };


================================================
FILE: studio/frontend/src/config/env.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { create } from "zustand";

/** Read-only snapshot of the `MODE`, `DEV`, `PROD` and `BASE_URL` values from `import.meta.env`. */
export const env = {
  MODE: import.meta.env.MODE,
  DEV: import.meta.env.DEV,
  PROD: import.meta.env.PROD,
  BASE_URL: import.meta.env.BASE_URL,
} as const;

// ── Platform / device type ──────────────────────────────────

/** Device identifier; the named literals document the expected values, but any string is accepted. */
export type DeviceType = "mac" | "windows" | "linux" | string;

/** Shape of the platform store. */
interface PlatformState {
  // Device type currently held by the store.
  deviceType: DeviceType;
  // Whether the app is in chat-only mode.
  chatOnly: boolean;
  // Set to true by `fetchDeviceType` once it has stored a successful health response.
  fetched: boolean;
  // Accessor returning the current `chatOnly` value.
  isChatOnly: () => boolean;
}

/** Zustand store holding platform information; starts as "linux", not chat-only, not fetched. */
export const usePlatformStore = create<PlatformState>()((_, get) => ({
  deviceType: "linux",
  chatOnly: false,
  fetched: false,
  isChatOnly: () => get().chatOnly,
}));

/**
 * Resolves the device type, querying `/api/health` until one call succeeds.
 *
 * Once a successful response has been stored (`fetched` is true) the stored
 * value is returned without another request. On a successful response the
 * store is updated with `device_type` (default "linux") and `chat_only`
 * (default: true only when the device type is "mac"). On a non-OK response
 * or a thrown error the store is left untouched and its current device type
 * is returned, so a later call will try again.
 */
export async function fetchDeviceType(): Promise<DeviceType> {
  const snapshot = usePlatformStore.getState();
  if (snapshot.fetched) {
    return snapshot.deviceType;
  }

  try {
    const res = await fetch("/api/health");
    if (!res.ok) {
      return usePlatformStore.getState().deviceType;
    }

    const health = (await res.json()) as { device_type?: string; chat_only?: boolean };
    const deviceType = health.device_type ?? "linux";
    const chatOnly = health.chat_only ?? deviceType === "mac";
    usePlatformStore.setState({ deviceType, chatOnly, fetched: true });
    return deviceType;
  } catch (err) {
    console.warn("[platform] Failed to fetch device type, will retry", err);
    return usePlatformStore.getState().deviceType;
  }
}


================================================
FILE: studio/frontend/src/config/training.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ModelType, StepConfig } from "@/types/training";
import type { PipelineType } from "@huggingface/hub";

/** Ordered list of the five training wizard steps, each with its number, title, subtitle and description. */
export const STEPS: StepConfig[] = [
  {
    number: 1,
    title: "Model Type",
    subtitle: "Select type",
    description: "Choose the type of model you want to fine-tune",
  },
  {
    number: 2,
    title: "Model",
    subtitle: "Select model",
    description: "Choose a base model and training method",
  },
  {
    number: 3,
    title: "Dataset",
    subtitle: "Add dataset",
    description: "Select or upload a training dataset",
  },
  {
    number: 4,
    title: "Parameters",
    subtitle: "Configure",
    description: "Fine-tune your training hyperparameters",
  },
  {
    number: 5,
    title: "Summary",
    subtitle: "Review",
    description: "Review your configuration before starting",
  },
];

/** Selectable model types, in display order, with a label and short description for each. */
export const MODEL_TYPES: ReadonlyArray<{
  value: ModelType;
  label: string;
  description: string;
}> = [
  { value: "text", label: "Text", description: "Language models" },
  { value: "vision", label: "Vision", description: "Image understanding models" },
  { value: "audio", label: "Audio", description: "Audio and speech models" },
  { value: "embeddings", label: "Embeddings", description: "Text embedding models" },
];

/** Context-length choices offered in the UI: powers of two from 512 to 262144. */
export const CONTEXT_LENGTHS = [512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144];

/** Module names offered as training targets; also used as the default `targetModules` value. */
export const TARGET_MODULES = [
  "q_proj",
  "k_proj",
  "v_proj",
  "o_proj",
  "gate_proj",
  "up_proj",
  "down_proj",
];

/** Optimizer choices as value/label pairs (`value` is the identifier, `label` the display text). */
export const OPTIMIZER_OPTIONS: ReadonlyArray<{ value: string; label: string }> = [
  { value: "adamw_8bit", label: "AdamW 8-bit" },
  { value: "paged_adamw_8bit", label: "Paged AdamW 8-bit" },
  { value: "adamw_bnb_8bit", label: "AdamW BNB 8-bit" },
  { value: "paged_adamw_32bit", label: "Paged AdamW 32-bit" },
  { value: "adamw_torch", label: "AdamW (PyTorch)" },
  { value: "adamw_torch_fused", label: "AdamW (PyTorch Fused)" },
];

/** Learning-rate scheduler choices as value/label pairs. */
export const LR_SCHEDULER_OPTIONS: ReadonlyArray<{ value: string; label: string }> = [
  { value: "linear", label: "Linear" },
  { value: "cosine", label: "Cosine" },
];

/**
 * Default values for the training configuration.
 *
 * `optimizerType` and `lrSchedulerType` use values listed in
 * `OPTIMIZER_OPTIONS` / `LR_SCHEDULER_OPTIONS`, and `targetModules` reuses
 * the `TARGET_MODULES` array (same reference, not a copy).
 */
export const DEFAULT_HYPERPARAMS = {
  epochs: 3,
  contextLength: 2048,
  learningRate: 2e-4,
  optimizerType: "adamw_8bit",
  lrSchedulerType: "linear",
  loraRank: 16,
  loraAlpha: 32,
  loraDropout: 0.05,
  loraVariant: "lora" as const,
  batchSize: 4,
  gradientAccumulation: 8,
  weightDecay: 0.01,
  warmupSteps: 5,
  maxSteps: 60,
  saveSteps: 0,
  evalSteps: 0.00,
  packing: false,
  trainOnCompletions: false,
  gradientCheckpointing: "unsloth" as const,
  randomSeed: 3407,
  enableWandb: false,
  wandbToken: "",
  wandbProject: "llm-finetuning",
  enableTensorboard: false,
  tensorboardDir: "runs",
  logFrequency: 10,
  trustRemoteCode: false,
  finetuneVisionLayers: true,
  finetuneLanguageLayers: true,
  finetuneAttentionModules: true,
  finetuneMLPModules: true,
  targetModules: TARGET_MODULES,
};

/** Maps each `ModelType` to the `PipelineType` (from `@huggingface/hub`) associated with it. */
export const MODEL_TYPE_TO_HF_TASK: Record<ModelType, PipelineType> = {
  text: "text-generation",
  vision: "image-text-to-text",
  audio: "text-to-speech",
  embeddings: "feature-extraction",
};


/** Model IDs that `applyPriorityOrdering` pins to the front of a list, in this order. */
export const PRIORITY_TRAINING_MODELS: readonly string[] = [
  "unsloth/Qwen3.5-2B",
  "unsloth/Qwen3.5-9B",
  "unsloth/gpt-oss-20b",
  "unsloth/NVIDIA-Nemotron-3-Nano-4B",
  "unsloth/Qwen3-0.6B",
  "unsloth/gemma-3-4b-it",
  "unsloth/embeddinggemma-300m",
  "unsloth/orpheus-3b-0.1-ft",
  "unsloth/Llama-3.1-8B-Instruct",
  "unsloth/Llama-3.2-3B-Instruct",
];

/** Pin priority models to the top of a list of model IDs, preserving their defined order. */
/**
 * Returns a new list with priority models moved to the front.
 *
 * Priority IDs present in `ids` come first, in the order they appear in
 * `PRIORITY_TRAINING_MODELS`; every remaining ID follows in its original
 * order. The input array is not modified.
 */
export function applyPriorityOrdering(ids: string[]): string[] {
  const available = new Set(ids);

  const pinned: string[] = [];
  for (const candidate of PRIORITY_TRAINING_MODELS) {
    if (available.has(candidate)) {
      pinned.push(candidate);
    }
  }

  const pinnedLookup = new Set(pinned);
  const remainder: string[] = [];
  for (const modelId of ids) {
    if (!pinnedLookup.has(modelId)) {
      remainder.push(modelId);
    }
  }

  return pinned.concat(remainder);
}


================================================
FILE: studio/frontend/src/features/auth/api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  clearAuthTokens,
  getAuthToken,
  getRefreshToken,
  mustChangePassword,
  storeAuthTokens,
} from "./session";

/** JSON body expected from a successful `POST /api/auth/refresh`. */
type RefreshResponse = {
  access_token: string;
  refresh_token: string;
  must_change_password: boolean;
};

// Set by `redirectToAuth` on its first call so later calls return immediately; never reset in this module.
let isRedirecting = false;

/**
 * Tells whether a response is the backend's "Password change required" refusal.
 *
 * Only a 403 whose JSON body has `detail === "Password change required"`
 * qualifies. The body is read from a clone so the caller can still consume
 * the original response; any failure to read or parse it yields `false`.
 */
async function isPasswordChangeRequiredResponse(response: Response): Promise<boolean> {
  if (response.status === 403) {
    try {
      const body = (await response.clone().json()) as { detail?: string };
      return body.detail === "Password change required";
    } catch {
      return false;
    }
  }

  return false;
}

/**
 * Navigates the browser to the appropriate auth page, at most once.
 *
 * Asks `/api/auth/status` whether a password change is required. If that
 * request succeeds and either the server or the locally stored flag says so,
 * the target is `/change-password`; in every other case (including a failed
 * or non-OK status request) the target is `/login`.
 */
async function redirectToAuth(): Promise<void> {
  if (isRedirecting) return;
  isRedirecting = true;

  const needsPasswordChange = async (): Promise<boolean> => {
    try {
      const res = await fetch("/api/auth/status");
      if (!res.ok) return false;

      const data = (await res.json()) as { requires_password_change: boolean };
      if (data.requires_password_change || mustChangePassword()) return true;
      return false;
    } catch {
      // Any failure while checking falls back to the login page.
      return false;
    }
  };

  const goToChangePassword = await needsPasswordChange();
  window.location.href = goToChangePassword ? "/change-password" : "/login";
}

// Refresh attempt currently on the wire, if any. Overlapping callers share it
// instead of each sending the same refresh token.
let pendingRefresh: Promise<boolean> | null = null;

/**
 * Sends one refresh request and stores the returned tokens on success.
 * Never rejects: a non-OK reply clears the stored tokens and yields `false`;
 * a thrown error yields `false` without touching storage.
 */
async function requestTokenRefresh(refreshToken: string): Promise<boolean> {
  try {
    const response = await fetch("/api/auth/refresh", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ refresh_token: refreshToken }),
    });

    if (!response.ok) {
      clearAuthTokens();
      return false;
    }

    const payload = (await response.json()) as RefreshResponse;
    storeAuthTokens(
      payload.access_token,
      payload.refresh_token,
      payload.must_change_password,
    );
    return true;
  } catch {
    return false;
  }
}

/**
 * Exchanges the stored refresh token for a new token pair.
 *
 * Concurrent calls are coalesced into a single request. Without this, several
 * requests that hit a 401 at the same moment would each start their own
 * refresh; any attempt that then received a non-OK reply would clear the
 * tokens a sibling attempt had just stored. (NOTE(review): such a reply is
 * expected if the backend invalidates a refresh token once used — confirm
 * against the server.)
 *
 * @returns `true` when new tokens were stored; `false` when there is no
 *   refresh token, the server rejected the request, or the request failed.
 */
export async function refreshSession(): Promise<boolean> {
  if (pendingRefresh) return pendingRefresh;

  const refreshToken = getRefreshToken();
  if (!refreshToken) return false;

  const attempt = requestTokenRefresh(refreshToken);
  pendingRefresh = attempt;
  // Release the shared handle once settled so a later expiry triggers a fresh
  // request. The identity check keeps this from clearing a newer attempt.
  void attempt.finally(() => {
    if (pendingRefresh === attempt) pendingRefresh = null;
  });

  return attempt;
}

/**
 * Runs `fetch`, turning a network-level `TypeError` into the user-facing
 * "Studio isn't running" error. Other errors are rethrown unchanged.
 */
async function fetchOrExplain(
  input: RequestInfo | URL,
  init: RequestInit,
): Promise<Response> {
  try {
    return await fetch(input, init);
  } catch (err) {
    if (err instanceof TypeError) {
      throw new Error("Studio isn't running -- please relaunch it.");
    }
    throw err;
  }
}

/**
 * Builds the header set for a request, adding a Bearer token when one exists.
 *
 * Mirrors native `fetch` precedence: `init.headers` wins when given, otherwise
 * the headers of a `Request` input are kept. Passing any `headers` in `init`
 * replaces a `Request`'s own headers, so they must be copied here or they
 * would be dropped.
 */
function buildAuthorizedHeaders(
  input: RequestInfo | URL,
  init: RequestInit | undefined,
  token: ReturnType<typeof getAuthToken>,
): Headers {
  const baseHeaders =
    init?.headers ?? (input instanceof Request ? input.headers : undefined);
  const headers = new Headers(baseHeaders);
  if (token) {
    headers.set("Authorization", `Bearer ${token}`);
  }
  return headers;
}

/**
 * `fetch` wrapper that attaches the access token and handles auth failures.
 *
 * - A 403 "Password change required" response starts a redirect to the auth
 *   pages and is returned as-is.
 * - A 401 triggers one session refresh. If the refresh fails, or a password
 *   change is now required, a redirect is started and the original 401 is
 *   returned. Otherwise the request is retried once with the new token.
 * - Any other response is returned unchanged.
 *
 * Both the first attempt and the retry report network failures with the same
 * "Studio isn't running" error.
 *
 * NOTE: a `Request` input (or stream body) that carries a body cannot be
 * replayed, so the retry only works for replayable inputs such as URL strings
 * with string/FormData bodies.
 *
 * @throws Error when the network request itself fails.
 */
export async function authFetch(
  input: RequestInfo | URL,
  init?: RequestInit,
): Promise<Response> {
  const response = await fetchOrExplain(input, {
    ...init,
    headers: buildAuthorizedHeaders(input, init, getAuthToken()),
  });

  if (await isPasswordChangeRequiredResponse(response)) {
    void redirectToAuth();
    return response;
  }
  if (response.status !== 401) return response;

  const refreshed = await refreshSession();
  if (!refreshed) {
    clearAuthTokens();
    void redirectToAuth();
    return response;
  }

  if (mustChangePassword()) {
    void redirectToAuth();
    return response;
  }

  const newToken = getAuthToken();
  if (!newToken) {
    // Refresh reported success but left no access token; drop the stale state.
    clearAuthTokens();
  }

  return fetchOrExplain(input, {
    ...init,
    headers: buildAuthorizedHeaders(input, init, newToken),
  });
}

/** Logs out locally by clearing the stored auth tokens; no request is sent from here. */
export function logout(): void {
  clearAuthTokens();
}


================================================
FILE: studio/frontend/src/features/auth/change-password-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { LightRays } from "@/components/ui/light-rays";
import { Card } from "@/components/ui/card";
import { AuthForm } from "./components/auth-form";

/**
 * Full-screen page for the change-password flow: a `LightRays` backdrop with
 * a centered card containing `AuthForm` in "change-password" mode.
 */
export function ChangePasswordPage() {
  const pageClassName =
    "relative flex min-h-screen items-center justify-center overflow-hidden bg-background px-4 py-8 sm:px-6 sm:py-10 md:px-10";
  const cardClassName =
    "relative z-10 w-full max-w-sm px-5 py-6 shadow-border ring-1 ring-border sm:px-6 sm:py-8";
  const raysStyle = { opacity: 0.4 };

  return (
    <div className={pageClassName}>
      <LightRays
        count={6}
        color="rgba(34, 197, 94, 0.25)"
        blur={34}
        speed={15}
        length="70vh"
        style={raysStyle}
      />
      <Card className={cardClassName}>
        <AuthForm mode="change-password" />
      </Card>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/auth/components/auth-form.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Link, useNavigate } from "@tanstack/react-router";
import { Eye, EyeOff } from "lucide-react";
import { useEffect, useState } from "react";
import type { ReactElement } from "react";
import type { SyntheticEvent } from "react";
import { refreshSession } from "../api";

// Bootstrap credentials injected into index.html by the backend
// (only present while default admin must_change_password is true)
declare global {
  interface Window {
    // Optional username/password pair; always check for `undefined` before reading it.
    __UNSLOTH_BOOTSTRAP__?: { username: string; password: string };
  }
}

import {
  clearAuthTokens,
  getAuthToken,
  getPostAuthRoute,
  hasAuthToken,
  hasRefreshToken,
  mustChangePassword,
  resetOnboardingDone,
  setMustChangePassword,
  storeAuthTokens,
} from "../session";

/** Which variant of the auth form is being shown. */
type AuthMode = "login" | "change-password";

/** JSON body expected from `GET /api/auth/status`. */
type AuthStatusResponse = {
  initialized: boolean;
  requires_password_change: boolean;
};

/** JSON body expected from a successful `POST /api/auth/login`. */
type TokenResponse = {
  access_token: string;
  refresh_token: string;
  must_change_password: boolean;
};

/**
 * Posts credentials to `/api/auth/login` and returns the issued tokens.
 *
 * The username is trimmed before sending; the password is sent as given.
 *
 * @throws Error carrying the server's `detail` message when the response is
 *   not OK, or "Login failed." when no detail can be read from the body.
 */
async function loginWithPassword(
  username: string,
  password: string,
): Promise<TokenResponse> {
  const credentials = JSON.stringify({
    username: username.trim(),
    password,
  });

  const response = await fetch("/api/auth/login", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: credentials,
  });

  if (response.ok) {
    return (await response.json()) as TokenResponse;
  }

  // An unparseable error body is treated as "no detail available".
  const errorPayload = (await response.json().catch(() => null)) as { detail?: string } | null;
  throw new Error(errorPayload?.detail ?? "Login failed.");
}

/** Props for `AuthForm`. */
type AuthFormProps = {
  // Selects the login or change-password variant of the form.
  mode: AuthMode;
};

// Fixed username that `AuthForm` uses for its `username` value.
const HIDDEN_LOGIN_USERNAME = "unsloth";

/**
 * Shared form behind the login and change-password screens.
 *
 * On mount it asks `/api/auth/status` which screen applies and redirects
 * between `/login` and `/change-password` accordingly. On the login screen it
 * also skips straight to the post-auth route when a stored session can be
 * refreshed or an access token is already present.
 *
 * - `mode="login"`: renders a single password field and posts it, together
 *   with the fixed hidden username, to the login endpoint.
 * - `mode="change-password"`: renders new/confirm password fields. The
 *   current password comes from `window.__UNSLOTH_BOOTSTRAP__`; the form
 *   obtains an access token (refresh or bootstrap login) and then posts to
 *   `/api/auth/change-password`.
 *
 * Returns `null` while the initial status request is still in flight.
 */
export function AuthForm({ mode }: AuthFormProps): ReactElement | null {
  const navigate = useNavigate();
  const isLoginMode = mode === "login";
  const [showPassword, setShowPassword] = useState(false);
  const username = HIDDEN_LOGIN_USERNAME;
  const [password, setPassword] = useState("");
  const [newPassword, setNewPassword] = useState("");
  const [confirmPassword, setConfirmPassword] = useState("");
  const [loading, setLoading] = useState(false);
  const [statusLoading, setStatusLoading] = useState(true);
  const [initialized, setInitialized] = useState<boolean | null>(null);
  const [requiresPasswordChange, setRequiresPasswordChange] = useState(false);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Flipped by the cleanup function so async continuations can tell that
    // this effect run no longer belongs to a mounted component.
    let canceled = false;

    async function initializeAuthForm(): Promise<void> {
      // Always check the server first — localStorage flags can be stale
      // (e.g. tokens from a previous install attempt).  The server's
      // /api/auth/status is the source of truth for requires_password_change.
      try {
        const response = await fetch("/api/auth/status");
        if (!response.ok) throw new Error("Failed to load auth status.");
        const result = (await response.json()) as AuthStatusResponse;
        if (!canceled) {
          setInitialized(result.initialized);
          setRequiresPasswordChange(result.requires_password_change);

          // Redirect between login ↔ change-password based on server state
          if (mode === "login" && result.requires_password_change) {
            navigate({ to: "/change-password" });
            return;
          }
          if (mode === "change-password" && !result.requires_password_change && !mustChangePassword()) {
            navigate({ to: "/login" });
            return;
          }

          // On login page, if user already has a valid session and no
          // password change is required, skip straight to the app.
          if (mode === "login" && !result.requires_password_change) {
            if (hasRefreshToken()) {
              const refreshed = await refreshSession();
              // The component may have unmounted while the refresh was in
              // flight; a stale effect run must not trigger a redirect.
              if (canceled) return;
              if (refreshed) {
                setStatusLoading(false);
                navigate({ to: getPostAuthRoute() });
                return;
              }
            }
            if (hasAuthToken()) {
              setStatusLoading(false);
              navigate({ to: getPostAuthRoute() });
              return;
            }
          }
        }
      } catch (err: unknown) {
        if (!canceled) {
          setError(err instanceof Error ? err.message : "Failed to load.");
        }
      } finally {
        // Runs on every exit path above, including the early returns.
        if (!canceled) setStatusLoading(false);
      }
    }

    void initializeAuthForm();

    return () => {
      canceled = true;
    };
  }, [mode, navigate]);

  // Seed password from bootstrap credentials injected into HTML.
  // Intentionally mount-only: it must not overwrite later user input.
  useEffect(() => {
    const bootstrap = window.__UNSLOTH_BOOTSTRAP__;
    if (bootstrap) {
      if (!isLoginMode && !password) {
        setPassword(bootstrap.password);
      }
    }
  }, []);

  // True when the server/local state says this screen should not be submitted.
  const blockedByState =
    initialized === false ||
    (mode === "login" && requiresPasswordChange) ||
    (mode === "change-password" && !requiresPasswordChange && !mustChangePassword());

  let helperText: string | null = null;
  if (initialized === false) {
    helperText = "Auth is still bootstrapping the default admin account.";
  } else if (isLoginMode && requiresPasswordChange) {
    helperText = "Sign in once with the seeded credentials to change the password.";
  } else if (!isLoginMode && !requiresPasswordChange && !mustChangePassword()) {
    helperText = "Password already updated. Use the login screen.";
  }
  const title = isLoginMode ? "Welcome back" : "Setup your account";
  const subtitle = isLoginMode
    ? "Sign in with your password."
    : "Choose a new password";
  const submitLabel = isLoginMode ? "Login" : "Change password";
  const showSwitchLink = !isLoginMode;
  const switchText = "Password already setup? ";
  const switchLinkTo = "/login";
  const switchLinkText = "Back to login";
  // Falls back to the injected bootstrap password when state was never seeded.
  const currentPassword = password || window.__UNSLOTH_BOOTSTRAP__?.password || "";
  const invalidChangePasswordForm =
    !isLoginMode &&
    (newPassword.length < 8 || newPassword !== confirmPassword || currentPassword === newPassword);
  // Only warn once both fields have content, so typing the first one is quiet.
  const showPasswordMismatchWarning =
    !isLoginMode &&
    newPassword.length > 0 &&
    confirmPassword.length > 0 &&
    newPassword !== confirmPassword;
  const togglePasswordLabel = showPassword ? "Hide password" : "Show password";

  /**
   * Submit handler for both modes.
   *
   * Login: exchanges the password for tokens. Change-password: validates the
   * fields, makes sure an access token exists (refreshing the session or
   * logging in with the current password), then posts the password change.
   * In both cases the resulting tokens are stored and the user is sent to
   * the post-auth route; any failure is shown as the form error.
   */
  async function handleSubmit(event: SyntheticEvent<HTMLFormElement>) {
    event.preventDefault();
    setError(null);

    if (!isLoginMode) {
      if (!currentPassword) {
        setError("Unable to initialize setup. Reload the page and try again.");
        return;
      }
      if (newPassword.length < 8) {
        setError("New password must be at least 8 characters.");
        return;
      }
      if (newPassword !== confirmPassword) {
        setError("Passwords do not match.");
        return;
      }
      if (currentPassword === newPassword) {
        setError("New password must be different from your current password.");
        return;
      }
    }

    setLoading(true);
    try {
      let token: TokenResponse;

      if (isLoginMode) {
        token = await loginWithPassword(username, password);
      } else {
        let accessToken = getAuthToken();

        if (hasRefreshToken()) {
          const refreshed = await refreshSession();
          accessToken = getAuthToken();
          if (!refreshed) {
            // The stored session is unusable; drop it and log in again below.
            clearAuthTokens();
            accessToken = null;
          }
        }

        if (!accessToken) {
          const bootstrapToken = await loginWithPassword(username, currentPassword);
          storeAuthTokens(
            bootstrapToken.access_token,
            bootstrapToken.refresh_token,
            bootstrapToken.must_change_password,
          );
          setMustChangePassword(bootstrapToken.must_change_password);
          accessToken = bootstrapToken.access_token;
        }

        const response = await fetch("/api/auth/change-password", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${accessToken}`,
          },
          body: JSON.stringify({
            current_password: currentPassword,
            new_password: newPassword,
          }),
        });

        if (!response.ok) {
          let message = "Password update failed.";
          const errorPayload = (await response
            .json()
            .catch(() => null)) as { detail?: string } | null;
          if (errorPayload?.detail) message = errorPayload.detail;
          throw new Error(message);
        }

        token = (await response.json()) as TokenResponse;
      }

      if (!isLoginMode) {
        resetOnboardingDone();
        setRequiresPasswordChange(false);
        setMustChangePassword(false);
      } else {
        setMustChangePassword(token.must_change_password);
      }
      storeAuthTokens(
        token.access_token,
        token.refresh_token,
        token.must_change_password,
      );
      navigate({ to: getPostAuthRoute() });
    } catch (err: unknown) {
      setError(err instanceof Error ? err.message : "Auth failed.");
    } finally {
      setLoading(false);
    }
  }

  // Render nothing until the first status check has produced a result or an error.
  if (statusLoading && initialized === null && error === null) return null;

  return (
    <div className="w-full max-w-sm space-y-6">
      <div className="space-y-1.5 text-center">
        <img
          src="/Sloth emojis/large sloth wave.png"
          alt="Unsloth waving mascot"
          className="mx-auto mb-2 h-20 w-20 object-contain"
        />
        <h2 className="text-2xl font-semibold text-foreground">{title}</h2>
        <p className="text-muted-foreground">{subtitle}</p>
      </div>
      <form className="space-y-5" onSubmit={handleSubmit}>
        {isLoginMode && (
          <div className="space-y-2">
            <Label htmlFor="password">Password</Label>
            <div className="relative">
              <Input
                id="password"
                type={showPassword ? "text" : "password"}
                className="pr-10"
                autoComplete="current-password"
                value={password}
                onChange={(event) => setPassword(event.target.value)}
                minLength={8}
                required
              />
              <Button
                type="button"
                variant="ghost"
                size="icon"
                className="absolute right-0 top-0 h-full px-3 text-muted-foreground hover:bg-transparent"
                aria-label={togglePasswordLabel}
                onClick={() => setShowPassword((prev) => !prev)}
              >
                {showPassword ? (
                  <EyeOff className="h-4 w-4" />
                ) : (
                  <Eye className="h-4 w-4" />
                )}
              </Button>
            </div>
          </div>
        )}

        {!isLoginMode && (
          <>
            <div className="space-y-2">
              <Label htmlFor="new-password">New password</Label>
              <div className="relative">
                <Input
                  id="new-password"
                  type={showPassword ? "text" : "password"}
                  className="pr-10"
                  autoComplete="new-password"
                  value={newPassword}
                  onChange={(event) => setNewPassword(event.target.value)}
                  minLength={8}
                  required
                />
                <Button
                  type="button"
                  variant="ghost"
                  size="icon"
                  className="absolute right-0 top-0 h-full px-3 text-muted-foreground hover:bg-transparent"
                  aria-label={togglePasswordLabel}
                  onClick={() => setShowPassword((prev) => !prev)}
                >
                  {showPassword ? (
                    <EyeOff className="h-4 w-4" />
                  ) : (
                    <Eye className="h-4 w-4" />
                  )}
                </Button>
              </div>
            </div>
            <div className="space-y-2">
              <Label htmlFor="confirm-password">Confirm password</Label>
              <Input
                id="confirm-password"
                type="password"
                autoComplete="new-password"
                value={confirmPassword}
                onChange={(event) => setConfirmPassword(event.target.value)}
                minLength={8}
                required
              />
            </div>
            <p
              className={`min-h-4 text-xs ${
                showPasswordMismatchWarning ? "text-destructive" : "text-muted-foreground"
              }`}
              aria-live="polite"
            >
              {showPasswordMismatchWarning
                ? "Please ensure passwords match."
                : "Must be at least 8 characters."}
            </p>
          </>
        )}

        {helperText && (
          <p className="text-center text-sm text-amber-600">{helperText}</p>
        )}
        {error && <p className="text-center text-sm text-destructive">{error}</p>}

        <Button
          type="submit"
          className="w-full"
          disabled={
            loading ||
            statusLoading ||
            blockedByState ||
            (isLoginMode && password.length < 8) ||
            invalidChangePasswordForm
          }
        >
          {loading ? "Please wait..." : submitLabel}
        </Button>
      </form>

      {showSwitchLink && (
        <p className="text-center text-sm text-muted-foreground">
          {switchText}
          <Link to={switchLinkTo} className="text-primary hover:underline">
            {switchLinkText}
          </Link>
        </p>
      )}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/auth/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { LoginPage } from "./login-page";
export { ChangePasswordPage } from "./change-password-page";
export { authFetch, refreshSession } from "./api";
export {
  getPostAuthRoute,
  hasAuthToken,
  hasRefreshToken,
  isOnboardingDone,
  markOnboardingDone,
  mustChangePassword,
  resetOnboardingDone,
  setMustChangePassword,
} from "./session";


================================================
FILE: studio/frontend/src/features/auth/login-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { LightRays } from "@/components/ui/light-rays";
import { Card } from "@/components/ui/card";
import { AuthForm } from "./components/auth-form";

/**
 * Login route: a full-height centered layout with decorative light rays
 * behind a card that hosts `AuthForm` in login mode.
 */
export function LoginPage() {
  const pageClassName =
    "relative flex min-h-screen items-center justify-center overflow-hidden bg-background px-4 py-8 sm:px-6 sm:py-10 md:px-10";
  const cardClassName =
    "relative z-10 w-full max-w-sm px-5 py-6 shadow-border ring-1 ring-border sm:px-6 sm:py-8";
  const raysStyle = { opacity: 0.4 };

  const backdrop = (
    <LightRays
      count={6}
      color="rgba(34, 197, 94, 0.25)"
      blur={34}
      speed={15}
      length="70vh"
      style={raysStyle}
    />
  );

  return (
    <div className={pageClassName}>
      {backdrop}
      <Card className={cardClassName}>
        <AuthForm mode="login" />
      </Card>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/auth/session.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { usePlatformStore } from "@/config/env";

// localStorage keys used by the helpers in this module.
export const AUTH_TOKEN_KEY = "unsloth_auth_token";
export const AUTH_REFRESH_TOKEN_KEY = "unsloth_auth_refresh_token";
// Stored as the string "true" once onboarding has been marked done.
export const ONBOARDING_DONE_KEY = "unsloth_onboarding_done";
// Stored as the string "true" / "false".
export const AUTH_MUST_CHANGE_PASSWORD_KEY = "unsloth_auth_must_change_password";

// Every route `getPostAuthRoute` can return.
type PostAuthRoute = "/onboarding" | "/studio" | "/change-password" | "/chat";

/** Report whether a global `window` exists, i.e. whether the storage helpers may touch localStorage. */
function canUseStorage(): boolean {
  const windowIsMissing = typeof window === "undefined";
  return !windowIsMissing;
}

/** True when a non-empty access token is saved in localStorage. */
export function hasAuthToken(): boolean {
  return canUseStorage() && Boolean(localStorage.getItem(AUTH_TOKEN_KEY));
}

/** True when a non-empty refresh token is saved in localStorage. */
export function hasRefreshToken(): boolean {
  return canUseStorage() && Boolean(localStorage.getItem(AUTH_REFRESH_TOKEN_KEY));
}

/** Return the saved access token, or `null` when none is stored or storage is unavailable. */
export function getAuthToken(): string | null {
  return canUseStorage() ? localStorage.getItem(AUTH_TOKEN_KEY) : null;
}

/** Return the saved refresh token, or `null` when none is stored or storage is unavailable. */
export function getRefreshToken(): string | null {
  return canUseStorage() ? localStorage.getItem(AUTH_REFRESH_TOKEN_KEY) : null;
}

/**
 * Persist both tokens together with the must-change-password flag.
 *
 * The flag is written as the string "true" or "false" and defaults to false.
 * Does nothing when storage is unavailable.
 */
export function storeAuthTokens(
  accessToken: string,
  refreshToken: string,
  mustChangePassword = false,
): void {
  if (canUseStorage()) {
    const entries: Array<[string, string]> = [
      [AUTH_TOKEN_KEY, accessToken],
      [AUTH_REFRESH_TOKEN_KEY, refreshToken],
      [AUTH_MUST_CHANGE_PASSWORD_KEY, String(mustChangePassword)],
    ];
    for (const [storageKey, storedValue] of entries) {
      localStorage.setItem(storageKey, storedValue);
    }
  }
}

/** Remove both tokens and the must-change-password flag from localStorage. */
export function clearAuthTokens(): void {
  if (canUseStorage()) {
    const authKeys = [
      AUTH_TOKEN_KEY,
      AUTH_REFRESH_TOKEN_KEY,
      AUTH_MUST_CHANGE_PASSWORD_KEY,
    ];
    for (const storageKey of authKeys) {
      localStorage.removeItem(storageKey);
    }
  }
}

/** True only when the stored must-change-password flag is exactly the string "true". */
export function mustChangePassword(): boolean {
  return canUseStorage() && localStorage.getItem(AUTH_MUST_CHANGE_PASSWORD_KEY) === "true";
}

/** Save the must-change-password flag as "true" / "false"; no-op without storage. */
export function setMustChangePassword(required: boolean): void {
  if (canUseStorage()) {
    const serialized = String(required);
    localStorage.setItem(AUTH_MUST_CHANGE_PASSWORD_KEY, serialized);
  }
}

/** True only when the onboarding flag in localStorage is exactly the string "true". */
export function isOnboardingDone(): boolean {
  return canUseStorage() && localStorage.getItem(ONBOARDING_DONE_KEY) === "true";
}

/** Record in localStorage that onboarding has been completed; no-op without storage. */
export function markOnboardingDone(): void {
  if (canUseStorage()) {
    localStorage.setItem(ONBOARDING_DONE_KEY, "true");
  }
}

/** Delete the onboarding flag from localStorage; no-op without storage. */
export function resetOnboardingDone(): void {
  if (canUseStorage()) {
    localStorage.removeItem(ONBOARDING_DONE_KEY);
  }
}

/**
 * Pick the route to open after authentication.
 *
 * Checked in order: a pending password change, a chat-only platform, then
 * whether onboarding has already been completed.
 */
export function getPostAuthRoute(): PostAuthRoute {
  let destination: PostAuthRoute;
  if (mustChangePassword()) {
    destination = "/change-password";
  } else if (usePlatformStore.getState().isChatOnly()) {
    destination = "/chat";
  } else if (isOnboardingDone()) {
    destination = "/studio";
  } else {
    destination = "/onboarding";
  }
  return destination;
}


================================================
FILE: studio/frontend/src/features/chat/api/chat-adapter.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ChatModelAdapter } from "@assistant-ui/react";
import type { MessageTiming, ToolCallMessagePart } from "@assistant-ui/core";
import { toast } from "sonner";
import {
  generateAudio,
  listCachedGguf,
  listCachedModels,
  listGgufVariants,
  loadModel,
  streamChatCompletions,
} from "./chat-api";
import { db } from "../db";
import { useChatRuntimeStore } from "../stores/chat-runtime-store";
import type { ChatModelSummary } from "../types/runtime";
import {
  hasClosedThinkTag,
  parseAssistantContent,
} from "../utils/parse-assistant-content";

/** Type of the `messages` array passed to `ChatModelAdapter.run`. */
type RunMessages = Parameters<ChatModelAdapter["run"]>[0]["messages"];
/** A single element of `RunMessages`. */
type RunMessage = RunMessages[number];

/** Tracks which user messages were sent with an audio file (messageId → filename). */
export const sentAudioNames = new Map<string, string>();

/**
 * Turn a tool result made of "Title: ...\nURL: ...\nSnippet: ..." blocks
 * (separated by a line containing only `---`) into URL source parts.
 *
 * Blocks lacking either a title or a URL are dropped. The trimmed URL is
 * used both as the part's `url` and as its `id`.
 */
function parseSourcesFromResult(raw: string): { type: "source"; sourceType: "url"; id: string; url: string; title: string }[] {
  type SourcePart = { type: "source"; sourceType: "url"; id: string; url: string; title: string };

  const collected: SourcePart[] = [];
  if (!raw) return collected;

  for (const chunk of raw.split(/\n---\n/)) {
    // Empty chunks carry no fields.
    if (!chunk) continue;

    const titleHit = /Title:\s*(.+)/.exec(chunk);
    const urlHit = /URL:\s*(.+)/.exec(chunk);
    if (titleHit === null || urlHit === null) continue;

    const link = urlHit[1].trim();
    const heading = titleHit[1].trim();
    collected.push({
      type: "source",
      sourceType: "url",
      id: link,
      url: link,
      title: heading,
    });
  }
  return collected;
}

/**
 * Rough token estimate: one token per four characters of the trimmed text,
 * rounded, and never below 1. Returns `undefined` for blank input.
 */
function estimateTokenCount(text: string): number | undefined {
  const charCount = text.trim().length;
  if (charCount === 0) return undefined;

  const approximate = Math.round(charCount / 4);
  return approximate < 1 ? 1 : approximate;
}

/**
 * Assemble a `MessageTiming` record from raw stream measurements.
 *
 * `tokensPerSecond` is filled in only when `totalStreamTime` is a positive
 * number and `tokenCount` is a number; `totalStreamTime` is divided by 1000
 * before the rate is computed.
 */
function buildTiming(
  streamStartTime: number,
  totalChunks: number,
  firstTokenTime?: number,
  totalStreamTime?: number,
  tokenCount?: number,
  toolCallCount = 0,
): MessageTiming {
  let tokensPerSecond: number | undefined;
  if (
    typeof totalStreamTime === "number" &&
    totalStreamTime > 0 &&
    typeof tokenCount === "number"
  ) {
    const elapsedSeconds = totalStreamTime / 1000;
    tokensPerSecond = tokenCount / elapsedSeconds;
  }

  return {
    streamStartTime,
    firstTokenTime,
    totalStreamTime,
    tokenCount,
    tokensPerSecond,
    totalChunks,
    toolCallCount,
  };
}

/**
 * Gather every text part of a message: first those in `message.content`,
 * then those found inside each attachment's content, in order.
 */
function collectTextParts(message: RunMessage): string[] {
  const texts: string[] = [];

  for (const part of message.content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }

  if ("attachments" in message) {
    for (const attachment of message.attachments ?? []) {
      for (const nested of attachment.content ?? []) {
        if (nested.type === "text") {
          texts.push(nested.text);
        }
      }
    }
  }

  return texts;
}

/**
 * Convert a runtime message into a `{ role, content }` pair.
 *
 * Only system, user and assistant messages are converted; any other role
 * yields `null`. Content is all text parts joined with newlines.
 */
function toOpenAIMessage(message: RunMessage): {
  role: "system" | "user" | "assistant";
  content: string;
} | null {
  const { role } = message;

  switch (role) {
    case "system":
    case "user":
    case "assistant": {
      const joined = collectTextParts(message).join("\n");
      // Strip inline audio base64 from prior assistant messages to avoid
      // inflating token counts (e.g. audio-player responses with embedded WAV).
      const content =
        role === "assistant"
          ? joined.replace(
              /data:audio\/[a-z0-9.+-]+;base64,[A-Za-z0-9+/=]+/g,
              "[audio]",
            )
          : joined;
      return { role, content };
    }
    default:
      return null;
  }
}

/**
 * Return the base64 payload of an image string.
 *
 * A `data:` URL yields everything after its first comma (or `undefined` when
 * it has no comma); any other non-empty string is returned unchanged; an
 * empty string yields `undefined`.
 */
function extractImageBase64(input: string): string | undefined {
  if (!input) return undefined;
  if (!input.startsWith("data:")) return input;

  const separatorAt = input.indexOf(",");
  return separatorAt < 0 ? undefined : input.slice(separatorAt + 1);
}

/**
 * Walk the messages from newest to oldest and return the base64 payload of
 * the first image found on a user message, or `undefined` when there is none.
 *
 * Within each user message, image parts in `content` are checked before
 * image parts inside attachments.
 */
function findLatestUserImageBase64(messages: RunMessages): string | undefined {
  let cursor = messages.length;
  while (cursor > 0) {
    cursor -= 1;
    const candidate = messages[cursor];
    if (!candidate || candidate.role !== "user") {
      continue;
    }

    // Image in message.content (e.g. compare view appends content with image parts)
    for (const part of candidate.content ?? []) {
      if (part.type !== "image" || !("image" in part)) continue;
      const payload = extractImageBase64(part.image);
      if (payload) return payload;
    }

    // Image in message.attachments (e.g. chat composer)
    if (!("attachments" in candidate)) {
      continue;
    }
    for (const attachment of candidate.attachments ?? []) {
      for (const part of attachment.content ?? []) {
        if (part.type === "image") {
          const payload = extractImageBase64(part.image);
          if (payload) return payload;
        }
      }
    }
  }

  return undefined;
}

/**
 * Find base64 audio for the outgoing request.
 *
 * User messages are scanned newest-first for an `audio` content part; when
 * none carries audio, the runtime store's `pendingAudioBase64` is returned
 * instead (or `undefined` when that is empty too).
 */
function findLatestUserAudioBase64(messages: RunMessages): string | undefined {
  type AudioCarrier = { type: "audio"; audio: string | { data: string; format: string } };

  // Check message content parts (from compare view's CompareMessagePart with type: "audio")
  for (const candidate of [...messages].reverse()) {
    if (!candidate || candidate.role !== "user") continue;

    for (const part of candidate.content ?? []) {
      if (part.type !== "audio" || !("audio" in part)) continue;

      const payload = (part as unknown as AudioCarrier).audio;
      const encoded = typeof payload === "string" ? payload : payload?.data;
      if (!encoded) continue;

      // For a data URL, return the piece after its first comma.
      return encoded.startsWith("data:") ? encoded.split(",")[1] : encoded;
    }
  }

  // Check the runtime store (from main composer's audio upload)
  const { pendingAudioBase64 } = useChatRuntimeStore.getState();
  return pendingAudioBase64 ?? undefined;
}

/**
 * Decide the adapter flag for a thread.
 *
 * Resolves to `undefined` when there is no thread id, the thread has no
 * `pairId`, the thread is a model1/model2 thread, or the lookup throws.
 * Otherwise resolves to whether the thread's `modelType` is "lora".
 */
async function resolveUseAdapter(
  threadId: string | undefined,
): Promise<boolean | undefined> {
  if (!threadId) return undefined;

  try {
    const record = await db.threads.get(threadId);
    if (!record?.pairId) return undefined;

    const { modelType } = record;
    // model1/model2 threads don't use the adapter toggle — each side
    // loads its own model via /api/inference/load before generation.
    if (modelType === "model1" || modelType === "model2") return undefined;

    return modelType === "lora";
  } catch {
    return undefined;
  }
}

/**
 * Resolve once the runtime store no longer reports `modelLoading`, checking
 * every 500ms. Rejects with `Error("Aborted")` if the signal is found aborted
 * at one of those checks.
 */
async function waitForModelReady(abortSignal?: AbortSignal): Promise<void> {
  const pollIntervalMs = 500;

  for (;;) {
    if (abortSignal?.aborted) {
      throw new Error("Aborted");
    }
    if (!useChatRuntimeStore.getState().modelLoading) {
      return;
    }
    await new Promise<void>((wake) => {
      setTimeout(wake, pollIntervalMs);
    });
  }
}

/** Response shape of `loadModel`, derived so the helpers below follow the API client's type. */
type AutoLoadResponse = Awaited<ReturnType<typeof loadModel>>;

/** Add `model` to the runtime store's model list unless an entry with the same id exists. */
function registerAutoLoadedModel(model: ChatModelSummary): void {
  const store = useChatRuntimeStore.getState();
  if (!store.models.some((m) => m.id === model.id)) {
    store.setModels([...store.models, model]);
  }
}

/**
 * Copy the GGUF capabilities from a load response into the runtime store and
 * reset the per-model toggles (tools, code tools, chat template override).
 */
function applyAutoLoadedGgufState(loadResp: AutoLoadResponse): void {
  useChatRuntimeStore.setState({
    ggufContextLength: loadResp.context_length ?? 131072,
    supportsReasoning: loadResp.supports_reasoning ?? false,
    reasoningEnabled: loadResp.supports_reasoning ?? false,
    supportsTools: loadResp.supports_tools ?? false,
    toolsEnabled: false,
    codeToolsEnabled: false,
    defaultChatTemplate: loadResp.chat_template ?? null,
    chatTemplateOverride: null,
  });
}

/**
 * Auto-load the smallest downloaded model when the user tries to chat
 * without selecting one. Prefers GGUF (picks smallest cached variant),
 * falls back to smallest cached safetensors model, and finally tries to
 * fetch a small default GGUF when nothing is cached.
 *
 * Progress is reported through a single toast that is updated in place.
 * Resolves to `true` once a model has been loaded and registered in the
 * runtime store, `false` when every attempt failed. Never rejects.
 */
async function autoLoadSmallestModel(): Promise<boolean> {
  const toastId = toast("Loading a model…", {
    description: "Auto-selecting the smallest downloaded model.",
    duration: 5000,
    closeButton: true,
  });
  try {
    // A failed listing is treated as "nothing cached" for that format.
    const [ggufRepos, modelRepos] = await Promise.all([
      listCachedGguf().catch(() => []),
      listCachedModels().catch(() => []),
    ]);

    // Try GGUF first: walk repos from smallest to largest total size and
    // load the smallest downloaded variant of the first repo that works.
    if (ggufRepos.length > 0) {
      const sorted = [...ggufRepos].sort((a, b) => a.size_bytes - b.size_bytes);
      for (const repo of sorted) {
        try {
          const variants = await listGgufVariants(repo.repo_id);
          const downloaded = variants.variants
            .filter((v) => v.downloaded)
            .sort((a, b) => a.size_bytes - b.size_bytes);
          if (downloaded.length > 0) {
            const variant = downloaded[0];
            const loadResp = await loadModel({
              model_path: repo.repo_id,
              hf_token: null,
              max_seq_length: 4096,
              load_in_4bit: true,
              is_lora: false,
              gguf_variant: variant.quant,
              trust_remote_code: false,
            });
            useChatRuntimeStore.getState().setCheckpoint(repo.repo_id, variant.quant);
            // Re-read the store so the params spread below sees the state
            // as it is after setCheckpoint.
            const store = useChatRuntimeStore.getState();
            store.setParams({ ...store.params, maxTokens: loadResp.context_length ?? 131072 });
            // Add model to store so the selector shows the name
            registerAutoLoadedModel({
              id: repo.repo_id,
              name: loadResp.display_name ?? repo.repo_id,
              isVision: loadResp.is_vision ?? false,
              isLora: loadResp.is_lora ?? false,
              isGguf: loadResp.is_gguf ?? false,
              isAudio: loadResp.is_audio ?? false,
              audioType: loadResp.audio_type ?? null,
              hasAudioInput: loadResp.has_audio_input ?? false,
            });
            applyAutoLoadedGgufState(loadResp);
            toast.success(`Loaded ${repo.repo_id} (${variant.quant})`, { id: toastId });
            return true;
          }
        } catch {
          // Best effort: a repo that fails to list or load is skipped.
          continue;
        }
      }
    }

    // Fall back to safetensors models, again smallest first.
    if (modelRepos.length > 0) {
      const sorted = [...modelRepos].sort((a, b) => a.size_bytes - b.size_bytes);
      for (const repo of sorted) {
        try {
          const sfLoadResp = await loadModel({
            model_path: repo.repo_id,
            hf_token: null,
            max_seq_length: 4096,
            load_in_4bit: true,
            is_lora: false,
            gguf_variant: null,
            trust_remote_code: false,
          });
          useChatRuntimeStore.getState().setCheckpoint(repo.repo_id);
          const store = useChatRuntimeStore.getState();
          store.setParams({ ...store.params, maxTokens: 4096 });
          registerAutoLoadedModel({
            id: repo.repo_id,
            name: sfLoadResp.display_name ?? repo.repo_id,
            isVision: sfLoadResp.is_vision ?? false,
            isLora: sfLoadResp.is_lora ?? false,
            isGguf: sfLoadResp.is_gguf ?? false,
          });
          toast.success(`Loaded ${repo.repo_id}`, { id: toastId });
          return true;
        } catch {
          // Best effort: try the next cached model.
          continue;
        }
      }
    }

    // No cached models found — try downloading a small default GGUF
    const defaultRepoId = "unsloth/Qwen3.5-4B-GGUF";
    const defaultVariant = "UD-Q4_K_XL";
    toast("Downloading a small model…", {
      id: toastId,
      description: "No downloaded models found. Fetching Qwen3.5-4B (UD-Q4_K_XL).",
      duration: 30000,
    });
    try {
      const loadResp = await loadModel({
        model_path: defaultRepoId,
        hf_token: null,
        max_seq_length: 4096,
        load_in_4bit: true,
        is_lora: false,
        gguf_variant: defaultVariant,
        trust_remote_code: false,
      });
      useChatRuntimeStore.getState().setCheckpoint(defaultRepoId, defaultVariant);
      const store = useChatRuntimeStore.getState();
      store.setParams({ ...store.params, maxTokens: loadResp.context_length ?? 131072 });
      registerAutoLoadedModel({
        id: defaultRepoId,
        name: loadResp.display_name ?? "Qwen3.5-4B-GGUF",
        isVision: loadResp.is_vision ?? false,
        isLora: false,
        isGguf: true,
      });
      // Same reset as the cached-GGUF path, including codeToolsEnabled, so
      // a stale code-tools toggle cannot survive onto the new model.
      applyAutoLoadedGgufState(loadResp);
      toast.success("Loaded Qwen3.5-4B (UD-Q4_K_XL)", { id: toastId });
      return true;
    } catch {
      toast.dismiss(toastId);
      return false;
    }
  } catch {
    toast.dismiss(toastId);
    return false;
  }
}

/**
 * Build the assistant-ui `ChatModelAdapter` that drives chat generation.
 *
 * `run` is an async generator. For each invocation it:
 *  1. waits for an in-progress model load, or auto-loads a model when no
 *     checkpoint is selected (throws `Error("Load a model first.")` if that
 *     fails);
 *  2. converts the thread messages to OpenAI-style messages and prepends the
 *     configured system prompt;
 *  3. either calls `generateAudio` (non-streaming) for audio-output models or
 *     streams `streamChatCompletions`, yielding the cumulative content
 *     (tool-call parts first, then parsed text parts) plus timing metadata.
 *
 * Errors from the request are surfaced via a toast (unless the run was
 * aborted) and re-thrown to the caller.
 */
export function createOpenAIStreamAdapter(): ChatModelAdapter {
  return {
    async *run({ messages, abortSignal, unstable_threadId }) {
      // Wait for in-progress model load to finish before inferring
      if (useChatRuntimeStore.getState().modelLoading) {
        toast.info("Waiting for model to finish loading…");
        await waitForModelReady(abortSignal);
      }

      if (!useChatRuntimeStore.getState().params.checkpoint) {
        // Auto-load the smallest downloaded model
        const loaded = await autoLoadSmallestModel();
        if (!loaded) {
          toast.error("No model loaded", {
            description: "Pick a model in the top bar, then retry.",
          });
          throw new Error("Load a model first.");
        }
      }

      // Snapshot the store only AFTER the waits above. A snapshot taken
      // earlier would still hold the pre-load `params.checkpoint`, `models`
      // and capability flags, so the request would be sent with a stale
      // (possibly empty) model id and wrong reasoning/tool settings.
      const runtime = useChatRuntimeStore.getState();
      const { params } = runtime;

      const {
        supportsTools,
        toolsEnabled,
        codeToolsEnabled,
      } = runtime;

      const outboundMessages = messages
        .map(toOpenAIMessage)
        .filter((message): message is NonNullable<typeof message> =>
          Boolean(message),
        );

      if (params.systemPrompt.trim()) {
        outboundMessages.unshift({
          role: "system",
          content: params.systemPrompt.trim(),
        });
      }
      const imageBase64 = findLatestUserImageBase64(messages);
      const audioBase64 = findLatestUserAudioBase64(messages);
      // Clear pending audio from store after extracting (consumed on send)
      if (audioBase64) {
        const audioName = runtime.pendingAudioName;
        if (audioName) {
          const lastUserMsg = [...messages].reverse().find((m) => m.role === "user");
          if (lastUserMsg) sentAudioNames.set(lastUserMsg.id, audioName);
        }
        runtime.clearPendingAudio();
      }
      const useAdapter = await resolveUseAdapter(unstable_threadId);

      // ── Audio model path (non-streaming) ─────────────────────
      const activeModel = runtime.models.find(
        (m) => m.id === params.checkpoint,
      );
      if (activeModel?.isAudio && !activeModel?.hasAudioInput) {
        const threadKey = unstable_threadId || "__default";
        runtime.setThreadRunning(threadKey, true);
        try {
          // Placeholder shown while the single non-streaming request runs.
          yield {
            content: [{ type: "text" as const, text: "Generating audio..." }],
          };

          const result = await generateAudio(
            {
              model: params.checkpoint,
              messages: outboundMessages,
              stream: false,
              temperature: params.temperature,
              top_p: params.topP,
              max_tokens: params.maxTokens,
              top_k: params.topK,
              min_p: params.minP,
              repetition_penalty: params.repetitionPenalty,
              presence_penalty: params.presencePenalty,
              ...(useAdapter === undefined ? {} : { use_adapter: useAdapter }),
            },
            abortSignal,
          );

          const audioUrl = `data:audio/wav;base64,${result.audio.data}`;
          yield {
            content: [
              {
                type: "text" as const,
                text: `<audio-player src="${audioUrl}" />`,
              },
            ],
          };
        } catch (err) {
          if (!abortSignal.aborted) {
            toast.error("Audio generation failed", {
              description:
                err instanceof Error ? err.message : "Unknown error",
            });
          }
          throw err;
        } finally {
          runtime.setThreadRunning(threadKey, false);
        }
        return;
      }

      // ── Streaming text path ──────────────────────────────────
      const threadKey = unstable_threadId || "__default";
      let waitingFirstChunk = true;
      let firstTokenSettled = false;
      const streamStartTime = Date.now();
      let firstTokenTime: number | undefined;
      let totalChunks = 0;
      let resolveFirstToken: (() => void) | null = null;
      let rejectFirstToken: ((err: unknown) => void) | null = null;
      const firstTokenPromise = new Promise<void>((resolve, reject) => {
        resolveFirstToken = resolve;
        rejectFirstToken = reject;
      });
      // Avoid unhandled rejections if toast.promise never attached.
      void firstTokenPromise.catch(() => {});

      // Both settle helpers are idempotent: only the first call has an effect.
      function settleFirstTokenOk(): void {
        if (firstTokenSettled) return;
        firstTokenSettled = true;
        resolveFirstToken?.();
      }

      function settleFirstTokenErr(err: unknown): void {
        if (firstTokenSettled) return;
        firstTokenSettled = true;
        rejectFirstToken?.(err);
      }

      // Only flip the UI to "waiting" if the first token is slow to arrive.
      const warmupDelayMs = 450;
      const warmupTimer = setTimeout(() => {
        if (!waitingFirstChunk) return;
        if (abortSignal.aborted) return;
        runtime.setGeneratingStatus("waiting");
      }, warmupDelayMs);
      runtime.setThreadRunning(threadKey, true);
      let cumulativeText = "";
      let reasoningStartAt: number | null = null;
      let reasoningDuration = 0;
      // Tool call content parts — accumulated and yielded cumulatively.
      // result is set directly on the tool-call part when tool_end arrives.
      const toolCallParts: ToolCallMessagePart[] = [];

      try {
        const { supportsReasoning, reasoningEnabled } = runtime;
        const stream = streamChatCompletions(
          {
            model: params.checkpoint,
            messages: outboundMessages,
            stream: true,
            temperature: params.temperature,
            top_p: params.topP,
            max_tokens: params.maxTokens,
            top_k: params.topK,
            min_p: params.minP,
            repetition_penalty: params.repetitionPenalty,
            presence_penalty: params.presencePenalty,
            image_base64: imageBase64,
            audio_base64: audioBase64,
            ...(useAdapter === undefined ? {} : { use_adapter: useAdapter }),
            ...(supportsReasoning ? { enable_thinking: reasoningEnabled } : {}),
            ...(supportsTools && (toolsEnabled || codeToolsEnabled)
              ? {
                  enable_tools: true,
                  enabled_tools: [
                    ...(toolsEnabled ? ["web_search"] : []),
                    ...(codeToolsEnabled ? ["python", "terminal"] : []),
                  ],
                  auto_heal_tool_calls: useChatRuntimeStore.getState().autoHealToolCalls,
                  max_tool_calls_per_message: useChatRuntimeStore.getState().maxToolCallsPerMessage,
                  // The store value is multiplied by 60 before sending;
                  // values >= 9999 are passed through as the literal 9999.
                  tool_call_timeout: (() => {
                    const mins = useChatRuntimeStore.getState().toolCallTimeout;
                    return mins >= 9999 ? 9999 : mins * 60;
                  })(),
                  session_id: unstable_threadId || undefined,
                }
              : {}),
          },
          abortSignal,
        );

        for await (const chunk of stream) {
          // Handle tool status events
          const toolStatusText = (chunk as unknown as { _toolStatus?: string })._toolStatus;
          if (toolStatusText !== undefined) {
            runtime.setToolStatus(toolStatusText || null);
            continue;
          }

          // Emit tool-call content parts for assistant-ui.
          // On tool_start: add a new tool-call part (renders in "running" state).
          // On tool_end: set result on the existing part (transitions to "complete").
          const toolEvent = (chunk as unknown as { _toolEvent?: Record<string, unknown> })._toolEvent;
          if (toolEvent !== undefined) {
            if (toolEvent.type === "tool_start") {
              // Fall back to a synthetic id when the event carries none.
              const id = (toolEvent.tool_call_id as string) || `${toolEvent.tool_name}_${Date.now()}`;
              const toolArgs = (toolEvent.arguments ?? {}) as ToolCallMessagePart["args"];
              toolCallParts.push({
                type: "tool-call" as const,
                toolCallId: id,
                toolName: toolEvent.tool_name as string,
                argsText: JSON.stringify(toolArgs),
                args: toolArgs,
              });
            } else if (toolEvent.type === "tool_end") {
              // Without an id, attribute the result to the most recent call.
              const id = (toolEvent.tool_call_id as string) ||
                toolCallParts[toolCallParts.length - 1]?.toolCallId || "";
              const idx = toolCallParts.findIndex((p) => p.toolCallId === id);
              if (idx !== -1) {
                toolCallParts[idx] = { ...toolCallParts[idx], result: toolEvent.result as string };
              }
            }
            // Yield cumulative state so tool UI updates (tools first, text after)
            const textParts = parseAssistantContent(cumulativeText);
            yield {
              content: [...toolCallParts, ...textParts],
              metadata: {
                timing: buildTiming(streamStartTime, totalChunks, firstTokenTime),
                custom: { reasoningDuration },
              },
            };
            continue;
          }

          // Every non-tool chunk is counted, even when it carries no text delta.
          totalChunks += 1;
          const delta = chunk.choices?.[0]?.delta?.content;
          if (!delta) {
            continue;
          }
          if (waitingFirstChunk) {
            waitingFirstChunk = false;
            firstTokenTime = Date.now() - streamStartTime;
            settleFirstTokenOk();
            runtime.setGeneratingStatus(null);
          }

          cumulativeText += delta;
          const parts = parseAssistantContent(cumulativeText);

          // Reasoning time is measured from the first reasoning part until
          // the think tag closes, rounded to whole seconds, and recorded once.
          if (parts.some((part) => part.type === "reasoning") && !reasoningStartAt) {
            reasoningStartAt = Date.now();
          }
          if (hasClosedThinkTag(cumulativeText) && reasoningStartAt && !reasoningDuration) {
            reasoningDuration = Math.round((Date.now() - reasoningStartAt) / 1000);
          }

          if (parts.length > 0 || toolCallParts.length > 0) {
            yield {
              content: [...toolCallParts, ...parts],
              metadata: {
                timing: buildTiming(
                  streamStartTime,
                  totalChunks,
                  firstTokenTime,
                ),
                custom: { reasoningDuration },
              },
            };
          }
        }
        settleFirstTokenOk();

        // Extract source parts from completed web_search tool calls
        const sourceParts = toolCallParts.flatMap((tc) => {
          if (tc.toolName !== "web_search" || !tc.result) return [];
          return parseSourcesFromResult(typeof tc.result === "string" ? tc.result : "");
        });

        // Final yield: full content plus completed timing statistics.
        yield {
          content: [
            ...toolCallParts,
            ...parseAssistantContent(cumulativeText),
            ...sourceParts,
          ],
          metadata: {
            timing: buildTiming(
              streamStartTime,
              totalChunks,
              firstTokenTime,
              Date.now() - streamStartTime,
              estimateTokenCount(cumulativeText),
              toolCallParts.length,
            ),
            custom: { reasoningDuration },
          },
        };
      } catch (err) {
        settleFirstTokenErr(err instanceof Error ? err : new Error("Generation failed"));
        if (!abortSignal.aborted) {
          toast.error("Generation failed", {
            description: err instanceof Error ? err.message : "Unknown error",
          });
        }
        throw err;
      } finally {
        runtime.setGeneratingStatus(null);
        runtime.setToolStatus(null);
        clearTimeout(warmupTimer);
        // If the generator is torn down before any token arrived and nothing
        // settled the promise yet, reject it. The helper is a no-op when the
        // promise has already been settled.
        if (waitingFirstChunk) {
          settleFirstTokenErr(
            new Error(abortSignal.aborted ? "Cancelled" : "No tokens received"),
          );
        }
        runtime.setThreadRunning(threadKey, false);
      }
    },
  };
}


================================================
FILE: studio/frontend/src/features/chat/api/chat-api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";
import type {
  AudioGenerationResponse,
  GgufVariantsResponse,
  InferenceStatusResponse,
  ListLorasResponse,
  ListModelsResponse,
  LoadModelRequest,
  LoadModelResponse,
  OpenAIChatChunk,
  OpenAIChatCompletionsRequest,
  UnloadModelRequest,
  ValidateModelResponse,
} from "../types/api";

/**
 * Derive a human-readable error message from a failed response.
 *
 * Prefers a string `detail` field on the body, then a string `message`
 * field, and otherwise falls back to `Request failed (<status>)`.
 */
function parseErrorText(status: number, body: unknown): string {
  if (body && typeof body === "object") {
    const fields = body as Record<string, unknown>;
    // Checked in priority order: `detail` wins over `message`.
    for (const key of ["detail", "message"]) {
      if (!(key in fields)) continue;
      const candidate = fields[key];
      if (typeof candidate === "string") {
        return candidate;
      }
    }
  }
  return `Request failed (${status})`;
}

/**
 * Read the response body as JSON and return it typed as `T`.
 *
 * A body that fails to parse is treated as `null`. For a non-OK response an
 * `Error` is thrown whose message comes from `parseErrorText`.
 */
async function parseJsonOrThrow<T>(response: Response): Promise<T> {
  const payload: unknown = await response.json().then(
    (parsed) => parsed,
    () => null,
  );
  if (response.ok) {
    return payload as T;
  }
  throw new Error(parseErrorText(response.status, payload));
}

/** GET `/api/models/list` and return the parsed JSON body. */
export async function listModels(): Promise<ListModelsResponse> {
  return parseJsonOrThrow<ListModelsResponse>(
    await authFetch("/api/models/list"),
  );
}

/**
 * GET `/api/models/loras` and return the parsed JSON body.
 *
 * When `outputsDir` is truthy it is sent as the `outputs_dir` query parameter.
 */
export async function listLoras(outputsDir?: string): Promise<ListLorasResponse> {
  let url = "/api/models/loras";
  if (outputsDir) {
    const search = new URLSearchParams({ outputs_dir: outputsDir });
    url += `?${search.toString()}`;
  }
  return parseJsonOrThrow<ListLorasResponse>(await authFetch(url));
}

/** GET `/api/inference/status` and return the parsed JSON body. */
export async function getInferenceStatus(): Promise<InferenceStatusResponse> {
  return parseJsonOrThrow<InferenceStatusResponse>(
    await authFetch("/api/inference/status"),
  );
}

/**
 * POST the load request as JSON to `/api/inference/load`.
 *
 * Resolves with the parsed response body; rejects with an `Error` when the
 * server answers with a non-OK status.
 */
export async function loadModel(
  payload: LoadModelRequest,
): Promise<LoadModelResponse> {
  const init = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  };
  return parseJsonOrThrow<LoadModelResponse>(
    await authFetch("/api/inference/load", init),
  );
}

/**
 * POST to `/api/inference/validate`.
 *
 * Only `model_path`, `hf_token` and `gguf_variant` are forwarded from the
 * payload; a missing `gguf_variant` is sent as `null`.
 */
export async function validateModel(
  payload: LoadModelRequest,
): Promise<ValidateModelResponse> {
  const requestBody = {
    model_path: payload.model_path,
    hf_token: payload.hf_token,
    gguf_variant: payload.gguf_variant ?? null,
  };
  const init = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  };
  return parseJsonOrThrow<ValidateModelResponse>(
    await authFetch("/api/inference/validate", init),
  );
}

/**
 * POST the unload request as JSON to `/api/inference/unload`.
 *
 * The response body is discarded; a non-OK status rejects with an `Error`.
 */
export async function unloadModel(payload: UnloadModelRequest): Promise<void> {
  const init = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  };
  await parseJsonOrThrow<unknown>(
    await authFetch("/api/inference/unload", init),
  );
}

/** One entry of the `cached` array returned by `/api/models/cached-gguf`. */
export interface CachedGgufRepo {
  // Repository identifier, e.g. "unsloth/Qwen3.5-4B-GGUF".
  repo_id: string;
  // Presumably the on-disk size of the cached repo in bytes — confirm against the backend.
  size_bytes: number;
  // Presumably the local cache location reported by the backend — confirm against the backend.
  cache_path: string;
}

/**
 * GET `/api/models/gguf-download-progress` for a repo/variant pair.
 *
 * `expectedBytes` is stringified and sent as the `expected_bytes` query
 * parameter alongside `repo_id` and `variant`.
 */
export async function getGgufDownloadProgress(
  repoId: string,
  variant: string,
  expectedBytes: number,
): Promise<{ downloaded_bytes: number; expected_bytes: number; progress: number }> {
  const query = new URLSearchParams();
  query.set("repo_id", repoId);
  query.set("variant", variant);
  query.set("expected_bytes", String(expectedBytes));
  const url = `/api/models/gguf-download-progress?${query}`;
  return parseJsonOrThrow(await authFetch(url));
}

/** GET `/api/models/download-progress` for the given `repo_id`. */
export async function getDownloadProgress(
  repoId: string,
): Promise<{ downloaded_bytes: number; expected_bytes: number; progress: number }> {
  const query = new URLSearchParams();
  query.set("repo_id", repoId);
  const url = `/api/models/download-progress?${query}`;
  return parseJsonOrThrow(await authFetch(url));
}

/** GET `/api/models/cached-gguf` and return the `cached` array of the body. */
export async function listCachedGguf(): Promise<CachedGgufRepo[]> {
  const { cached } = await parseJsonOrThrow<{ cached: CachedGgufRepo[] }>(
    await authFetch("/api/models/cached-gguf"),
  );
  return cached;
}

/** One entry of the `cached` array returned by `/api/models/cached-models`. */
export interface CachedModelRepo {
  // Repository identifier of the cached model.
  repo_id: string;
  // Presumably the on-disk size of the cached repo in bytes — confirm against the backend.
  size_bytes: number;
}

/** GET `/api/models/cached-models` and return the `cached` array of the body. */
export async function listCachedModels(): Promise<CachedModelRepo[]> {
  const { cached } = await parseJsonOrThrow<{ cached: CachedModelRepo[] }>(
    await authFetch("/api/models/cached-models"),
  );
  return cached;
}

/**
 * Send a DELETE to `/api/models/delete-cached` for the given repo.
 *
 * `variant` is included in the JSON body only when it is truthy. The
 * response body is discarded; a non-OK status rejects with an `Error`.
 */
export async function deleteCachedModel(repoId: string, variant?: string): Promise<void> {
  const requestBody: Record<string, string> = {
    repo_id: repoId,
    ...(variant ? { variant } : {}),
  };
  const init = {
    method: "DELETE",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  };
  await parseJsonOrThrow<unknown>(
    await authFetch("/api/models/delete-cached", init),
  );
}

/**
 * GET `/api/models/gguf-variants` for a repository.
 *
 * `hfToken`, when truthy, is sent as the `hf_token` query parameter.
 */
export async function listGgufVariants(
  repoId: string,
  hfToken?: string,
): Promise<GgufVariantsResponse> {
  const query: Record<string, string> = { repo_id: repoId };
  if (hfToken) {
    query.hf_token = hfToken;
  }
  const search = new URLSearchParams(query);
  return parseJsonOrThrow<GgufVariantsResponse>(
    await authFetch(`/api/models/gguf-variants?${search}`),
  );
}

/**
 * Extract the payloads of all `data:` lines from one raw SSE event.
 *
 * Lines are split on LF or CRLF; lines that do not start with `data:` are
 * ignored. Leading whitespace after the prefix is stripped from each payload.
 */
function parseSseEvent(rawEvent: string): string[] {
  const prefix = "data:";
  return rawEvent
    .split(/\r?\n/)
    .filter((line) => line.startsWith(prefix))
    .map((line) => line.slice(prefix.length).trimStart());
}

/**
 * POST a chat-completions request to `/v1/chat/completions` and yield the
 * server-sent events of the response as they arrive.
 *
 * Yields:
 *  - regular OpenAI chat chunks, as parsed from each event's JSON payload;
 *  - `{ _toolStatus }` objects for custom `tool_status` events;
 *  - `{ _toolEvent }` objects for custom `tool_start` / `tool_end` events
 *    (both cast to `OpenAIChatChunk`; consumers must probe for these keys).
 *
 * The generator finishes on a `[DONE]` payload or when the body ends. An
 * event still incomplete when the body ends (no terminating blank line) is
 * not emitted.
 *
 * @param payload Request body, sent as JSON.
 * @param signal  Abort signal forwarded to the fetch call.
 * @throws Error on a non-OK response, a missing response body, an event
 *         carrying an `error` field, or a payload that is not valid JSON.
 */
export async function* streamChatCompletions(
  payload: OpenAIChatCompletionsRequest,
  signal: AbortSignal,
): AsyncGenerator<OpenAIChatChunk> {
  const response = await authFetch("/v1/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    signal,
  });

  if (!response.ok) {
    const body = await response.json().catch(() => null);
    throw new Error(parseErrorText(response.status, body));
  }

  if (!response.body) {
    throw new Error("Stream response missing body");
  }

  // Blank line between events; each of the two line breaks may be LF or CRLF,
  // so a match can be 2, 3 or 4 characters long.
  const eventSeparator = /\r?\n\r?\n/;
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }

      buffer += decoder.decode(value, { stream: true });

      // Drain every complete event currently in the buffer. The separator is
      // re-searched after each event because the buffer has been shortened.
      for (
        let separator = eventSeparator.exec(buffer);
        separator !== null;
        separator = eventSeparator.exec(buffer)
      ) {
        const rawEvent = buffer.slice(0, separator.index);
        // Consume exactly what matched. Guessing the length from the first
        // character mis-slices mixed endings such as "\r\n\n" and would eat
        // the first character of the following event.
        buffer = buffer.slice(separator.index + separator[0].length);

        const dataLines = parseSseEvent(rawEvent);
        if (dataLines.length === 0) {
          continue;
        }

        const dataText = dataLines.join("\n");
        if (dataText === "[DONE]") {
          return;
        }

        const parsed = JSON.parse(dataText) as
          | OpenAIChatChunk
          | { type?: string; content?: string; error?: { message?: string } };
        if ("error" in parsed && parsed.error) {
          throw new Error(parsed.error.message || "Stream error");
        }
        // Tool status events are custom SSE payloads, not OpenAI chunks
        if ("type" in parsed && parsed.type === "tool_status") {
          yield { _toolStatus: parsed.content ?? "" } as unknown as OpenAIChatChunk;
          continue;
        }
        // Tool start/end events carry full input/output for the tool outputs panel
        if ("type" in parsed && (parsed.type === "tool_start" || parsed.type === "tool_end")) {
          yield { _toolEvent: parsed } as unknown as OpenAIChatChunk;
          continue;
        }
        yield parsed as OpenAIChatChunk;
      }
    }
  } finally {
    // Release the underlying stream on every exit path: normal completion,
    // "[DONE]", a thrown error, or the consumer abandoning the generator.
    // cancel() rejects when the stream has already errored (e.g. after an
    // abort); that rejection is irrelevant here and is ignored.
    await reader.cancel().catch(() => {});
  }
}

/**
 * POST a non-streaming request to `/api/inference/chat/completions` and
 * return the parsed JSON body.
 *
 * `stream` is always forced to `false`, whatever the payload says.
 *
 * @throws Error with a message from `parseErrorText` on a non-OK response.
 */
export async function generateAudio(
  payload: OpenAIChatCompletionsRequest,
  signal: AbortSignal,
): Promise<AudioGenerationResponse> {
  const requestBody = JSON.stringify({ ...payload, stream: false });
  const response = await authFetch("/api/inference/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: requestBody,
    signal,
  });

  if (response.ok) {
    return (await response.json()) as AudioGenerationResponse;
  }

  // Error path: an unparseable body degrades to the generic status message.
  const errorBody = await response.json().catch(() => null);
  throw new Error(parseErrorText(response.status, errorBody));
}


================================================
FILE: studio/frontend/src/features/chat/chat-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  type LoraModelOption,
  type ModelOption,
  ModelSelector,
} from "@/components/assistant-ui/model-selector";
import { Thread } from "@/components/assistant-ui/thread";
import { Button } from "@/components/ui/button";
import { SidebarProvider, SidebarTrigger, useSidebar } from "@/components/ui/sidebar";
import {
  Sheet,
  SheetContent,
  SheetDescription,
  SheetHeader,
  SheetTitle,
} from "@/components/ui/sheet";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
import {
  ColumnInsertIcon,
  PencilEdit02Icon,
  Settings04Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import {
  type CSSProperties,
  type ReactElement,
  type ReactNode,
  memo,
  useCallback,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";
import { toast } from "sonner";
import { GuidedTour, useGuidedTourController } from "@/features/tour";
import { ChatSettingsPanel } from "./chat-settings-sheet";
import { ModelLoadInlineStatus } from "./components/model-load-status";
import { db } from "./db";
import { useChatModelRuntime } from "./hooks/use-chat-model-runtime";
import {
  clearTrainingCompareHandoff,
  getTrainingCompareHandoff,
} from "./lib/training-compare-handoff";
import { ChatRuntimeProvider } from "./runtime-provider";
import { useChatRuntimeStore } from "./stores/chat-runtime-store";
import {
  type CompareHandle,
  CompareHandlesProvider,
  RegisterCompareHandle,
  SharedComposer,
} from "./shared-composer";
import { ThreadSidebar } from "./thread-sidebar";
import type { ChatView, MessageRecord } from "./types";
import { buildChatTourSteps } from "./tour";

/** A LoRA adapter considered by `pickBestLoraForBase`. */
type LoraCandidate = {
  // Identifier of the LoRA.
  id: string;
  // Base model reference; compared after trimming and lower-casing.
  baseModel: string;
  // Sort key (larger sorts first); a missing value is ranked as -1.
  updatedAt?: number;
};

/** Trim and lower-case a model reference; `null`/`undefined` become `""`. */
function normalizeModelRef(value: string | null | undefined): string {
  if (value === null || value === undefined) {
    return "";
  }
  return value.trim().toLowerCase();
}

/**
 * Choose the LoRA that best matches `baseModel`.
 *
 * Candidates are ranked newest-first by `updatedAt` (missing values rank
 * as -1). Preference order:
 *  1. a candidate whose normalized base model equals the normalized target;
 *  2. a candidate whose non-empty normalized base model contains the target
 *     or is contained in it;
 *  3. the newest candidate.
 *
 * Returns `null` for an empty list, and the newest candidate when
 * `baseModel` normalizes to an empty string.
 */
function pickBestLoraForBase(
  loras: LoraCandidate[],
  baseModel: string | null,
): LoraCandidate | null {
  if (loras.length === 0) {
    return null;
  }

  const newestFirst = loras
    .slice()
    .sort((a, b) => (b.updatedAt ?? -1) - (a.updatedAt ?? -1));
  const newest = newestFirst[0];

  const target = normalizeModelRef(baseModel);
  if (!target) {
    return newest;
  }

  // Single pass: an exact match wins immediately; otherwise remember the
  // first (i.e. newest) substring match as the fallback.
  let firstPartial: LoraCandidate | undefined;
  for (const candidate of newestFirst) {
    const candidateBase = normalizeModelRef(candidate.baseModel);
    if (candidateBase === target) {
      return candidate;
    }
    if (firstPartial !== undefined || !candidateBase) {
      continue;
    }
    if (candidateBase.includes(target) || target.includes(candidateBase)) {
      firstPartial = candidate;
    }
  }
  return firstPartial ?? newest;
}

/**
 * Report whether a message contains an image part, either directly in its
 * `content` array or inside the `content` array of any attachment.
 * Non-array `content` / `attachments` values are treated as empty.
 */
function messageHasImage(message: MessageRecord): boolean {
  const inlineParts = Array.isArray(message.content) ? message.content : [];
  if (inlineParts.some((part) => part.type === "image")) {
    return true;
  }

  const attachmentList = Array.isArray(message.attachments) ? message.attachments : [];
  for (const attachment of attachmentList) {
    const nested = (
      Array.isArray(attachment.content) ? attachment.content : []
    ) as Array<{ type?: string }>;
    if (nested.some((part) => part?.type === "image")) {
      return true;
    }
  }
  return false;
}

/**
 * Single-thread chat view: one `Thread` inside a "base" runtime provider.
 * `threadId` and `newThreadNonce` are forwarded to the provider unchanged.
 */
const SingleContent = memo(function SingleContent(
  props: { threadId?: string; newThreadNonce?: string },
): ReactElement {
  const { threadId, newThreadNonce } = props;
  const thread = (
    <div className="min-h-0 flex-1">
      <Thread />
    </div>
  );
  return (
    <ChatRuntimeProvider
      modelType="base"
      initialThreadId={threadId}
      newThreadNonce={newThreadNonce}
    >
      {thread}
    </ChatRuntimeProvider>
  );
});

/** Model chosen for one side of the general compare view. */
type CompareModelSelection = {
  // Selected model id; an empty string means nothing has been picked yet.
  id: string;
  // Whether the selection is a LoRA, as reported by the model selector.
  isLora: boolean;
  // GGUF variant reported by the model selector, when there is one.
  ggufVariant?: string;
};

/**
 * Hook telling whether the compare view should use the LoRA
 * base-vs-fine-tuned mode.
 *
 * True when a checkpoint is selected and its id is present in the store's
 * `loras` list; that case uses the fast simultaneous adapter-toggle path.
 */
function useIsLoraCompare(): boolean {
  return useChatRuntimeStore((state) => {
    const checkpoint = state.params.checkpoint;
    if (!checkpoint) {
      return false;
    }
    return state.loras.some((lora) => lora.id === checkpoint);
  });
}

/**
 * Compare-view switch: renders the LoRA adapter-toggle layout when the
 * loaded checkpoint is a LoRA, otherwise the general two-model layout.
 */
const CompareContent = memo(function CompareContent(
  props: { pairId: string; models: ModelOption[]; loraModels: LoraModelOption[] },
): ReactElement {
  const { pairId, models, loraModels } = props;
  const isLoraCompare = useIsLoraCompare();

  if (isLoraCompare) {
    return <LoraCompareContent pairId={pairId} />;
  }
  return (
    <GeneralCompareContent pairId={pairId} models={models} loraModels={loraModels} />
  );
});

/**
 * Fast path: same model, adapter on/off, simultaneous generation.
 *
 * Renders two side-by-side threads ("base" and "lora") that share one
 * composer. The existing thread ids for `pairId` are looked up in the local
 * `db` so each side resumes its own thread.
 */
const LoraCompareContent = memo(function LoraCompareContent({
  pairId,
}: { pairId: string }): ReactElement {
  // Handles registered by each side via RegisterCompareHandle; the shared
  // composer receives the same ref.
  const handlesRef = useRef<Record<string, CompareHandle>>({});
  const [baseThreadId, setBaseThreadId] = useState<string>();
  const [loraThreadId, setLoraThreadId] = useState<string>();

  // Resolve the stored thread ids for this pair whenever pairId changes.
  useEffect(() => {
    // Guards against applying results after cleanup (unmount or pairId change).
    let isActive = true;
    db.threads
      .where("pairId")
      .equals(pairId)
      .toArray()
      .then((threads) => {
        if (!isActive) return;
        setBaseThreadId(threads.find((t) => t.modelType === "base")?.id);
        setLoraThreadId(threads.find((t) => t.modelType === "lora")?.id);
      });
    return () => { isActive = false; };
  }, [pairId]);

  return (
    <CompareHandlesProvider handlesRef={handlesRef}>
      <div className="flex min-h-0 flex-1 flex-col">
        <div
          data-tour="chat-compare-view"
          className="grid min-h-0 flex-1 grid-cols-1 px-0 md:grid-cols-2"
        >
          <div className="flex min-h-0 flex-col">
            <div className="px-3 py-1.5">
              <span className="text-[10px] font-semibold uppercase tracking-wider text-muted-foreground">
                Base Model
              </span>
            </div>
            <div className="min-h-0 flex-1">
              <ChatRuntimeProvider modelType="base" pairId={pairId} initialThreadId={baseThreadId}>
                <RegisterCompareHandle name="base" />
                <Thread hideComposer={true} hideWelcome={true} />
              </ChatRuntimeProvider>
            </div>
          </div>
          <div className="flex min-h-0 flex-col border-t border-border/60 md:border-t-0 md:border-l">
            <div className="px-3 py-1.5 text-start md:text-end">
              <span className="text-[10px] font-semibold uppercase tracking-wider text-primary">
                Fine-tuned (LoRA)
              </span>
            </div>
            <div className="min-h-0 flex-1">
              <ChatRuntimeProvider modelType="lora" pairId={pairId} initialThreadId={loraThreadId}>
                <RegisterCompareHandle name="lora" />
                <Thread hideComposer={true} hideWelcome={true} />
              </ChatRuntimeProvider>
            </div>
          </div>
        </div>
        <div className="mx-auto w-full max-w-4xl px-4 py-4">
          <SharedComposer handlesRef={handlesRef} />
        </div>
      </div>
    </CompareHandlesProvider>
  );
});

/**
 * General path: any two models, sequential load → generate.
 *
 * Renders two side-by-side threads ("model1" and "model2"), each with its
 * own model selector, plus one shared composer that receives both
 * selections.
 */
const GeneralCompareContent = memo(function GeneralCompareContent({
  pairId,
  models,
  loraModels,
}: { pairId: string; models: ModelOption[]; loraModels: LoraModelOption[] }): ReactElement {
  // Handles registered by each side via RegisterCompareHandle; the shared
  // composer receives the same ref.
  const handlesRef = useRef<Record<string, CompareHandle>>({});
  const [model1ThreadId, setModel1ThreadId] = useState<string>();
  const [model2ThreadId, setModel2ThreadId] = useState<string>();

  const globalCheckpoint = useChatRuntimeStore((s) => s.params.checkpoint);
  const globalGgufVariant = useChatRuntimeStore((s) => s.activeGgufVariant);
  // Model 1 is seeded from the globally loaded checkpoint. This is only the
  // initial state value; later store changes do not update it.
  const [model1, setModel1] = useState<CompareModelSelection>({
    id: globalCheckpoint || "",
    isLora: false,
    ggufVariant: globalGgufVariant ?? undefined,
  });
  // Model 2 starts unselected.
  const [model2, setModel2] = useState<CompareModelSelection>({ id: "", isLora: false });

  // Resolve the stored thread ids for this pair whenever pairId changes.
  useEffect(() => {
    // Guards against applying results after cleanup (unmount or pairId change).
    let isActive = true;
    db.threads
      .where("pairId")
      .equals(pairId)
      .toArray()
      .then((threads) => {
        if (!isActive) return;
        // Threads stored as "base"/"lora" are also accepted for the
        // model1/model2 slots respectively.
        setModel1ThreadId(
          threads.find((t) => t.modelType === "model1" || t.modelType === "base")?.id,
        );
        setModel2ThreadId(
          threads.find((t) => t.modelType === "model2" || t.modelType === "lora")?.id,
        );
      });
    return () => { isActive = false; };
  }, [pairId]);

  return (
    <CompareHandlesProvider handlesRef={handlesRef}>
      <div className="flex min-h-0 flex-1 flex-col">
        <div
          data-tour="chat-compare-view"
          className="grid min-h-0 flex-1 grid-cols-1 px-0 md:grid-cols-2"
        >
          <div className="flex min-h-0 flex-col">
            <div className="flex items-center gap-2 px-3 py-1.5">
              <span className="text-[10px] font-semibold uppercase tracking-wider text-muted-foreground">
                Model 1
              </span>
              <ModelSelector
                models={models}
                loraModels={loraModels}
                value={model1.id}
                onValueChange={(id, meta) => setModel1({ id, isLora: meta.isLora, ggufVariant: meta.ggufVariant })}
                variant="ghost"
                size="sm"
                className="max-w-[50%]"
              />
            </div>
            <div className="min-h-0 flex-1">
              <ChatRuntimeProvider
                modelType="model1"
                pairId={pairId}
                initialThreadId={model1ThreadId}
              >
                <RegisterCompareHandle name="model1" />
                <Thread hideComposer={true} hideWelcome={true} />
              </ChatRuntimeProvider>
            </div>
          </div>
          <div className="flex min-h-0 flex-col border-t border-border/60 md:border-t-0 md:border-l">
            <div className="flex items-center gap-2 px-3 py-1.5 md:justify-end">
              <span className="text-[10px] font-semibold uppercase tracking-wider text-primary">
                Model 2
              </span>
              <ModelSelector
                models={models}
                loraModels={loraModels}
                value={model2.id}
                onValueChange={(id, meta) => setModel2({ id, isLora: meta.isLora, ggufVariant: meta.ggufVariant })}
                variant="ghost"
                size="sm"
                className="max-w-[50%]"
              />
            </div>
            <div className="min-h-0 flex-1">
              <ChatRuntimeProvider
                modelType="model2"
                pairId={pairId}
                initialThreadId={model2ThreadId}
              >
                <RegisterCompareHandle name="model2" />
                <Thread hideComposer={true} hideWelcome={true} />
              </ChatRuntimeProvider>
            </div>
          </div>
        </div>
        <div className="mx-auto w-full max-w-4xl px-4 py-4">
          <SharedComposer handlesRef={handlesRef} model1={model1} model2={model2} />
        </div>
      </div>
    </CompareHandlesProvider>
  );
});

/**
 * Sidebar shell rendered inline beside the chat content.
 *
 * On mobile the children are placed inside a `Sheet` whose open state comes
 * from the sidebar context (`openMobile` / `setOpenMobile`). Otherwise they
 * are rendered in an `<aside>` whose width transitions between
 * `--sidebar-width` and zero depending on the sidebar state.
 *
 * @param children Sidebar content.
 * @param side Which edge the sidebar is attached to (defaults to "left").
 */
function InlineSidebar({
  children,
  side = "left",
}: {
  children: ReactNode;
  side?: "left" | "right";
}) {
  const sidebar = useSidebar();

  // Mobile: slide-over sheet instead of an inline column.
  if (sidebar.isMobile) {
    return (
      <Sheet open={sidebar.openMobile} onOpenChange={sidebar.setOpenMobile}>
        <SheetContent side={side} className="w-[18rem] p-0">
          <SheetHeader className="sr-only">
            <SheetTitle>Chat sidebar</SheetTitle>
            <SheetDescription>Chat threads and actions</SheetDescription>
          </SheetHeader>
          <div className="h-full overflow-auto">{children}</div>
        </SheetContent>
      </Sheet>
    );
  }

  const isCollapsed = sidebar.state === "collapsed";
  // The left border is only drawn for an expanded right-hand sidebar.
  const asideClassName = cn(
    "bg-muted/70 text-sidebar-foreground h-full overflow-hidden rounded-2xl corner-squircle transition-[width] duration-200 ease-linear",
    !isCollapsed && side === "right" && "border-l border-sidebar-border/70",
    isCollapsed ? "w-0" : "w-(--sidebar-width)",
  );

  return (
    <div
      className="group shrink-0 h-full pb-3.5"
      data-state={sidebar.state}
      data-collapsible={isCollapsed ? "offcanvas" : ""}
      data-side={side}
    >
      <aside data-sidebar="sidebar" className={asideClassName}>
        {/* Inner wrapper keeps full width so content does not reflow while the aside animates. */}
        <div className="flex h-full w-(--sidebar-width) flex-col">
          {children}
        </div>
      </aside>
    </div>
  );
}

/**
 * Shortcut buttons shown in the top bar while the sidebar is collapsed.
 *
 * Renders nothing unless the sidebar state is "collapsed". When visible it
 * shows a "New Chat" button and, if `showCompare` is set, a "Compare" button.
 *
 * @param onNewThread Invoked when the "New Chat" button is clicked.
 * @param onNewCompare Invoked when the "Compare" button is clicked.
 * @param showCompare Whether the "Compare" button is rendered.
 */
function TopBarActions({
  onNewThread,
  onNewCompare,
  showCompare,
}: { onNewThread: () => void; onNewCompare: () => void; showCompare: boolean }) {
  const sidebar = useSidebar();
  const sidebarCollapsed = sidebar.state === "collapsed";
  if (!sidebarCollapsed) {
    return null;
  }

  const compareAction = showCompare ? (
    <Tooltip>
      <TooltipTrigger asChild={true}>
        <Button variant="ghost" size="icon-sm" onClick={onNewCompare}>
          <HugeiconsIcon icon={ColumnInsertIcon} strokeWidth={2} />
        </Button>
      </TooltipTrigger>
      <TooltipContent side="bottom">Compare</TooltipContent>
    </Tooltip>
  ) : null;

  return (
    <>
      <Tooltip>
        <TooltipTrigger asChild={true}>
          <Button variant="ghost" size="icon-sm" onClick={onNewThread}>
            <HugeiconsIcon icon={PencilEdit02Icon} strokeWidth={2} />
          </Button>
        </TooltipTrigger>
        <TooltipContent side="bottom">New Chat</TooltipContent>
      </Tooltip>
      {compareAction}
    </>
  );
}

/**
 * Top-level chat page.
 *
 * Lays out the thread sidebar, the top bar (sidebar trigger, quick actions,
 * model selector, inline load status, settings toggle), the main content
 * (single thread or side-by-side compare) and the inference settings panel.
 * It also owns the guided-tour wiring and the training -> chat "handoff"
 * that loads a model and optionally opens compare mode on arrival.
 */
export function ChatPage(): ReactElement {
  // Current content view; starts as a fresh single-thread view with a new nonce.
  const [view, setView] = useState<ChatView>({
    mode: "single",
    newThreadNonce: crypto.randomUUID(),
  });
  const [settingsOpen, setSettingsOpen] = useState(false);
  const [modelSelectorOpen, setModelSelectorOpen] = useState(false);
  // While locked, close requests from the model selector are ignored
  // (see handleModelSelectorOpenChange).
  const [modelSelectorLocked, setModelSelectorLocked] = useState(false);
  const [sidebarOpen, setSidebarOpen] = useState(true);
  // View to restore when exitCompare is called after enterCompare.
  const [viewBeforeCompare, setViewBeforeCompare] = useState<ChatView | null>(
    null,
  );
  const inferenceParams = useChatRuntimeStore((state) => state.params);
  const setInferenceParams = useChatRuntimeStore((state) => state.setParams);
  const activeGgufVariant = useChatRuntimeStore((state) => state.activeGgufVariant);
  const autoTitle = useChatRuntimeStore((state) => state.autoTitle);
  const setAutoTitle = useChatRuntimeStore((state) => state.setAutoTitle);
  const modelsFromStore = useChatRuntimeStore((state) => state.models);
  const lorasFromStore = useChatRuntimeStore((state) => state.loras);
  const modelsError = useChatRuntimeStore((state) => state.modelsError);
  const activeThreadId = useChatRuntimeStore((state) => state.activeThreadId);
  const {
    refresh,
    selectModel,
    ejectModel,
    cancelLoading,
    loadingModel,
    loadProgress,
    loadToastDismissed,
  } =
    useChatModelRuntime();
  // Refs mirror the latest refresh/selectModel so the mount-only handoff
  // effect below can call them without listing them as dependencies.
  const refreshRef = useRef(refresh);
  const selectModelRef = useRef(selectModel);

  useEffect(() => {
    refreshRef.current = refresh;
    selectModelRef.current = selectModel;
  }, [refresh, selectModel]);
  // Compare actions are offered only once a checkpoint is selected.
  const canCompare = useMemo(() => {
    return Boolean(inferenceParams.checkpoint);
  }, [inferenceParams.checkpoint]);

  // Handles a selection from the top-bar model selector.
  const handleCheckpointChange = useCallback(
    (value: string, meta?: { isLora: boolean; ggufVariant?: string; isDownloaded?: boolean; expectedBytes?: number }) => {
      // Read the store directly so the comparison uses the current values
      // rather than ones captured at render time.
      const store = useChatRuntimeStore.getState();
      const currentCheckpoint = store.params.checkpoint;
      const currentVariant = store.activeGgufVariant;
      // Nothing to do for an empty value or when checkpoint and GGUF variant are unchanged.
      if (!value || (value === currentCheckpoint && (meta?.ggufVariant ?? null) === (currentVariant ?? null))) return;
      void (async () => {
        let showImageCompatibilityWarning = false;
        // In single-thread mode, check whether the active thread already holds
        // image messages and the newly chosen model is flagged as non-vision.
        if (view.mode === "single" && activeThreadId) {
          const thread = await db.threads.get(activeThreadId);
          if (thread?.modelId && thread.modelId !== value) {
            const messages = await db.messages
              .where("threadId")
              .equals(activeThreadId)
              .toArray();
            if (messages.length > 0) {
              const hasImage = messages.some(messageHasImage);
              const targetModel = modelsFromStore.find((model) => model.id === value);
              // Warn only when isVision is explicitly false; an unknown model does not warn.
              showImageCompatibilityWarning =
                hasImage && targetModel?.isVision === false;
            }
          }
        }

        if (showImageCompatibilityWarning) {
          toast.warning("Selected model may not handle earlier images", {
            description:
              "This chat already includes images. Text-only models can ignore them or fail on follow-up replies.",
            duration: 6000,
          });
        }
        await selectModel({
          id: value,
          isLora: meta?.isLora,
          ggufVariant: meta?.ggufVariant,
          isDownloaded: meta?.isDownloaded,
          expectedBytes: meta?.expectedBytes,
        });
      })();
    },
    [activeThreadId, modelsFromStore, selectModel, view],
  );
  const handleEject = useCallback(() => {
    void ejectModel();
  }, [ejectModel]);
  // Clears the active thread id and switches to a fresh single-thread view.
  const handleNewThread = useCallback(
    () => {
      useChatRuntimeStore.getState().setActiveThreadId(null);
      setView({ mode: "single", newThreadNonce: crypto.randomUUID() });
    },
    [],
  );
  // Opens a new compare view identified by a fresh pair id.
  const handleNewCompare = useCallback(
    () => setView({ mode: "compare", pairId: crypto.randomUUID() }),
    [],
  );

  // Open the model selector and lock it open (passed to the guided tour steps).
  const openModelSelector = useCallback(() => {
    setModelSelectorLocked(true);
    setModelSelectorOpen(true);
  }, []);

  // Unlock and close the model selector.
  const closeModelSelector = useCallback(() => {
    setModelSelectorLocked(false);
    setModelSelectorOpen(false);
  }, []);

  // Open-state handler for the selector: close requests are dropped while locked.
  const handleModelSelectorOpenChange = useCallback(
    (open: boolean) => {
      if (!open && modelSelectorLocked) return;
      setModelSelectorOpen(open);
    },
    [modelSelectorLocked],
  );
  const openSettings = useCallback(() => setSettingsOpen(true), []);
  const closeSettings = useCallback(() => setSettingsOpen(false), []);
  const openSidebar = useCallback(() => setSidebarOpen(true), []);

  // Switch to a new compare view, remembering the current view the first
  // time (an already-remembered view is kept).
  const enterCompare = useCallback(() => {
    setViewBeforeCompare((prev) => prev ?? view);
    setView({ mode: "compare", pairId: crypto.randomUUID() });
  }, [view]);

  // Restore the view remembered by enterCompare; no-op when none is stored.
  const exitCompare = useCallback(() => {
    if (!viewBeforeCompare) return;
    setView(viewBeforeCompare);
    setViewBeforeCompare(null);
  }, [viewBeforeCompare]);

  const handleThreadSelect = useCallback(
    (nextView: ChatView) => {
      setView(nextView);
    },
    [],
  );

  // Project store models down to the fields passed to the selectors.
  const models = useMemo<ModelOption[]>(
    () =>
      modelsFromStore.map((model) => ({
        id: model.id,
        name: model.name,
        description: model.description,
      })),
    [modelsFromStore],
  );

  // Project store LoRA entries down to the fields passed to the selectors.
  const loraModels = useMemo<LoraModelOption[]>(
    () =>
      lorasFromStore.map((lora) => ({
        id: lora.id,
        name: lora.name,
        baseModel: lora.baseModel,
        updatedAt: lora.updatedAt,
        source: lora.source,
        exportType: lora.exportType,
      })),
    [lorasFromStore],
  );

  // Regular refresh; skipped while a training handoff is pending because the
  // handoff effect below performs its own refresh.
  useEffect(() => {
    if (getTrainingCompareHandoff()) return;
    void refresh();
  }, [refresh]);

  // Training -> chat handoff (empty dependency list, uses the refs above):
  // refresh, then load the best LoRA for the requested base model and open
  // compare; otherwise load the base model if it is listed; otherwise only log.
  useEffect(() => {
    const handoff = getTrainingCompareHandoff();
    if (!handoff) return;
    console.info("[chat-handoff] received", handoff);
    function clearHandoff(): void {
      clearTrainingCompareHandoff();
    }

    // Set by the cleanup function; checked after each await to stop the flow.
    let canceled = false;
    void (async () => {
      try {
        console.info("[chat-handoff] refreshing models+loras");
        await refreshRef.current();
        if (canceled) return;

        // Read the store after the refresh so the lists are up to date.
        const state = useChatRuntimeStore.getState();
        const targetLora = pickBestLoraForBase(state.loras, handoff.baseModel);
        if (targetLora) {
          console.info("[chat-handoff] loading lora", {
            id: targetLora.id,
            baseModel: targetLora.baseModel,
          });
          await selectModelRef.current({ id: targetLora.id, isLora: true });
          if (canceled) return;
          setView({ mode: "compare", pairId: crypto.randomUUID() });
          clearHandoff();
          console.info("[chat-handoff] loaded lora + opened compare");
          return;
        }

        if (
          handoff.baseModel &&
          state.models.some((model) => model.id === handoff.baseModel)
        ) {
          console.info("[chat-handoff] no lora match, loading base", {
            id: handoff.baseModel,
          });
          await selectModelRef.current({ id: handoff.baseModel, isLora: false });
          if (canceled) return;
        } else {
          console.warn("[chat-handoff] no lora/base match found", {
            requestedBaseModel: handoff.baseModel,
            loraCount: state.loras.length,
            modelCount: state.models.length,
          });
        }
        clearHandoff();
        console.info("[chat-handoff] completed");
      } catch (error) {
        // The handoff is cleared on failure as well.
        console.error("[chat-handoff] failed", error);
        clearHandoff();
      }
    })();

    return () => {
      canceled = true;
    };
  }, []);

  // Guided tour steps receive the callbacks above so they can drive the UI.
  const tourSteps = useMemo(
    () =>
      buildChatTourSteps({
        canCompare,
        openModelSelector,
        closeModelSelector,
        openSettings,
        closeSettings,
        openSidebar,
        enterCompare,
        exitCompare,
      }),
    [
      canCompare,
      closeModelSelector,
      closeSettings,
      enterCompare,
      exitCompare,
      openModelSelector,
      openSettings,
      openSidebar,
    ],
  );

  const tour = useGuidedTourController({
    id: "chat",
    steps: tourSteps,
  });

  // If the tour is not open but the selector is still locked, unlock and
  // close it on a zero-delay timeout (cleared if the effect re-runs first).
  useEffect(() => {
    if (tour.open) return;
    if (!modelSelectorLocked) return;
    const timeoutId = window.setTimeout(() => {
      setModelSelectorLocked(false);
      setModelSelectorOpen(false);
    }, 0);
    return () => window.clearTimeout(timeoutId);
  }, [modelSelectorLocked, tour.open]);

  return (
    <div className="h-[calc(100dvh-4rem)] bg-background overflow-hidden">
      <GuidedTour {...tour.tourProps} />
      <SidebarProvider
        defaultOpen={true}
        open={sidebarOpen}
        onOpenChange={setSidebarOpen}
        className="!min-h-0 h-full w-full max-w-7xl mx-auto px-2 sm:px-4"
        style={
          {
            "--sidebar-width": "14rem",
            "--sidebar-width-icon": "3rem",
          } as CSSProperties
        }
      >
        <InlineSidebar>
          <ThreadSidebar
            view={view}
            onSelect={handleThreadSelect}
            onNewThread={handleNewThread}
            onNewCompare={handleNewCompare}
            showCompare={canCompare}
          />
        </InlineSidebar>

        <div className="flex min-h-0 min-w-0 flex-1 flex-col">
          <div className="flex h-11 shrink-0 items-center px-1.5 sm:px-2">
            <div className="flex items-center gap-1">
              <SidebarTrigger />
              <TopBarActions
                onNewThread={handleNewThread}
                onNewCompare={handleNewCompare}
                showCompare={canCompare}
              />
              <ModelSelector
                models={models}
                loraModels={loraModels}
                value={inferenceParams.checkpoint}
                activeGgufVariant={activeGgufVariant}
                onValueChange={handleCheckpointChange}
                onEject={handleEject}
                variant="ghost"
                open={modelSelectorOpen}
                onOpenChange={handleModelSelectorOpenChange}
                triggerDataTour="chat-model-selector"
                contentDataTour="chat-model-selector-popover"
                className="max-w-[62vw] sm:max-w-none"
              />
              {loadingModel && loadToastDismissed ? (
                <ModelLoadInlineStatus
                  label={
                    loadProgress?.phase === "starting"
                      ? "Starting model…"
                      : loadingModel.isDownloaded
                        ? "Loading model…"
                        : "Downloading model…"
                  }
                  title={loadingModel.isDownloaded
                    ? `Loading ${loadingModel.displayName} from cache.`
                    : `Loading ${loadingModel.displayName}. This may include downloading.`}
                  progressPercent={loadProgress?.percent}
                  progressLabel={loadProgress?.label}
                  onStop={cancelLoading}
                />
              ) : null}
            </div>
            {modelsError && (
              <div className="ml-2 text-xs text-destructive truncate max-w-[28rem]">
                {modelsError}
              </div>
            )}
            <div className="flex-1" />
            <button
              type="button"
              onClick={() => setSettingsOpen((o) => !o)}
              className="flex h-9 w-9 items-center justify-center rounded-md text-muted-foreground transition-colors hover:bg-accent hover:text-foreground"
              title="Inference settings"
              data-tour="chat-settings"
            >
              <HugeiconsIcon icon={Settings04Icon} className="size-5" />
            </button>
          </div>

          {view.mode === "single" ? (
            <SingleContent
              key={view.threadId ?? view.newThreadNonce ?? "new"}
              threadId={view.threadId}
              newThreadNonce={view.newThreadNonce}
            />
          ) : (
            <CompareContent key={view.pairId} pairId={view.pairId} models={models} loraModels={loraModels} />
          )}
        </div>

        <ChatSettingsPanel
          open={settingsOpen}
          onOpenChange={setSettingsOpen}
          params={inferenceParams}
          onParamsChange={setInferenceParams}
          autoTitle={autoTitle}
          onAutoTitleChange={setAutoTitle}
          onReloadModel={() => {
            const state = useChatRuntimeStore.getState();
            if (state.params.checkpoint) {
              selectModel({
                id: state.params.checkpoint,
                ggufVariant: state.activeGgufVariant ?? undefined,
                forceReload: true,
                isDownloaded: true,
                loadingDescription: "Reloading with updated chat template.",
              });
            }
          }}
        />
      </SidebarProvider>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/chat/chat-settings-sheet.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Slider } from "@/components/ui/slider";
import { Textarea } from "@/components/ui/textarea";
import {
  ArrowDown01Icon,
  CodeIcon,
  Delete02Icon,
  FloppyDiskIcon,
  PencilEdit01Icon,
  Settings02Icon,
  SlidersHorizontalIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { AnimatePresence, motion } from "motion/react";
import {
  Sheet,
  SheetContent,
  SheetDescription,
  SheetHeader,
  SheetTitle,
} from "@/components/ui/sheet";
import { useIsMobile } from "@/hooks/use-mobile";
import type { ReactNode } from "react";
import { useState } from "react";
import {
  DEFAULT_INFERENCE_PARAMS,
  type InferenceParams,
} from "./types/runtime";
import { useChatRuntimeStore } from "./stores/chat-runtime-store";
import { Switch } from "@/components/ui/switch";

// Re-export the shared defaults and the params type under this module's names.
export const defaultInferenceParams = DEFAULT_INFERENCE_PARAMS;
export type { InferenceParams } from "./types/runtime";

/** A named set of inference parameters selectable from the settings panel. */
export interface Preset {
  /** Display name; also used as the preset's identity in the preset list. */
  name: string;
  /** Parameter values stored with the preset. */
  params: InferenceParams;
}

// Presets that are always available. Each one starts from the default
// inference params and overrides only the sampling fields listed.
const BUILTIN_PRESETS: Preset[] = [
  { name: "Default", params: { ...defaultInferenceParams } },
  {
    name: "Creative",
    params: {
      ...defaultInferenceParams,
      temperature: 1.5,
      topP: 1.0,
      topK: 0,
      minP: 0.1,
      repetitionPenalty: 1.0,
    },
  },
  {
    name: "Precise",
    params: {
      ...defaultInferenceParams,
      temperature: 0.1,
      topP: 0.95,
      topK: 80,
      minP: 0.01,
      repetitionPenalty: 1.0,
    },
  },
];

/**
 * Labelled single-value slider row used throughout the settings panel.
 *
 * @param label Text shown on the left of the header row.
 * @param value Current numeric value of the slider.
 * @param min Lower bound passed to the slider.
 * @param max Upper bound passed to the slider.
 * @param step Increment passed to the slider.
 * @param onChange Called with the new value when the slider changes.
 * @param displayValue Optional text shown instead of the raw number.
 */
function ParamSlider({
  label,
  value,
  min,
  max,
  step,
  onChange,
  displayValue,
}: {
  label: string;
  value: number;
  min: number;
  max: number;
  step: number;
  onChange: (v: number) => void;
  displayValue?: string;
}) {
  // Fall back to the raw number when no custom text is supplied.
  const shownValue = displayValue ?? value;
  // The slider reports an array of values; only the first one is forwarded.
  const forwardFirstValue = ([next]: number[]) => onChange(next);

  return (
    <div className="space-y-2">
      <div className="flex items-center justify-between">
        <span className="text-xs font-medium">{label}</span>
        <span className="text-xs tabular-nums text-muted-foreground">
          {shownValue}
        </span>
      </div>
      <Slider
        min={min}
        max={max}
        step={step}
        value={[value]}
        onValueChange={forwardFirstValue}
      />
    </div>
  );
}

/**
 * Settings section with a clickable header that expands or collapses its body.
 *
 * The header is a button showing an icon, a label and a chevron that rotates
 * when the section is open. The body is mounted only while open and animates
 * its height and opacity on enter and exit.
 *
 * @param icon Icon shown at the start of the header.
 * @param label Header text.
 * @param children Body content rendered while the section is open.
 * @param defaultOpen Initial open state (defaults to closed).
 */
function CollapsibleSection({
  icon,
  label,
  children,
  defaultOpen = false,
}: {
  icon: Parameters<typeof HugeiconsIcon>[0]["icon"];
  label: string;
  children?: ReactNode;
  defaultOpen?: boolean;
}) {
  const [open, setOpen] = useState(defaultOpen);

  return (
    <div>
      <button
        type="button"
        // Expose the disclosure state to assistive technology.
        aria-expanded={open}
        // Functional update so the toggle never acts on a stale `open` value.
        onClick={() => setOpen((isOpen) => !isOpen)}
        className="flex w-full items-center corner-squircle gap-2.5 rounded-md px-2 py-2 text-sm transition-colors hover:bg-accent"
      >
        <HugeiconsIcon icon={icon} className="size-4 text-muted-foreground" />
        <span className="flex-1 text-left font-medium">{label}</span>
        <motion.div
          animate={{ rotate: open ? 180 : 0 }}
          transition={{ duration: 0.15 }}
        >
          <HugeiconsIcon
            icon={ArrowDown01Icon}
            className="size-3.5 text-muted-foreground"
          />
        </motion.div>
      </button>
      {/* initial={false}: a section that starts open does not animate on first render. */}
      <AnimatePresence initial={false}>
        {open && (
          <motion.div
            initial={{ height: 0, opacity: 0 }}
            animate={{ height: "auto", opacity: 1 }}
            exit={{ height: 0, opacity: 0 }}
            transition={{ duration: 0.2, ease: "easeInOut" }}
            className="overflow-hidden"
          >
            <div className="px-2 pb-3 pt-1">{children}</div>
          </motion.div>
        )}
      </AnimatePresence>
    </div>
  );
}

/** Props accepted by `ChatSettingsPanel`. */
interface ChatSettingsPanelProps {
  /** Whether the panel is shown (sheet open on mobile, expanded aside otherwise). */
  open: boolean;
  /** Open-state callback; forwarded to the mobile sheet. */
  onOpenChange?: (open: boolean) => void;
  /** Current inference parameters displayed and edited by the panel. */
  params: InferenceParams;
  /** Called with a full, updated params object on every edit. */
  onParamsChange: (params: InferenceParams) => void;
  /** Current value of the "Auto title" switch. */
  autoTitle: boolean;
  /** Called when the "Auto title" switch is toggled. */
  onAutoTitleChange: (enabled: boolean) => void;
  /** Optional hook to reload the model; invoked after the KV cache dtype changes and passed to the chat template section. */
  onReloadModel?: () => void;
}

/**
 * Inference settings panel for the chat page.
 *
 * Shows the preset picker, system prompt, sampling sliders, general toggles
 * (auto title, trust remote code, KV cache dtype for GGUF models, tool-call
 * settings) and the chat template editor. On mobile the content is rendered
 * in a right-hand `Sheet`; otherwise it is an `<aside>` whose width collapses
 * to zero when `open` is false.
 *
 * @param open Whether the panel is visible.
 * @param onOpenChange Open-state callback, forwarded to the mobile sheet.
 * @param params Current inference parameters.
 * @param onParamsChange Receives a full updated params object on each edit.
 * @param autoTitle Current value of the "Auto title" switch.
 * @param onAutoTitleChange Called when the "Auto title" switch is toggled.
 * @param onReloadModel Optional callback used to reload the model after
 *   settings that require it (KV cache dtype, chat template) change.
 */
export function ChatSettingsPanel({
  open,
  onOpenChange,
  params,
  onParamsChange,
  autoTitle,
  onAutoTitleChange,
  onReloadModel,
}: ChatSettingsPanelProps) {
  const isMobile = useIsMobile();
  // A model counts as GGUF whenever the store holds an active GGUF variant.
  const isGguf = useChatRuntimeStore((s) => s.activeGgufVariant) != null;
  const ggufContextLength = useChatRuntimeStore((s) => s.ggufContextLength);
  const kvCacheDtype = useChatRuntimeStore((s) => s.kvCacheDtype);
  const setKvCacheDtype = useChatRuntimeStore((s) => s.setKvCacheDtype);
  // Presets live in component state only: built-ins plus anything saved here.
  const [presets, setPresets] = useState<Preset[]>(BUILTIN_PRESETS);
  const [activePreset, setActivePreset] = useState("Default");
  const isBuiltinPreset = BUILTIN_PRESETS.some((p) => p.name === activePreset);

  // Returns a setter for one params field that emits a full updated params object.
  function set<K extends keyof InferenceParams>(key: K) {
    return (v: InferenceParams[K]) => onParamsChange({ ...params, [key]: v });
  }

  // Applies the named preset while keeping the current system prompt,
  // checkpoint and trust-remote-code choice.
  function applyPreset(name: string) {
    const p = presets.find((pr) => pr.name === name);
    if (p) {
      onParamsChange({
        ...p.params,
        systemPrompt: params.systemPrompt,
        checkpoint: params.checkpoint,
        trustRemoteCode: params.trustRemoteCode,
      });
      setActivePreset(name);
    }
  }

  // Prompts for a name and stores the current params under it, replacing any
  // existing custom preset with the same name.
  function savePreset() {
    const name = prompt("Preset name:");
    const trimmed = name?.trim();
    if (!trimmed) {
      return;
    }
    // Built-in presets are read-only. Saving under one of their names would
    // replace the built-in entry, and deletePreset refuses built-in names, so
    // the overwritten preset could neither be removed nor restored.
    if (BUILTIN_PRESETS.some((p) => p.name === trimmed)) {
      alert(
        `"${trimmed}" is a built-in preset and cannot be overwritten. Please choose a different name.`,
      );
      return;
    }
    setPresets((prev) => [
      ...prev.filter((p) => p.name !== trimmed),
      { name: trimmed, params: { ...params } },
    ]);
    setActivePreset(trimmed);
  }

  // Removes a custom preset; built-in names are ignored. Falls back to
  // "Default" when the deleted preset was the active one.
  function deletePreset(name: string) {
    if (BUILTIN_PRESETS.some((p) => p.name === name)) {
      return;
    }
    setPresets((prev) => prev.filter((p) => p.name !== name));
    if (activePreset === name) {
      setActivePreset("Default");
    }
  }

  // Shared body rendered inside both the mobile sheet and the desktop aside.
  const settingsContent = (
    <>
      <div className="flex items-center gap-2 px-4 py-3">
        <HugeiconsIcon
          icon={PencilEdit01Icon}
          className="size-4 text-muted-foreground/70"
        />
        <span className="flex-1 text-base font-semibold tracking-tight">
          Configuration
        </span>
      </div>

      <div className="flex-1 overflow-y-auto px-1.5">
        {/* mt-4 matches the Playground sidebar gap (SidebarHeader py-3 + SidebarGroup pt-1) */}
        <div className="mt-4 px-2 pb-3">
            <div className="flex items-center gap-2">
              <Select value={activePreset} onValueChange={applyPreset}>
                <SelectTrigger className="h-8 flex-1 corner-squircle text-xs">
                  <SelectValue />
                </SelectTrigger>
                <SelectContent>
                  {presets.map((p) => (
                    <SelectItem key={p.name} value={p.name}>
                      {p.name}
                    </SelectItem>
                  ))}
                </SelectContent>
              </Select>
              <button
                type="button"
                onClick={savePreset}
                className="flex h-8 items-center gap-1.5 rounded-md border px-2.5 text-xs text-muted-foreground transition-colors hover:bg-accent"
                title="Save preset"
              >
                <HugeiconsIcon icon={FloppyDiskIcon} className="size-3.5" />
                Save
              </button>
              <button
                type="button"
                onClick={() => deletePreset(activePreset)}
                disabled={isBuiltinPreset}
                className="flex h-8 items-center gap-1.5 rounded-md border px-2.5 text-xs text-muted-foreground transition-colors hover:bg-accent disabled:cursor-not-allowed disabled:opacity-50"
                title={
                  isBuiltinPreset
                    ? "Built-in presets cannot be deleted"
                    : "Delete selected preset"
                }
              >
                <HugeiconsIcon icon={Delete02Icon} className="size-3.5" />
                Delete
              </button>
            </div>
          </div>

          <div className="px-2 pb-4">
            <label
              htmlFor="system-prompt"
              className="mb-1.5 block text-xs font-medium"
            >
              System Prompt
            </label>
            <Textarea
              id="system-prompt"
              value={params.systemPrompt}
              onChange={(e) => set("systemPrompt")(e.target.value)}
              placeholder="You are a helpful assistant..."
              className="min-h-20 text-xs corner-squircle"
              rows={3}
            />
          </div>

          <CollapsibleSection
            icon={SlidersHorizontalIcon}
            label="Sampling"
            defaultOpen={true}
          >
            <div className="flex flex-col gap-5">
              <ParamSlider
                label="Temperature"
                value={params.temperature}
                min={0}
                max={2}
                step={0.1}
                onChange={set("temperature")}
              />
              <ParamSlider
                label="Top P"
                value={params.topP}
                min={0}
                max={1}
                step={0.05}
                onChange={set("topP")}
                displayValue={params.topP === 1 ? "Off" : undefined}
              />
              <ParamSlider
                label="Top K"
                value={params.topK}
                min={0}
                max={100}
                step={1}
                onChange={set("topK")}
                displayValue={params.topK === 0 ? "Off" : undefined}
              />
              <ParamSlider
                label="Min P"
                value={params.minP}
                min={0}
                max={1}
                step={0.01}
                onChange={set("minP")}
              />
              <ParamSlider
                label="Repetition Penalty"
                value={params.repetitionPenalty}
                min={1}
                max={2}
                step={0.05}
                onChange={set("repetitionPenalty")}
                displayValue={params.repetitionPenalty === 1 ? "Off" : undefined}
              />
              <ParamSlider
                label="Presence Penalty"
                value={params.presencePenalty}
                min={0}
                max={2}
                step={0.1}
                onChange={set("presencePenalty")}
                displayValue={params.presencePenalty === 0 ? "Off" : undefined}
              />
              {!isGguf && (
                <ParamSlider
                  label="Max Seq Length"
                  value={params.maxSeqLength}
                  min={128}
                  max={32768}
                  step={128}
                  onChange={set("maxSeqLength")}
                />
              )}
              <ParamSlider
                label="Max Tokens"
                value={params.maxTokens}
                min={64}
                max={isGguf && ggufContextLength ? ggufContextLength : 32768}
                step={64}
                onChange={set("maxTokens")}
                displayValue={
                  isGguf && ggufContextLength && params.maxTokens >= ggufContextLength
                    ? "Max"
                    : undefined
                }
              />
            </div>
          </CollapsibleSection>

          <CollapsibleSection icon={Settings02Icon} label="Settings" defaultOpen={true}>
            <div className="flex flex-col gap-3 py-1">
              <div className="flex items-center justify-between gap-3">
                <div className="min-w-0">
                  <div className="text-xs font-medium">Auto title</div>
                  <div className="text-[11px] text-muted-foreground">
                    Generate short title after reply.
                  </div>
                </div>
                <Switch
                  checked={autoTitle}
                  onCheckedChange={onAutoTitleChange}
                />
              </div>
              <div className="flex items-center justify-between gap-3">
                <div className="min-w-0">
                  <div className="text-xs font-medium">Trust remote code</div>
                  <div className="text-[11px] text-muted-foreground">
                    Allow models with custom code (e.g. Nemotron). Only enable for repos you trust.
                  </div>
                </div>
                <Switch
                  checked={params.trustRemoteCode ?? false}
                  onCheckedChange={set("trustRemoteCode")}
                />
              </div>
              {isGguf && (
                <div className="flex items-center justify-between gap-3">
                  <div className="min-w-0">
                    <div className="text-xs font-medium">KV Cache Dtype</div>
                    <div className="text-[11px] text-muted-foreground">
                      Quantize KV cache to reduce VRAM. Reload to apply.
                    </div>
                  </div>
                  <Select
                    value={kvCacheDtype ?? "f16"}
                    onValueChange={(v) => {
                      setKvCacheDtype(v === "f16" ? null : v);
                      onReloadModel?.();
                    }}
                  >
                    <SelectTrigger className="h-7 w-[90px] text-xs">
                      <SelectValue />
                    </SelectTrigger>
                    <SelectContent>
                      <SelectItem value="f16">f16</SelectItem>
                      <SelectItem value="bf16">bf16</SelectItem>
                      <SelectItem value="q8_0">q8_0</SelectItem>
                      <SelectItem value="q5_1">q5_1</SelectItem>
                      <SelectItem value="q4_1">q4_1</SelectItem>
                    </SelectContent>
                  </Select>
                </div>
              )}
              <AutoHealToolCallsToggle />
              <MaxToolCallsSlider />
              <ToolCallTimeoutSlider />
            </div>
          </CollapsibleSection>

          <ChatTemplateSection onReloadModel={onReloadModel} />
        </div>
      </>
  );

  if (isMobile) {
    return (
      <Sheet open={open} onOpenChange={onOpenChange}>
        <SheetContent side="right" className="w-[18rem] p-0">
          <SheetHeader className="sr-only">
            <SheetTitle>Configuration</SheetTitle>
            <SheetDescription>Chat inference settings</SheetDescription>
          </SheetHeader>
          <div className="flex h-full flex-col">{settingsContent}</div>
        </SheetContent>
      </Sheet>
    );
  }

  return (
    <aside
      className={`shrink-0 self-start h-[calc(100%-0.875rem)] overflow-hidden bg-muted/70 rounded-2xl corner-squircle transition-[width] duration-200 ease-linear ${open ? "w-[17rem] border-l border-sidebar-border/70" : "w-0"}`}
    >
      <div className="flex h-full w-[17rem] flex-col">{settingsContent}</div>
    </aside>
  );
}

/**
 * Slider bound to the store's `maxToolCallsPerMessage` setting.
 *
 * The slider runs from 0 to 41. Position 41 stands for the stored value 9999
 * and is shown as "Max"; position 0 is shown as "Off".
 */
function MaxToolCallsSlider() {
  const storedLimit = useChatRuntimeStore((s) => s.maxToolCallsPerMessage);
  const setStoredLimit = useChatRuntimeStore((s) => s.setMaxToolCallsPerMessage);

  // Stored value -> slider position: 9999 and above sit on the top notch,
  // everything else is capped at 40.
  let sliderPosition: number;
  if (storedLimit >= 9999) {
    sliderPosition = 41;
  } else {
    sliderPosition = Math.min(storedLimit, 40);
  }

  // Text shown instead of the number at either end of the range.
  let shownText: string | undefined;
  if (sliderPosition >= 41) {
    shownText = "Max";
  } else if (sliderPosition === 0) {
    shownText = "Off";
  } else {
    shownText = undefined;
  }

  // Slider position -> stored value: the top notch is written back as 9999.
  const handleChange = (position: number) =>
    setStoredLimit(position >= 41 ? 9999 : position);

  return (
    <ParamSlider
      label="Max Tool Calls Per Message"
      value={sliderPosition}
      min={0}
      max={41}
      step={1}
      onChange={handleChange}
      displayValue={shownText}
    />
  );
}

/**
 * Slider bound to the store's `toolCallTimeout` setting.
 *
 * The slider runs from 1 to 31. Position 31 stands for the stored value 9999
 * and is shown as "Max"; other positions are shown as a number of minutes.
 */
function ToolCallTimeoutSlider() {
  const storedTimeout = useChatRuntimeStore((s) => s.toolCallTimeout);
  const setStoredTimeout = useChatRuntimeStore((s) => s.setToolCallTimeout);

  // Stored value -> slider position: 9999 and above sit on the top notch,
  // everything else is clamped into 1..30.
  let sliderPosition: number;
  if (storedTimeout >= 9999) {
    sliderPosition = 31;
  } else {
    const atLeastOne = Math.max(storedTimeout, 1);
    sliderPosition = Math.min(atLeastOne, 30);
  }

  let shownText: string;
  if (sliderPosition >= 31) {
    shownText = "Max";
  } else if (sliderPosition === 1) {
    shownText = "1 minute";
  } else {
    shownText = `${sliderPosition} minutes`;
  }

  // Slider position -> stored value: the top notch is written back as 9999.
  const handleChange = (position: number) =>
    setStoredTimeout(position >= 31 ? 9999 : position);

  return (
    <ParamSlider
      label="Max Tool Call Duration"
      value={sliderPosition}
      min={1}
      max={31}
      step={1}
      onChange={handleChange}
      displayValue={shownText}
    />
  );
}

/**
 * Labelled switch bound to the `autoHealToolCalls` flag in the chat runtime
 * store.
 */
function AutoHealToolCallsToggle() {
  const enabled = useChatRuntimeStore((s) => s.autoHealToolCalls);
  const setEnabled = useChatRuntimeStore((s) => s.setAutoHealToolCalls);

  const caption = (
    <div className="min-w-0">
      <div className="text-xs font-medium">Auto Heal Tool Calls 🦥</div>
      <div className="text-[11px] text-muted-foreground">
        Fix malformed tool calls from the model automatically.
      </div>
    </div>
  );

  return (
    <div className="flex items-center justify-between gap-3">
      {caption}
      <Switch checked={enabled} onCheckedChange={setEnabled} />
    </div>
  );
}

/**
 * Collapsible editor for the chat template.
 *
 * Renders nothing when the store has no default template. The textarea shows
 * the override when one is set, otherwise the default; typing stores the text
 * as the override. While an override is set, two buttons are shown: one that
 * invokes `onReloadModel` and one that clears the override.
 */
function ChatTemplateSection({
  onReloadModel,
}: {
  onReloadModel?: () => void;
}) {
  const baseTemplate = useChatRuntimeStore((s) => s.defaultChatTemplate);
  const editedTemplate = useChatRuntimeStore((s) => s.chatTemplateOverride);
  const updateOverride = useChatRuntimeStore((s) => s.setChatTemplateOverride);

  if (!baseTemplate) return null;

  const hasEdits = editedTemplate !== null;
  const shownTemplate = editedTemplate ?? baseTemplate;

  // Wrapped so the click event is not forwarded to the callback.
  const handleApply = () => {
    onReloadModel?.();
  };
  const handleRevert = () => updateOverride(null);

  const actionButtons = hasEdits ? (
    <>
      <button
        type="button"
        onClick={handleApply}
        className="rounded-md bg-primary px-2.5 py-1 text-[11px] font-medium text-primary-foreground transition-colors hover:bg-primary/90"
      >
        Apply & Reload
      </button>
      <button
        type="button"
        onClick={handleRevert}
        className="rounded-md border px-2.5 py-1 text-[11px] font-medium text-muted-foreground transition-colors hover:bg-accent"
      >
        Revert changes
      </button>
    </>
  ) : null;

  return (
    <CollapsibleSection icon={CodeIcon} label="Chat Template">
      <div className="flex flex-col gap-2 py-1">
        <Textarea
          value={shownTemplate}
          onChange={(event) => updateOverride(event.target.value)}
          className="min-h-32 font-mono text-[10px] leading-relaxed corner-squircle"
          rows={6}
          spellCheck={false}
        />
        <div className="flex flex-wrap gap-1.5">{actionButtons}</div>
      </div>
    </CollapsibleSection>
  );
}


================================================
FILE: studio/frontend/src/features/chat/components/model-load-status.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Progress } from "@/components/ui/progress";
import { Spinner } from "@/components/ui/spinner";
import { Button } from "@/components/ui/button";

/** Props for `ModelLoadDescription`. */
type ModelLoadDescriptionProps = {
  /** Heading line; omitted from the output when empty/null. */
  title?: string | null;
  /** Secondary text; only rendered when `progressPercent` is not a number. */
  message?: string | null;
  /** When a number, a progress bar and percentage are rendered instead of `message`. */
  progressPercent?: number | null;
  /** Text shown next to the percentage while the progress bar is visible. */
  progressLabel?: string | null;
  /** When provided, a "Cancel" button is rendered that invokes it on click. */
  onStop?: () => void;
};

/**
 * Clamp a progress percentage into the inclusive range 0-100.
 *
 * `NaN` is mapped to 0: `Math.min`/`Math.max` would otherwise propagate it,
 * and callers only gate on `typeof value === "number"` (which `NaN`
 * satisfies), so it would surface as "NaN%" in the UI.
 *
 * @param value Raw percentage; may be out of range, infinite, or NaN.
 * @returns A number in [0, 100].
 */
function clampProgress(value: number): number {
  if (Number.isNaN(value)) {
    return 0;
  }
  return Math.max(0, Math.min(100, value));
}

/**
 * Body of the model-load status row: a spinner, an optional title, then
 * either a labelled progress bar (when `progressPercent` is a number) or the
 * plain `message`, and an optional "Cancel" button wired to `onStop`.
 */
export function ModelLoadDescription({
  title,
  message,
  progressPercent,
  progressLabel,
  onStop,
}: ModelLoadDescriptionProps) {
  // null means "no numeric progress available".
  const clamped =
    typeof progressPercent === "number" ? clampProgress(progressPercent) : null;

  const heading = title ? (
    <p className="text-foreground leading-5 font-semibold">{title}</p>
  ) : null;

  // Progress takes precedence over the message; with neither, nothing is shown.
  const detail =
    clamped !== null ? (
      <div className="w-full pt-1">
        <div className="flex items-center justify-between text-[10px] font-medium tracking-[0.08em] text-muted-foreground/80">
          <span>{progressLabel}</span>
          <span>{Math.round(clamped)}%</span>
        </div>
        <Progress value={clamped} className="h-1 bg-foreground/[0.08]" />
      </div>
    ) : message ? (
      <p className="pt-1 text-xs leading-relaxed text-muted-foreground">{message}</p>
    ) : null;

  const cancelButton = onStop ? (
    <Button
      type="button"
      size="xs"
      variant="ghost"
      aria-label="Stop model loading"
      className="h-auto self-stretch shrink-0 !rounded-none !border-0 bg-transparent px-1 text-[10px] text-muted-foreground hover:bg-transparent hover:text-destructive focus-visible:text-destructive"
      onClick={onStop}
    >
      Cancel
    </Button>
  ) : null;

  return (
    <div className="relative flex min-h-12 w-full items-stretch gap-2">
      <div className="flex h-full shrink-0 items-center self-center">
        <Spinner className="size-4 text-foreground" />
      </div>
      <div className="min-w-0 flex-1 pr-5">
        {heading}
        {detail}
      </div>
      {cancelButton}
    </div>
  );
}

/** Props for `ModelLoadInlineStatus`. */
type ModelLoadInlineStatusProps = {
  /** Text rendered next to the spinner. */
  label: string;
  /** Applied as the `title` attribute of the wrapping element. */
  title: string;
  /** When a number, a progress bar and percentage are rendered. */
  progressPercent?: number | null;
  /** Text shown before the percentage while the progress bar is visible. */
  progressLabel?: string | null;
  /** When provided, a "Stop" button is rendered that invokes it on click. */
  onStop?: () => void;
};

/**
 * Single-row model-load indicator: spinner with `label`, an optional progress
 * bar with label and percentage (when `progressPercent` is a number), and an
 * optional "Stop" button wired to `onStop`. `title` becomes the wrapper's
 * `title` attribute.
 */
export function ModelLoadInlineStatus({
  label,
  title,
  progressPercent,
  progressLabel,
  onStop,
}: ModelLoadInlineStatusProps) {
  // null means "no numeric progress available".
  const clamped =
    typeof progressPercent === "number" ? clampProgress(progressPercent) : null;

  const progressSection =
    clamped === null ? null : (
      <div className="flex min-w-0 flex-[1.35] items-center gap-2.5">
        <div className="min-w-[7rem] flex-1">
          <Progress value={clamped} className="h-1 bg-foreground/[0.08]" />
        </div>
        <div className="flex shrink-0 items-center gap-1 text-[10px] font-medium tracking-[0.08em] text-muted-foreground/80">
          <span>{progressLabel}</span>
          <span>{Math.round(clamped)}%</span>
        </div>
      </div>
    );

  const stopButton = onStop ? (
    <Button
      type="button"
      size="xs"
      variant="outline"
      className="shrink-0 text-[11px]"
      onClick={onStop}
    >
      Stop
    </Button>
  ) : null;

  return (
    <div className="flex min-w-[20rem] items-center gap-2.5 text-muted-foreground" title={title}>
      <div className="flex items-center gap-1.5 shrink-0">
        <Spinner className="size-3.5 shrink-0" />
        <span className="text-xs">{label}</span>
      </div>
      {progressSection}
      {stopButton}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/chat/db.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import Dexie, { type EntityTable, liveQuery } from "dexie";
import { useEffect, useState } from "react";
import type { MessageRecord, ThreadRecord } from "./types";

// Dexie database named "unsloth-chat", typed with two tables (`threads` and
// `messages`) whose primary key is `id`.
const db = new Dexie("unsloth-chat") as Dexie & {
  threads: EntityTable<ThreadRecord, "id">;
  messages: EntityTable<MessageRecord, "id">;
};

// Version 1: initial schema. In each Dexie schema string the first entry is
// the primary key and the remaining entries are indexed properties.
db.version(1).stores({
  threads: "id, modelType, pairId, archived, createdAt",
  messages: "id, threadId, createdAt",
});

// Version 2: same indexes as version 1; the upgrade step empties the
// `messages` table.
db.version(2)
  .stores({
    threads: "id, modelType, pairId, archived, createdAt",
    messages: "id, threadId, createdAt",
  })
  .upgrade((tx) => tx.table("messages").clear());

// Version 3: same indexes again; the upgrade step walks every thread and sets
// `modelId` to "" wherever it is missing or otherwise falsy.
db.version(3)
  .stores({
    threads: "id, modelType, pairId, archived, createdAt",
    messages: "id, threadId, createdAt",
  })
  .upgrade((tx) =>
    tx
      .table("threads")
      .toCollection()
      .modify((thread) => {
        if (!thread.modelId) thread.modelId = "";
      }),
  );

export { db };

/**
 * Subscribe a component to a Dexie `liveQuery` and return its latest result.
 *
 * Returns `undefined` until the first value is emitted. Errors from the
 * observable are logged to the console and do not change the returned value.
 * The subscription is torn down and re-created whenever `querier` or any
 * entry of `deps` changes.
 *
 * NOTE(review): `querier` itself is part of the dependency list, so a caller
 * passing a fresh inline function re-subscribes on every render — confirm
 * that callers pass a stable function.
 */
export function useLiveQuery<T>(
  querier: () => Promise<T>,
  deps: unknown[] = [],
): T | undefined {
  const [latest, setLatest] = useState<T>();

  useEffect(() => {
    const observable = liveQuery(querier);
    const subscription = observable.subscribe({
      next: setLatest,
      error: (err) => console.error("useLiveQuery:", err),
    });
    return () => {
      subscription.unsubscribe();
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [querier, ...deps]);

  return latest;
}


================================================
FILE: studio/frontend/src/features/chat/hooks/use-chat-model-runtime.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createElement, useCallback, useRef, useState } from "react";
import { toast } from "sonner";
import { ModelLoadDescription } from "../components/model-load-status";
import {
  getDownloadProgress,
  getGgufDownloadProgress,
  getInferenceStatus,
  listLoras,
  listModels,
  loadModel,
  unloadModel,
  validateModel,
} from "../api/chat-api";
import { useChatRuntimeStore } from "../stores/chat-runtime-store";
import type { LoadModelResponse } from "../types/api";
import type {
  ChatLoraSummary,
  ChatModelSummary,
  InferenceParams,
} from "../types/runtime";

/** Object form of the argument accepted by `selectModel` (a bare id string is also accepted). */
type SelectedModelInput = {
  /** Identifier sent to the backend as `model_path`. */
  id: string;
  /** Explicit LoRA flag; when omitted it is derived from the known model/LoRA lists. */
  isLora?: boolean;
  /** GGUF variant forwarded to validate/load requests and to GGUF progress polling. */
  ggufVariant?: string;
  /** Extra sentence appended to the loading toast's message. */
  loadingDescription?: string;
  /** When true, download-progress polling is skipped and the toast starts at "Starting model…". */
  isDownloaded?: boolean;
  /** Expected download size; GGUF progress polling is used only when this is > 0 and a variant is set. */
  expectedBytes?: number;
  /** When true, the "already the active model" early return is bypassed. */
  forceReload?: boolean;
};

// Class names passed as the `classNames` option of every model-load progress
// toast, keyed by the toast part they apply to.
const MODEL_LOAD_TOAST_CLASSNAMES = {
  toast: "items-start gap-2.5",
  content: "gap-0.5 flex-1 min-w-0",
  title: "leading-5",
  description: "mt-0 w-full",
} as const;

// Matches an underscore followed by nine or more digits at the very end of a
// string; capture group 1 holds the digits.
const LORA_SUFFIX_RE = /_(\d{9,})$/;

/**
 * Extract the numeric suffix matched by `LORA_SUFFIX_RE` from `input`.
 *
 * @returns The suffix parsed as a base-10 integer, or `undefined` when the
 *   input has no such suffix or the parsed value is not finite.
 */
function parseTrailingEpoch(input: string): number | undefined {
  const digits = LORA_SUFFIX_RE.exec(input)?.[1];
  if (digits === undefined) {
    return undefined;
  }
  const epoch = Number.parseInt(digits, 10);
  if (!Number.isFinite(epoch)) {
    return undefined;
  }
  return epoch;
}

/**
 * Remove the suffix matched by `LORA_SUFFIX_RE`, then any trailing `_`/`-`
 * characters, then surrounding whitespace.
 *
 * @returns The cleaned name, or the original `input` if cleaning leaves an
 *   empty string.
 */
function stripTrailingEpoch(input: string): string {
  const withoutSuffix = input.replace(LORA_SUFFIX_RE, "");
  const tidied = withoutSuffix.replace(/[_-]+$/, "").trim();
  return tidied === "" ? input : tidied;
}

/**
 * Build a " · "-separated tag line from a model's capability flags.
 *
 * Tags appear in a fixed order (GGUF, LoRA, Vision, Audio, Audio Input); when
 * no flag is set the single tag "Base" is used.
 */
function describeModel(model: {
  is_lora?: boolean;
  is_vision?: boolean;
  is_gguf?: boolean;
  is_audio?: boolean;
  has_audio_input?: boolean;
}): string | undefined {
  // [flag, tag] pairs in display order.
  const candidates: Array<[boolean | undefined, string]> = [
    [model.is_gguf, "GGUF"],
    [model.is_lora, "LoRA"],
    [model.is_vision, "Vision"],
    [model.is_audio, "Audio"],
    [model.has_audio_input, "Audio Input"],
  ];

  const tags = candidates
    .filter(([flag]) => flag)
    .map(([, tag]) => tag);

  // No capability flag set at all: label it as a plain base model.
  if (tags.length === 0) {
    tags.push("Base");
  }
  return tags.join(" · ");
}

/**
 * Convert a backend model entry (snake_case, optional flags) into the
 * `ChatModelSummary` shape used by the UI: flags are coerced to booleans, the
 * name falls back to the id, and a missing audio type becomes `null`.
 */
function toChatModelSummary(model: {
  id: string;
  name?: string | null;
  is_lora?: boolean;
  is_vision?: boolean;
  is_gguf?: boolean;
  is_audio?: boolean;
  audio_type?: string | null;
  has_audio_input?: boolean;
}): ChatModelSummary {
  const { id, name, audio_type: audioType } = model;
  const description = describeModel(model);

  return {
    id,
    name: name || id,
    description,
    isLora: !!model.is_lora,
    isVision: !!model.is_vision,
    isGguf: !!model.is_gguf,
    isAudio: !!model.is_audio,
    audioType: audioType ?? null,
    hasAudioInput: !!model.has_audio_input,
  };
}

/**
 * Convert a backend LoRA entry into the `ChatLoraSummary` shape used by the UI.
 *
 * The numeric suffix recognised by `parseTrailingEpoch` is read from the
 * display name first and, failing that, from the last "/"-separated segment
 * of the adapter path; it becomes `updatedAt`. The displayed name has that
 * suffix stripped. `null` source/export type are normalised to `undefined`.
 */
function toLoraSummary(lora: {
  display_name: string;
  adapter_path: string;
  base_model?: string | null;
  source?: "training" | "exported" | null;
  export_type?: "lora" | "merged" | "gguf" | null;
}): ChatLoraSummary {
  const {
    display_name: displayName,
    adapter_path: adapterPath,
    base_model: baseModel,
  } = lora;

  // Last non-empty path segment, or "" when the path has none.
  const pathSegments = adapterPath
    .split("/")
    .filter((segment) => segment.length > 0);
  const lastSegment = pathSegments.at(-1) ?? "";

  const updatedAt =
    parseTrailingEpoch(displayName) ?? parseTrailingEpoch(lastSegment);

  return {
    id: adapterPath,
    name: stripTrailingEpoch(displayName),
    baseModel: baseModel || "Unknown base model",
    updatedAt,
    source: lora.source ?? undefined,
    exportType: lora.export_type ?? undefined,
  };
}

/**
 * Narrow an unknown value to a finite number.
 *
 * @returns `value` when it is a number other than NaN/±Infinity, otherwise
 *   `undefined`.
 */
function toFiniteNumber(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value)
    ? value
    : undefined;
}

/**
 * Produce new inference params for a freshly loaded model.
 *
 * Starts from `current`, sets `checkpoint` to `modelId`, resets `maxTokens`
 * (GGUF: the reported context length, or 131072 when absent; otherwise 4096),
 * and overlays each sampling value from `response.inference` when it is a
 * finite number. `trustRemoteCode` is overlaid only when the response carries
 * a boolean. Anything not supplied keeps its value from `current`.
 */
function mergeRecommendedInference(
  current: InferenceParams,
  response: LoadModelResponse,
  modelId: string,
): InferenceParams {
  const recommended = response.inference;

  // Prefer a finite recommended value; otherwise keep what we already have.
  const pick = <F,>(candidate: unknown, fallback: F) =>
    toFiniteNumber(candidate) ?? fallback;

  const maxTokens = !response.is_gguf
    ? 4096
    : (response.context_length ?? 131072);

  const recommendedTrust = recommended?.trust_remote_code;

  return {
    ...current,
    checkpoint: modelId,
    maxTokens,
    temperature: pick(recommended?.temperature, current.temperature),
    topP: pick(recommended?.top_p, current.topP),
    topK: pick(recommended?.top_k, current.topK),
    minP: pick(recommended?.min_p, current.minP),
    presencePenalty: pick(
      recommended?.presence_penalty,
      current.presencePenalty,
    ),
    trustRemoteCode:
      typeof recommendedTrust === "boolean"
        ? recommendedTrust
        : current.trustRemoteCode,
  };
}

/**
 * Hook that drives model loading, unloading and listing for the chat UI.
 *
 * Returns:
 * - `refresh`: reload the model/LoRA lists and sync the active model from the
 *   backend's inference status.
 * - `selectModel`: validate, (optionally) unload the current model, load the
 *   requested one, and report progress through a toast.
 * - `ejectModel`: unload the current checkpoint.
 * - `cancelLoading`: abort an in-flight `selectModel`.
 * - `loadingModel`, `loadProgress`, `loadToastDismissed`: React state
 *   describing the in-flight load for components that render their own
 *   status UI.
 */
export function useChatModelRuntime() {
  const params = useChatRuntimeStore((state) => state.params);
  const models = useChatRuntimeStore((state) => state.models);
  const loras = useChatRuntimeStore((state) => state.loras);
  const setModels = useChatRuntimeStore((state) => state.setModels);
  const setLoras = useChatRuntimeStore((state) => state.setLoras);
  const setParams = useChatRuntimeStore((state) => state.setParams);
  const setModelsError = useChatRuntimeStore((state) => state.setModelsError);
  const setCheckpoint = useChatRuntimeStore((state) => state.setCheckpoint);
  const clearCheckpoint = useChatRuntimeStore((state) => state.clearCheckpoint);

  const [loadingModel, setLoadingModel] = useState<{
    id: string;
    displayName: string;
    isDownloaded?: boolean;
  } | null>(null);
  const [loadToastDismissed, setLoadToastDismissed] = useState(false);
  const [loadProgress, setLoadProgress] = useState<{
    percent: number | null;
    label: string | null;
    phase: "downloading" | "starting";
  } | null>(null);
  // Refs mirror the in-flight load so async callbacks (progress polling,
  // cancel, toast dismissal) read current values rather than stale closures.
  const loadAbortRef = useRef<AbortController | null>(null);
  const loadingModelRef = useRef<typeof loadingModel>(null);
  const loadToastIdRef = useRef<string | number | null>(null);
  const loadToastDismissedRef = useRef(false);

  // Keep the "toast dismissed" ref and state in sync with a single setter.
  const setLoadToastDismissedState = useCallback((dismissed: boolean) => {
    loadToastDismissedRef.current = dismissed;
    setLoadToastDismissed(dismissed);
  }, []);

  // Clear every piece of load-tracking state/refs and mark the store as no
  // longer loading.
  const resetLoadingUi = useCallback(() => {
    setLoadingModel(null);
    setLoadProgress(null);
    loadingModelRef.current = null;
    loadAbortRef.current = null;
    loadToastIdRef.current = null;
    setLoadToastDismissedState(false);
    useChatRuntimeStore.getState().setModelLoading(false);
  }, [setLoadToastDismissedState]);

  // Build the toast body element without JSX (this is a .ts file).
  const renderLoadDescription = useCallback(
    (
      title: string,
      message: string,
      progressPercent?: number | null,
      progressLabel?: string | null,
      onStop?: () => void,
    ) =>
      createElement(ModelLoadDescription, {
        title,
        message,
        progressPercent,
        progressLabel,
        onStop,
      }),
    [],
  );

  // Fetch models, inference status and LoRAs in parallel and push them into
  // the store. When the backend reports an active model, also restore the
  // checkpoint, inference defaults and capability flags. Failures are stored
  // via setModelsError and shown in an error toast.
  const refresh = useCallback(async () => {
    setModelsError(null);
    try {
      const [listRes, statusRes, lorasRes] = await Promise.all([
        listModels(),
        getInferenceStatus(),
        listLoras(),
      ]);

      setModels(listRes.models.map(toChatModelSummary));
      setLoras(lorasRes.loras.map(toLoraSummary));

      if (statusRes.active_model) {
        setCheckpoint(statusRes.active_model, statusRes.gguf_variant);

        // Apply inference defaults on reconnect (page refresh with model already loaded)
        if (statusRes.inference) {
          const currentParams = useChatRuntimeStore.getState().params;
          setParams(
            mergeRecommendedInference(currentParams, statusRes as any, statusRes.active_model),
          );
        }

        // Restore reasoning/tools support flags and context length
        const supportsReasoning = statusRes.supports_reasoning ?? false;
        const supportsTools = statusRes.supports_tools ?? false;
        useChatRuntimeStore.setState({
          supportsReasoning,
          supportsTools,
          ggufContextLength: statusRes.is_gguf ? (statusRes.context_length ?? null) : null,
        });

        // Set reasoning default for Qwen3.5 small models
        // (reasoning is turned off when the size parsed from the id is below 9B)
        if (supportsReasoning) {
          let reasoningDefault = true;
          const mid = statusRes.active_model.toLowerCase();
          if (mid.includes("qwen3.5")) {
            const sizeMatch = mid.match(/(\d+\.?\d*)\s*b/);
            if (sizeMatch && parseFloat(sizeMatch[1]) < 9) {
              reasoningDefault = false;
            }
          }
          useChatRuntimeStore.getState().setReasoningEnabled(reasoningDefault);
        }
      }
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Failed to load models";
      setModelsError(message);
      toast.error("Failed to refresh models", {
        description: message,
      });
    }
  }, [setCheckpoint, setLoras, setModels, setModelsError, setParams]);

  // Abort the in-flight load (no-op when nothing is loading): signal the
  // abort controller, clear local load state and the checkpoint, replace the
  // progress toast with an info toast, and ask the backend to unload.
  const cancelLoading = useCallback(() => {
    const model = loadingModelRef.current;
    if (!model) return;
    loadAbortRef.current?.abort();
    loadAbortRef.current = null;
    loadingModelRef.current = null;
    const tid = loadToastIdRef.current;
    loadToastIdRef.current = null;
    setLoadingModel(null);
    setLoadProgress(null);
    setLoadToastDismissedState(false);
    clearCheckpoint();
    if (tid != null) toast.dismiss(tid);
    toast.info("Stopped loading model", {
      description: "The current download may still finish in the background.",
    });
    // Fire-and-forget: tell backend to stop, don't block UI
    unloadModel({ model_path: model.id }).catch(() => {});
  }, [clearCheckpoint, setLoadToastDismissedState]);

  // Load the given model (id string or SelectedModelInput). Errors are
  // reported via toast and setModelsError rather than thrown to the caller.
  const selectModel = useCallback(
    async (selection: string | SelectedModelInput) => {
      const modelId = typeof selection === "string" ? selection : selection.id;
      const ggufVariant =
        typeof selection === "string" ? undefined : selection.ggufVariant;
      const forceReload =
        typeof selection === "string" ? false : selection.forceReload ?? false;
      const currentVariant = useChatRuntimeStore.getState().activeGgufVariant;
      // Nothing to do for an empty id or when this model + variant is already
      // active, unless the caller forces a reload.
      if (!forceReload && (!modelId || (params.checkpoint === modelId && (ggufVariant ?? null) === (currentVariant ?? null)))) {
        return;
      }
      // Prevent duplicate loads if already loading this model
      if (loadingModelRef.current?.id === modelId) return;

      const explicitIsLora =
        typeof selection === "string" ? undefined : selection.isLora;
      const extraLoadingDescription =
        typeof selection === "string" ? undefined : selection.loadingDescription;
      const isDownloaded =
        typeof selection === "string" ? false : selection.isDownloaded ?? false;
      const model = models.find((entry) => entry.id === modelId);
      const lora = loras.find((entry) => entry.id === modelId);
      const isLora =
        explicitIsLora ?? model?.isLora ?? (lora ? true : false);
      const displayName = model?.name || lora?.name || modelId;
      // Snapshot of the currently active model, kept for the rollback path.
      const currentCheckpoint =
        useChatRuntimeStore.getState().params.checkpoint;
      const previousCheckpoint = currentCheckpoint;
      const previousVariant =
        useChatRuntimeStore.getState().activeGgufVariant ?? null;
      const previousModel = previousCheckpoint
        ? models.find((entry) => entry.id === previousCheckpoint)
        : undefined;
      const previousLora = previousCheckpoint
        ? loras.find((entry) => entry.id === previousCheckpoint)
        : undefined;
      const previousIsLora =
        previousModel?.isLora ?? (previousLora ? true : false);
      // Toast message: the applicable sentences joined with single spaces.
      const loadingDescription = [
        currentCheckpoint ? "Switching models." : null,
        extraLoadingDescription ?? null,
        isDownloaded ? "Loading cached model into memory." : null,
      ]
        .filter(Boolean)
        .join(" ");
      setModelsError(null);
      setLoadToastDismissedState(false);
      const loadInfo = { id: modelId, displayName, isDownloaded };
      setLoadingModel(loadInfo);
      useChatRuntimeStore.getState().setModelLoading(true);
      setLoadProgress(
        isDownloaded
          ? { percent: null, label: null, phase: "starting" }
          : { percent: 0, label: "Preparing download", phase: "downloading" },
      );
      loadingModelRef.current = loadInfo;
      const abortCtrl = new AbortController();
      loadAbortRef.current = abortCtrl;
      try {
        // Validate -> unload current -> load new -> apply defaults -> refresh.
        // On failure after the previous model was unloaded, tries to reload it.
        async function performLoad(): Promise<void> {
          if (abortCtrl.signal.aborted) throw new Error("Cancelled");
          let previousWasUnloaded = false;
          const currentCheckpoint =
            useChatRuntimeStore.getState().params.checkpoint;
          const paramsBeforeLoad = useChatRuntimeStore.getState().params;
          const maxSeqLength = paramsBeforeLoad.maxSeqLength;
          try {
            // Lightweight pre-flight validation: avoid unloading a working model
            // if the new identifier is clearly invalid (e.g. bad HF id / path).
            await validateModel({
              model_path: modelId,
              hf_token: null,
              max_seq_length: maxSeqLength,
              load_in_4bit: true,
              is_lora: isLora,
              gguf_variant: ggufVariant ?? null,
            });

            if (currentCheckpoint) {
              await unloadModel({ model_path: currentCheckpoint });
              previousWasUnloaded = true;
            }

            const { chatTemplateOverride, kvCacheDtype } = useChatRuntimeStore.getState();
            const loadResponse = await loadModel({
              model_path: modelId,
              hf_token: null,
              max_seq_length: maxSeqLength,
              load_in_4bit: true,
              is_lora: isLora,
              gguf_variant: ggufVariant ?? null,
              trust_remote_code: paramsBeforeLoad.trustRemoteCode ?? false,
              chat_template_override: chatTemplateOverride,
              cache_type_kv: kvCacheDtype,
            });

            // If cancelled while loading, don't update UI to show
            // the model as active -- it's being unloaded.
            if (abortCtrl.signal.aborted) throw new Error("Cancelled");

            const currentParams = useChatRuntimeStore.getState().params;
            setParams(
              mergeRecommendedInference(currentParams, loadResponse, modelId),
            );
            // Qwen3.5 small models (0.8B, 2B, 4B, 9B) disable thinking by default
            // NOTE(review): the check below is `< 9`, so an id parsed as exactly
            // 9B keeps thinking enabled — confirm whether 9B should be included.
            let reasoningDefault = loadResponse.supports_reasoning ?? false;
            if (reasoningDefault) {
              const mid = modelId.toLowerCase();
              if (mid.includes("qwen3.5")) {
                const sizeMatch = mid.match(/(\d+\.?\d*)\s*b/);
                if (sizeMatch && parseFloat(sizeMatch[1]) < 9) {
                  reasoningDefault = false;
                }
              }
            }
            // Reset per-model store state from the load response; tools start
            // disabled and any chat-template override is cleared.
            useChatRuntimeStore.setState({
              ggufContextLength: loadResponse.is_gguf
                ? (loadResponse.context_length ?? 131072)
                : null,
              supportsReasoning: loadResponse.supports_reasoning ?? false,
              reasoningEnabled: reasoningDefault,
              supportsTools: loadResponse.supports_tools ?? false,
              toolsEnabled: false,
              kvCacheDtype: loadResponse.cache_type_kv ?? null,
              defaultChatTemplate: loadResponse.chat_template ?? null,
              chatTemplateOverride: null,
            });
            // Qwen3/3.5: apply thinking-mode-specific params after load
            if (modelId.toLowerCase().includes("qwen3") && (loadResponse.supports_reasoning ?? false)) {
              const store = useChatRuntimeStore.getState();
              const p = reasoningDefault
                ? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
                : { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
              store.setParams({ ...store.params, ...p });
            }
            await refresh();
          } catch (error) {
            // Skip rollback if user cancelled -- model is already being unloaded.
            if (abortCtrl.signal.aborted) throw error;
            // If we unloaded a previous model and the new load failed, attempt a rollback.
            if (previousWasUnloaded && previousCheckpoint) {
              try {
                await loadModel({
                  model_path: previousCheckpoint,
                  hf_token: null,
                  max_seq_length: maxSeqLength,
                  load_in_4bit: true,
                  is_lora: previousIsLora,
                  gguf_variant: previousVariant,
                });
                await refresh();
              } catch {
                // If rollback also fails, surface the original error.
              }
            }
            throw error;
          }
        }

        // Persistent progress toast; later updates reuse its id.
        const toastTitle = isDownloaded ? "Starting model…" : "Downloading model…";
        const toastId = toast(
          null,
          {
            description: renderLoadDescription(
              toastTitle,
              loadingDescription,
              isDownloaded ? null : 0,
              isDownloaded ? null : "Preparing download",
              cancelLoading,
            ),
            duration: Infinity,
            closeButton: false,
            classNames: MODEL_LOAD_TOAST_CLASSNAMES,
            onDismiss: (dismissedToast) => {
              // Ignore dismissals of toasts that are no longer the tracked one.
              if (loadToastIdRef.current !== dismissedToast.id) {
                return;
              }
              setLoadToastDismissedState(true);
            },
          },
        );
        loadToastIdRef.current = toastId;

        // Poll download progress for non-cached models (GGUF and non-GGUF)
        let progressInterval: ReturnType<typeof setInterval> | null = null;
        if (!isDownloaded) {
          const expectedBytes =
            typeof selection !== "string" ? selection.expectedBytes ?? 0 : 0;
          // Guards the "download complete" transition so it only fires after
          // some in-progress state has been observed.
          let hasShownProgress = false;

          const pollProgress = async () => {
            // Stop polling once the load was cancelled or has finished.
            if (abortCtrl.signal.aborted || !loadingModelRef.current) {
              if (progressInterval) clearInterval(progressInterval);
              return;
            }
            try {
              const prog = ggufVariant && expectedBytes > 0
                ? await getGgufDownloadProgress(modelId, ggufVariant, expectedBytes)
                : await getDownloadProgress(modelId);

              // The load may have ended while the request was in flight.
              if (!loadingModelRef.current) return;

              if (prog.progress > 0 && prog.progress < 1) {
                hasShownProgress = true;
                const dlGb = prog.downloaded_bytes / (1024 ** 3);
                const totalGb = prog.expected_bytes / (1024 ** 3);
                const pct = Math.round(prog.progress * 100);
                const progressLabel = totalGb > 0
                  ? `${dlGb.toFixed(1)} of ${totalGb.toFixed(1)} GB`
                  : `${dlGb.toFixed(1)} GB downloaded`;
                setLoadProgress({
                  percent: pct,
                  label: progressLabel,
                  phase: "downloading",
                });
                // State is still updated above; only the toast is skipped once dismissed.
                if (loadToastDismissedRef.current) return;
                toast(
                  null,
                  {
                    id: toastId,
                    description: renderLoadDescription(
                      "Downloading model…",
                      loadingDescription,
                      pct,
                      progressLabel,
                      cancelLoading,
                    ),
                    duration: Infinity,
                    closeButton: false,
                    classNames: MODEL_LOAD_TOAST_CLASSNAMES,
                    onDismiss: (dismissedToast) => {
                      if (loadToastIdRef.current !== dismissedToast.id) return;
                      setLoadToastDismissedState(true);
                    },
                  },
                );
              } else if (prog.downloaded_bytes > 0 && prog.expected_bytes === 0 && prog.progress === 0) {
                // Total size unknown: report bytes only, with no percentage.
                // (This branch updates state but not the toast.)
                hasShownProgress = true;
                const dlGb = prog.downloaded_bytes / (1024 ** 3);
                setLoadProgress({
                  percent: null,
                  label: `${dlGb.toFixed(1)} GB downloaded`,
                  phase: "downloading",
                });
              } else if (prog.progress >= 1 && hasShownProgress) {
                // Download finished: switch to the "starting" phase and stop polling.
                setLoadProgress({
                  percent: 100,
                  label: "Download complete",
                  phase: "starting",
                });
                if (loadToastDismissedRef.current) {
                  if (progressInterval) clearInterval(progressInterval);
                  return;
                }
                toast(null, {
                  id: toastId,
                  description: renderLoadDescription(
                    "Starting model…",
                    "Download complete. Loading the model into memory.",
                    100,
                    "Download complete",
                    cancelLoading,
                  ),
                  duration: Infinity,
                  closeButton: false,
                  classNames: MODEL_LOAD_TOAST_CLASSNAMES,
                  onDismiss: (dismissedToast) => {
                    if (loadToastIdRef.current !== dismissedToast.id) return;
                    setLoadToastDismissedState(true);
                  },
                });
                if (progressInterval) clearInterval(progressInterval);
              }
            } catch {
              // Ignore polling errors
            }
          };

          // One early poll after 500 ms, then every 2 s.
          setTimeout(pollProgress, 500);
          progressInterval = setInterval(pollProgress, 2000);
        }

        try {
          await performLoad();
          // Reuse the progress toast for the result unless the user dismissed it.
          if (loadToastDismissedRef.current) {
            toast.success(`${displayName} loaded`);
          } else {
            toast.success(`${displayName} loaded`, {
              id: toastId,
              description: undefined,
              closeButton: false,
              duration: 2000,
            });
          }
        } catch (err) {
          if (!abortCtrl.signal.aborted) {
            const message =
              err instanceof Error ? err.message : "Failed to load model";
            if (loadToastDismissedRef.current) {
              toast.error(message);
            } else {
              toast.error(message, {
                id: toastId,
                description: undefined,
                closeButton: false,
                duration: 5000,
              });
            }
          }
          throw err;
        } finally {
          if (progressInterval) clearInterval(progressInterval);
          // NOTE(review): this also runs for cancelled loads; if a new load was
          // started after the cancel, this would reset that load's UI state —
          // confirm whether that ordering can occur.
          resetLoadingUi();
        }
      } catch (error) {
        if (abortCtrl.signal.aborted) return; // User cancelled, nothing to report
        resetLoadingUi();
        const message =
          error instanceof Error ? error.message : "Failed to load model";
        setModelsError(message);
      }
    },
    [
      cancelLoading,
      loras,
      models,
      params.checkpoint,
      refresh,
      renderLoadDescription,
      resetLoadingUi,
      setLoadToastDismissedState,
      setModelsError,
      setParams,
    ],
  );

  // Unload the active checkpoint (no-op when none is set), clear it from the
  // store and refresh; progress and outcome are shown via toast.promise.
  const ejectModel = useCallback(async () => {
    if (!params.checkpoint) {
      return;
    }
    setModelsError(null);
    try {
      async function performUnload(): Promise<void> {
        await unloadModel({ model_path: params.checkpoint });
        clearCheckpoint();
        await refresh();
      }

      await toast.promise(performUnload(), {
        loading: "Unloading model",
        success: { message: "Model unloaded", duration: 1200 },
        error: (err) =>
          err instanceof Error ? err.message : "Failed to unload model",
        description: "Releases VRAM and resets inference state.",
      });
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Failed to unload model";
      setModelsError(message);
    }
  }, [clearCheckpoint, params.checkpoint, refresh, setModelsError]);

  return {
    refresh,
    selectModel,
    ejectModel,
    cancelLoading,
    loadingModel,
    loadProgress,
    loadToastDismissed,
  };
}


================================================
FILE: studio/frontend/src/features/chat/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { ChatPage } from "./chat-page";
export {
  ChatSettingsPanel,
  defaultInferenceParams,
  type InferenceParams,
  type Preset,
} from "./chat-settings-sheet";
export { useChatRuntimeStore } from "./stores/chat-runtime-store";
export { useChatModelRuntime } from "./hooks/use-chat-model-runtime";
export { setTrainingCompareHandoff } from "./lib/training-compare-handoff";


================================================
FILE: studio/frontend/src/features/chat/runtime-provider.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  AssistantRuntimeProvider,
  type AttachmentAdapter,
  type CompleteAttachment,
  CompositeAttachmentAdapter,
  ExportedMessageRepository,
  type ExportedMessageRepositoryItem,
  type PendingAttachment,
  RuntimeAdapterProvider,
  Suggestions,
  type ThreadHistoryAdapter,
  type ThreadMessage,
  WebSpeechDictationAdapter,
  type unstable_RemoteThreadListAdapter,
  useAui,
  useAuiState,
  useLocalRuntime,
  unstable_useRemoteThreadListRuntime as useRemoteThreadListRuntime,
} from "@assistant-ui/react";
import { createAssistantStream } from "assistant-stream";
import mammoth from "mammoth";
import { type ReactElement, type ReactNode, useEffect, useMemo } from "react";
import { extractText, getDocumentProxy } from "unpdf";
import { authFetch } from "@/features/auth";
import { createOpenAIStreamAdapter } from "./api/chat-adapter";
import { db } from "./db";
import { useChatRuntimeStore } from "./stores/chat-runtime-store";
import type { MessageRecord, ModelType } from "./types";

// Starter prompts handed to `Suggestions(...)` when ChatRuntimeProvider
// builds its `aui` instance.
const DEFAULT_SUGGESTIONS = [
  "Draw an SVG of a cute sloth",
  "Solve the integral of x²·sin(x) step by step",
  "Write a Python function that finds the longest palindrome in a string",
  "Format a comparison of 3 databases as a markdown table with pros and cons",
];

// Subset of the `/v1/chat/completions` response body that title generation
// reads. Every level is optional because the parsed JSON is cast to this
// shape without validation.
type TitleResponse = {
  choices?: Array<{
    message?: {
      content?: string;
    };
  }>;
};

/**
 * Attachment adapter for images: rejects files above 20MB when added and
 * inlines the image as a base64 data URL when the message is sent.
 */
class VisionImageAdapter implements AttachmentAdapter {
  accept = "image/jpeg,image/png,image/webp,image/gif";

  /** Validate the file size and register the image as a pending attachment. */
  async add({ file }: { file: File }): Promise<PendingAttachment> {
    const limitBytes = 20 * 1024 * 1024;
    if (file.size > limitBytes) {
      throw new Error("Image size exceeds 20MB limit");
    }

    const pending: PendingAttachment = {
      id: crypto.randomUUID(),
      type: "image",
      name: file.name,
      contentType: file.type,
      file,
      status: { type: "requires-action", reason: "composer-send" },
    };
    return pending;
  }

  /** Read the pending file and emit it as a single image content part. */
  async send(attachment: PendingAttachment): Promise<CompleteAttachment> {
    const { id, name, contentType } = attachment;
    const dataUrl = await this.fileToBase64DataURL(attachment.file);
    return {
      id,
      type: "image",
      name,
      contentType,
      content: [{ type: "image", image: dataUrl }],
      status: { type: "complete" },
    };
  }

  /** Nothing to clean up for in-memory image attachments. */
  async remove(): Promise<void> {}

  /** Resolve with the file encoded as a data URL, or reject on read failure. */
  private fileToBase64DataURL(file: File): Promise<string> {
    return new Promise<string>((onDone, onFail) => {
      const fileReader = new FileReader();
      fileReader.onload = () => {
        onDone(fileReader.result as string);
      };
      fileReader.onerror = () => {
        onFail(new Error("Failed to read image file"));
      };
      fileReader.readAsDataURL(file);
    });
  }
}

/**
 * Attachment adapter for PDFs: extracts the document text with `unpdf` at
 * send time and forwards it as one text part prefixed with the file name.
 */
class PDFAttachmentAdapter implements AttachmentAdapter {
  accept = "application/pdf";

  /** Register the file as a pending document attachment. */
  async add({ file }: { file: File }): Promise<PendingAttachment> {
    return {
      id: crypto.randomUUID(),
      type: "document",
      name: file.name,
      contentType: file.type,
      file,
      status: { type: "requires-action", reason: "composer-send" },
    };
  }

  /** Parse the PDF and return its merged page text as the attachment content. */
  async send(attachment: PendingAttachment): Promise<CompleteAttachment> {
    const bytes = new Uint8Array(await attachment.file.arrayBuffer());
    const documentProxy = await getDocumentProxy(bytes);
    const extracted = await extractText(documentProxy, { mergePages: true });
    const body = `[PDF: ${attachment.name}]\n${extracted.text}`;
    return {
      id: attachment.id,
      type: "document",
      name: attachment.name,
      contentType: attachment.contentType,
      content: [{ type: "text", text: body }],
      status: { type: "complete" },
    };
  }

  /** Nothing to clean up for PDF attachments. */
  async remove(): Promise<void> {}
}

/**
 * Attachment adapter for plain-text style files: the file contents are sent
 * verbatim, wrapped in an `<attachment>` element that carries the file name.
 */
class TextAttachmentAdapter implements AttachmentAdapter {
  accept = "text/plain,text/markdown,text/csv,text/xml,text/json,text/css";

  /** Register the file as a pending document attachment. */
  add({ file }: { file: File }): Promise<PendingAttachment> {
    return Promise.resolve({
      id: crypto.randomUUID(),
      type: "document",
      name: file.name,
      contentType: file.type,
      file,
      status: { type: "requires-action", reason: "composer-send" },
    });
  }

  /** Read the file as text and wrap it for the model. */
  async send(attachment: PendingAttachment): Promise<CompleteAttachment> {
    const fileText = await attachment.file.text();
    const wrapped = `<attachment name=${attachment.name}>\n${fileText}\n</attachment>`;
    return {
      id: attachment.id,
      type: "document",
      name: attachment.name,
      contentType: attachment.contentType,
      content: [{ type: "text", text: wrapped }],
      status: { type: "complete" },
    };
  }

  /** Nothing to clean up for text attachments. */
  async remove(): Promise<void> {}
}

/**
 * Attachment adapter for HTML files: parses the markup, drops script and
 * style elements, and sends the remaining whitespace-collapsed body text.
 */
class HtmlAttachmentAdapter implements AttachmentAdapter {
  accept = "text/html";

  /** Register the file as a pending document attachment. */
  add({ file }: { file: File }): Promise<PendingAttachment> {
    return Promise.resolve({
      id: crypto.randomUUID(),
      type: "document",
      name: file.name,
      contentType: file.type,
      file,
      status: { type: "requires-action", reason: "composer-send" },
    });
  }

  /** Convert the HTML file into readable text for the model. */
  async send(attachment: PendingAttachment): Promise<CompleteAttachment> {
    const markup = await attachment.file.text();
    const parsed = new DOMParser().parseFromString(markup, "text/html");

    // Script and style bodies are not readable content, so remove them first.
    parsed.querySelectorAll("script, style").forEach((node) => {
      node.remove();
    });

    const rawText = parsed.body.textContent ?? "";
    const readable = rawText.replace(/\s+/g, " ").trim();
    return {
      id: attachment.id,
      type: "document",
      name: attachment.name,
      contentType: attachment.contentType,
      content: [{ type: "text", text: `[HTML: ${attachment.name}]\n${readable}` }],
      status: { type: "complete" },
    };
  }

  /** Nothing to clean up for HTML attachments. */
  async remove(): Promise<void> {}
}

/**
 * Attachment adapter for .docx files: extracts the raw document text with
 * `mammoth` at send time and forwards it prefixed with the file name.
 */
class DocxAttachmentAdapter implements AttachmentAdapter {
  accept =
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

  /** Register the file as a pending document attachment. */
  async add({ file }: { file: File }): Promise<PendingAttachment> {
    return {
      id: crypto.randomUUID(),
      type: "document",
      name: file.name,
      contentType: file.type,
      file,
      status: { type: "requires-action", reason: "composer-send" },
    };
  }

  /** Extract the document's raw text and return it as the attachment content. */
  async send(attachment: PendingAttachment): Promise<CompleteAttachment> {
    const fileBytes = await attachment.file.arrayBuffer();
    const extraction = await mammoth.extractRawText({ arrayBuffer: fileBytes });
    const body = `[DOCX: ${attachment.name}]\n${extraction.value}`;
    return {
      id: attachment.id,
      type: "document",
      name: attachment.name,
      contentType: attachment.contentType,
      content: [{ type: "text", text: body }],
      status: { type: "complete" },
    };
  }

  /** Nothing to clean up for DOCX attachments. */
  async remove(): Promise<void> {}
}

/**
 * Collapse every whitespace run in `input` to a single space, trim the ends,
 * and cut the result to at most `maxLen` characters (without a trailing space).
 */
function clip(input: string, maxLen: number): string {
  // Splitting on whitespace and re-joining the non-empty tokens is the same
  // as collapsing runs to one space and trimming both ends.
  const normalized = input.split(/\s+/).filter(Boolean).join(" ");
  return normalized.length > maxLen
    ? normalized.slice(0, maxLen).trimEnd()
    : normalized;
}

/**
 * Concatenate the text of every `text` content part of a message and trim it.
 * Returns an empty string for a missing message or non-array content.
 */
function extractTextParts(m: ThreadMessage | undefined): string {
  if (!m || !Array.isArray(m.content)) return "";

  let combined = "";
  for (const part of m.content) {
    if (part.type === "text") {
      combined += part.text;
    }
  }
  return combined.trim();
}

/**
 * Ask the currently loaded model for a short chat title.
 *
 * Sends a non-streaming `/v1/chat/completions` request containing the first
 * 256 characters of the user's message and normalizes the reply to at most
 * six ASCII words / 60 characters.
 *
 * @param payload.userText - Text of the user's message to summarize.
 * @returns The normalized title, or `null` when no checkpoint is loaded, the
 *   request fails for any reason, or the model output is unusable. A `null`
 *   result lets the caller fall back to a title derived from the user text.
 */
async function generateTitleWithModel(payload: {
  userText: string;
}): Promise<string | null> {
  const params = useChatRuntimeStore.getState().params;
  if (!params.checkpoint) return null;

  const user = clip(payload.userText, 256);

  function normalizeTitle(raw: string): string | null {
    // Keep only the first line and strip a leading "Title:" label,
    // non-ASCII characters, quotes and punctuation.
    let title = raw.split(/\r?\n/, 1)[0] ?? "";
    title = title.replace(/^\s*title\s*:\s*/i, "");
    title = title.replace(/[^\x20-\x7E]+/g, " ");
    title = title.replace(/["'`]+/g, "");
    title = title.replace(/[.!?:;,]+/g, " ");
    title = title.replace(/\s+/g, " ").trim();

    // Model echo fail-safe.
    if (/\b(user|base|lora|assistant)\s*:/i.test(title)) {
      return null;
    }

    const words = title.split(" ").filter(Boolean).slice(0, 6);
    const joined = words.join(" ").trim();
    if (!joined) return null;
    return joined.length > 60 ? joined.slice(0, 60).trimEnd() : joined;
  }

  try {
    const response = await authFetch("/v1/chat/completions", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: params.checkpoint,
        stream: false,
        temperature: 0.2,
        top_p: 0.9,
        max_tokens: 24,
        top_k: 20,
        repetition_penalty: 1.0,
        messages: [
          {
            role: "system",
            content:
              "Write 1 concise chat title for the user's message. Rules: 2-6 words, no quotes, no punctuation, ASCII only, do not echo input. Output title only.",
          },
          { role: "user", content: user },
        ],
      }),
    });

    const body = (await response.json().catch(() => null)) as TitleResponse | null;
    if (!response.ok) return null;
    const raw: unknown = body?.choices?.[0]?.message?.content;
    // The body is unvalidated JSON, so guard against non-string content.
    if (typeof raw !== "string" || !raw) return null;
    return normalizeTitle(raw);
  } catch {
    // Title generation is best-effort: a network or auth failure must not
    // escape, otherwise the caller never reaches its fallback title.
    return null;
  }
}

// Keys (`pair:<pairId>` or `thread:<remoteId>`) that currently have a model
// title request in flight; checked before starting another request for the
// same key and cleared when the request settles.
const inflightTitleByKey = new Set<string>();

/**
 * Derive a thread title from the user's own text without calling the model.
 *
 * Uses the first line with whitespace collapsed. Text longer than 48 UTF-16
 * code units is cut and suffixed with "...". Returns "New Chat" when the
 * first line is empty.
 */
function fallbackTitleFromUserText(userText: string): string {
  const max = 48;
  const firstLine = (userText || "").split(/\r?\n/, 1)[0] ?? "";
  const cleaned = firstLine.replace(/\s+/g, " ").trim();
  if (!cleaned) return "New Chat";
  if (cleaned.length <= max) return cleaned;

  let head = cleaned.slice(0, max);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop a dangling high
  // surrogate rather than emitting an invalid character.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head}...`;
}

/**
 * Deep-copy message content through a JSON round trip.
 * Anything that is not an array yields an empty array.
 */
function cloneContent(content: ThreadMessage["content"]): ThreadMessage["content"] {
  if (!Array.isArray(content)) {
    return [];
  }
  const serialized = JSON.stringify(content);
  return JSON.parse(serialized);
}

/**
 * Deep-copy a list of completed attachments through a JSON round trip.
 * A missing or non-array value yields an empty array.
 */
function cloneAttachments(
  attachments: readonly CompleteAttachment[] | undefined,
): readonly CompleteAttachment[] {
  return Array.isArray(attachments)
    ? JSON.parse(JSON.stringify(attachments))
    : [];
}

/**
 * Convert a stored message record into the runtime's `ThreadMessage` shape.
 *
 * Empty or missing content becomes a single empty text part. Records whose
 * role is not "user" are returned as completed assistant messages carrying
 * the record's metadata as `custom`.
 */
function toThreadMessage(m: MessageRecord): ThreadMessage {
  const content =
    Array.isArray(m.content) && m.content.length > 0
      ? cloneContent(m.content)
      : [{ type: "text" as const, text: "" }];
  const createdAt = new Date(m.createdAt);

  if (m.role !== "user") {
    const custom = (m.metadata as Record<string, unknown>) ?? {};
    return {
      id: m.id,
      createdAt,
      role: "assistant" as const,
      content: content as Extract<ThreadMessage, { role: "assistant" }>["content"],
      status: { type: "complete" as const, reason: "unknown" as const },
      metadata: {
        custom,
        steps: [],
        unstable_annotations: [],
        unstable_data: [],
        unstable_state: null,
      },
    };
  }

  return {
    id: m.id,
    createdAt,
    role: "user" as const,
    content: content as Extract<ThreadMessage, { role: "user" }>["content"],
    attachments: cloneAttachments(m.attachments),
    metadata: { custom: {} },
  };
}

/**
 * Build the thread-list adapter that stores chat threads in the local
 * database (`db.threads` / `db.messages`).
 *
 * @param modelType - Saved on newly created threads and used to filter `list()`.
 * @param pairId - Optional id shared by the two threads of a compare pair.
 *   When set, a generated title is also written to the paired thread and
 *   title generation waits until the paired thread has stopped running.
 * @returns The adapter object consumed by `useRemoteThreadListRuntime`.
 */
function createDexieAdapter(
  modelType: ModelType,
  pairId?: string,
): unstable_RemoteThreadListAdapter {
  const defaultTitle = "New Chat";

  /** Find the other thread sharing this adapter's `pairId`, if any. */
  async function findPairedThread(remoteId: string) {
    if (!pairId) return undefined;
    return db.threads
      .where("pairId")
      .equals(pairId)
      .filter((t) => t.id !== remoteId)
      .first();
  }

  /** Wrap an already-known title in a single-chunk assistant stream. */
  function streamTitle(title: string) {
    return createAssistantStream((c) => {
      c.appendText(title);
      c.close();
    });
  }

  /** Store the title on the thread and mirror it onto its paired thread. */
  async function persistTitle(remoteId: string, title: string): Promise<void> {
    await db.threads.update(remoteId, { title });
    const paired = await findPairedThread(remoteId);
    if (paired) await db.threads.update(paired.id, { title });
  }

  /**
   * Produce (and persist) a title for the thread, returned as a stream.
   * A thread that already has a non-default title keeps it.
   */
  async function generateTitle(
    remoteId: string,
    messages: readonly ThreadMessage[],
  ): Promise<ReturnType<typeof streamTitle>> {
    const autoTitle = useChatRuntimeStore.getState().autoTitle;
    const thread = await db.threads.get(remoteId);

    if (!thread) {
      return streamTitle(defaultTitle);
    }

    // Only generate once per thread/pair.
    if (thread.title && thread.title !== defaultTitle) {
      return streamTitle(thread.title);
    }

    const firstUser = messages.find((m) => m.role === "user");
    const userText = extractTextParts(firstUser) || defaultTitle;

    if (!autoTitle) {
      const title = fallbackTitleFromUserText(userText);
      await persistTitle(remoteId, title);
      return streamTitle(title);
    }

    const key = pairId ? `pair:${pairId}` : `thread:${remoteId}`;
    if (inflightTitleByKey.has(key)) {
      return streamTitle(thread.title || defaultTitle);
    }

    // Compare: wait until both threads done.
    const paired = await findPairedThread(remoteId);
    if (paired && useChatRuntimeStore.getState().runningByThreadId[paired.id]) {
      setTimeout(() => {
        // The retry runs detached from the caller, so a failure must be
        // handled here or it becomes an unhandled promise rejection.
        generateTitle(remoteId, messages).catch((error: unknown) => {
          console.warn("Deferred chat title generation failed", error);
        });
      }, 600);
      return streamTitle(thread.title || defaultTitle);
    }

    inflightTitleByKey.add(key);
    try {
      const title =
        (await generateTitleWithModel({
          userText,
        })) ||
        fallbackTitleFromUserText(userText);

      await persistTitle(remoteId, title);
      return streamTitle(title);
    } finally {
      // Always release the key so a later attempt is not blocked forever.
      inflightTitleByKey.delete(key);
    }
  }

  return {
    async fetch(remoteId: string) {
      const thread = await db.threads.get(remoteId);
      if (!thread) {
        throw new Error(`Thread ${remoteId} not found`);
      }
      return {
        remoteId: thread.id,
        status: thread.archived ? "archived" : "regular",
        title: thread.title,
      };
    },

    async list() {
      // Threads of this model type, newest first.
      const threads = await db.threads
        .where("modelType")
        .equals(modelType)
        .reverse()
        .sortBy("createdAt");
      return {
        threads: threads.map((t) => ({
          status: (t.archived ? "archived" : "regular") as
            | "archived"
            | "regular",
          remoteId: t.id,
          title: t.title,
        })),
      };
    },

    async initialize(threadId: string) {
      const currentModelId =
        useChatRuntimeStore.getState().params.checkpoint ?? "";
      await db.threads.add({
        id: threadId,
        title: defaultTitle,
        modelType,
        modelId: currentModelId,
        pairId,
        archived: false,
        createdAt: Date.now(),
      });
      return { remoteId: threadId, externalId: undefined };
    },

    async rename(remoteId: string, newTitle: string) {
      await db.threads.update(remoteId, { title: newTitle });
    },

    async archive(remoteId: string) {
      await db.threads.update(remoteId, { archived: true });
    },

    async unarchive(remoteId: string) {
      await db.threads.update(remoteId, { archived: false });
    },

    async delete(remoteId: string) {
      // Remove the thread's messages before the thread row itself.
      await db.messages.where("threadId").equals(remoteId).delete();
      await db.threads.delete(remoteId);
    },

    generateTitle,
  };
}

/**
 * Per-thread provider that supplies the runtime with its history, dictation
 * and attachment adapters. History is read from and written to `db.messages`.
 */
function ThreadHistoryProvider({
  children,
}: { children?: ReactNode }): ReactElement {
  const aui = useAui();

  const historyAdapter = useMemo<ThreadHistoryAdapter>(
    () => ({
      /** Load the stored messages of the current thread in display order. */
      async load() {
        const { remoteId } = aui.threadListItem().getState();
        if (!remoteId) {
          return { messages: [] };
        }

        // Tie-break for messages sharing a timestamp: system, user, assistant.
        const roleOrder: Record<string, number> = {
          system: 0,
          user: 1,
          assistant: 2,
        };
        const rankOf = (role: string): number => roleOrder[role] ?? 99;

        const records = await db.messages
          .where("threadId")
          .equals(remoteId)
          .toArray();
        records.sort((left, right) => {
          if (left.createdAt !== right.createdAt) {
            return left.createdAt - right.createdAt;
          }
          const leftRank = rankOf(left.role);
          const rightRank = rankOf(right.role);
          if (leftRank !== rightRank) {
            return leftRank - rightRank;
          }
          if (left.id < right.id) return -1;
          if (left.id > right.id) return 1;
          return 0;
        });

        const threadMessages = records.map(toThreadMessage);
        return ExportedMessageRepository.fromArray(threadMessages);
      },

      /** Insert or overwrite one message of the current thread. */
      async append({ message }: ExportedMessageRepositoryItem) {
        const { remoteId } = await aui.threadListItem().initialize();
        const content = cloneContent(message.content);
        const attachments =
          message.role === "user" ? cloneAttachments(message.attachments) : [];
        const custom = message.metadata?.custom;

        // Keep the timestamp of an already stored message when rewriting it.
        const stored = await db.messages.get(message.id);
        const createdAt =
          stored?.createdAt ??
          message.createdAt?.getTime?.() ??
          Date.now();

        const hasAttachments = attachments.length > 0;
        const hasCustom = custom && Object.keys(custom).length > 0;
        await db.messages.put({
          id: message.id,
          threadId: remoteId,
          role: message.role,
          content,
          ...(hasAttachments ? { attachments } : {}),
          ...(hasCustom ? { metadata: custom } : {}),
          createdAt,
        });
      },
    }),
    [aui],
  );

  const dictationAdapter = useMemo(() => {
    if (!WebSpeechDictationAdapter.isSupported()) {
      return undefined;
    }
    return new WebSpeechDictationAdapter();
  }, []);

  const attachmentAdapter = useMemo(() => {
    const members = [
      new VisionImageAdapter(),
      new TextAttachmentAdapter(),
      new HtmlAttachmentAdapter(),
      new PDFAttachmentAdapter(),
      new DocxAttachmentAdapter(),
    ];
    return new CompositeAttachmentAdapter(members);
  }, []);

  const adapters = useMemo(
    () => ({
      history: historyAdapter,
      dictation: dictationAdapter,
      attachments: attachmentAdapter,
    }),
    [historyAdapter, dictationAdapter, attachmentAdapter],
  );

  return (
    <RuntimeAdapterProvider adapters={adapters}>
      {children}
    </RuntimeAdapterProvider>
  );
}

// Chat adapter created once at module load; every runtime built by
// useRuntimeHook shares this single instance.
const chatAdapter = createOpenAIStreamAdapter();

/** Hook passed as `runtimeHook` to `useRemoteThreadListRuntime`; builds a local runtime on the shared adapter. */
function useRuntimeHook(): ReturnType<typeof useLocalRuntime> {
  return useLocalRuntime(chatAdapter);
}

/**
 * Render-less helper: once the thread list has finished loading, switches the
 * runtime to `threadId` whenever it is not already the main thread.
 */
function ThreadAutoSwitch(props: { threadId: string }): ReactElement | null {
  const { threadId } = props;
  const aui = useAui();
  const isLoading = useAuiState((state) => state.threads.isLoading);
  const mainThreadId = useAuiState((state) => state.threads.mainThreadId);

  useEffect(() => {
    if (isLoading || mainThreadId === threadId) {
      return;
    }
    aui.threads().switchToThread(threadId);
  }, [aui, isLoading, mainThreadId, threadId]);

  return null;
}

/**
 * Render-less helper: switches the runtime to a fresh thread once the thread
 * list has loaded, and again every time `nonce` changes.
 */
function ThreadNewChatSwitch(props: { nonce: string }): ReactElement | null {
  const { nonce } = props;
  const aui = useAui();
  const isLoading = useAuiState((state) => state.threads.isLoading);

  // `nonce` is not read inside the effect; it is listed as a dependency so a
  // new value re-runs the switch.
  useEffect(() => {
    if (isLoading) {
      return;
    }
    aui.threads().switchToNewThread();
  }, [aui, isLoading, nonce]);

  return null;
}

/**
 * Render-less helper: while `enabled`, mirrors the runtime's main thread id
 * into the chat runtime store (as `null` when there is none).
 */
function ActiveThreadSync(props: { enabled: boolean }): ReactElement | null {
  const { enabled } = props;
  const mainThreadId = useAuiState((state) => state.threads.mainThreadId);
  const setActiveThreadId = useChatRuntimeStore((store) => store.setActiveThreadId);

  useEffect(() => {
    if (enabled) {
      setActiveThreadId(mainThreadId ?? null);
    }
  }, [enabled, mainThreadId, setActiveThreadId]);

  return null;
}

/**
 * Top-level chat provider: creates the thread-list runtime backed by the
 * local database adapter and exposes it to `children`.
 *
 * - `modelType` / `pairId` select which stored threads the runtime manages.
 * - `initialThreadId` makes the runtime switch to that thread after loading.
 * - `newThreadNonce` (only when no `initialThreadId` is given) opens a new
 *   thread each time its value changes.
 *
 * The active thread id is synced to the store only for unpaired "base" chats.
 */
export function ChatRuntimeProvider(props: {
  children: ReactNode;
  modelType?: ModelType;
  pairId?: string;
  initialThreadId?: string;
  newThreadNonce?: string;
}): ReactElement {
  const {
    children,
    modelType = "base",
    pairId,
    initialThreadId,
    newThreadNonce,
  } = props;

  const adapter = {
    ...createDexieAdapter(modelType, pairId),
    unstable_Provider: ThreadHistoryProvider,
  };
  const runtime = useRemoteThreadListRuntime({
    runtimeHook: useRuntimeHook,
    adapter,
  });

  const aui = useAui({
    suggestions: Suggestions(DEFAULT_SUGGESTIONS),
  });

  const syncActiveThread = modelType === "base" && !pairId;

  return (
    <AssistantRuntimeProvider runtime={runtime} aui={aui}>
      <ActiveThreadSync enabled={syncActiveThread} />
      {initialThreadId ? <ThreadAutoSwitch threadId={initialThreadId} /> : null}
      {!initialThreadId && newThreadNonce ? (
        <ThreadNewChatSwitch nonce={newThreadNonce} />
      ) : null}
      {children}
    </AssistantRuntimeProvider>
  );
}


================================================
FILE: studio/frontend/src/features/chat/shared-composer.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { TooltipIconButton } from "@/components/assistant-ui/tooltip-icon-button";
import { Button } from "@/components/ui/button";
import { AUDIO_ACCEPT, MAX_AUDIO_SIZE, fileToBase64 } from "@/lib/audio-utils";
import { useAui } from "@assistant-ui/react";
import { cn } from "@/lib/utils";
import { ArrowUpIcon, GlobeIcon, HeadphonesIcon, LightbulbIcon, LightbulbOffIcon, MicIcon, PlusIcon, SquareIcon, TerminalIcon, XIcon } from "lucide-react";
import { toast } from "sonner";
import { loadModel } from "./api/chat-api";
import { useChatRuntimeStore } from "./stores/chat-runtime-store";
import {
  type KeyboardEvent,
  type MutableRefObject,
  type ReactElement,
  type ReactNode,
  createContext,
  useCallback,
  useContext,
  useEffect,
  useRef,
  useState,
} from "react";

/** One content part of a user message sent through a compare handle. */
export type CompareMessagePart =
  | { type: "text"; text: string }
  | { type: "image"; image: string }
  | { type: "audio"; audio: string };

/**
 * Imperative controls for one thread of a compare view, registered under a
 * name in a shared `CompareHandles` ref.
 */
export interface CompareHandle {
  /** Append a user message with the given content to the thread. */
  append: (content: CompareMessagePart[]) => void;
  /** Append a user message without triggering generation. */
  appendMessage: (content: CompareMessagePart[]) => void;
  /** Trigger generation on the current thread (after appendMessage). */
  startRun: () => void;
  /** Cancel the thread's current run. */
  cancel: () => void;
  /** Whether the thread currently reports a run in progress. */
  isRunning: () => boolean;
  /** Returns a promise that resolves when the current or next run finishes. */
  waitForRunEnd: () => Promise<void>;
}

// Comma-separated list of image MIME types.
const IMAGE_ACCEPT = "image/jpeg,image/png,image/webp,image/gif";
// Image size limit in bytes (20 * 1024 * 1024).
const MAX_IMAGE_SIZE = 20 * 1024 * 1024;

/**
 * Read a file with `FileReader` and resolve with its contents as a data URL.
 * Rejects with "Failed to read image file" when the read fails.
 */
function fileToBase64DataURL(file: File): Promise<string> {
  return new Promise<string>((onDone, onFail) => {
    const fileReader = new FileReader();
    fileReader.onload = () => {
      onDone(fileReader.result as string);
    };
    fileReader.onerror = () => {
      onFail(new Error("Failed to read image file"));
    };
    fileReader.readAsDataURL(file);
  });
}

/**
 * Speech-to-text hook built on the browser SpeechRecognition API.
 *
 * @param setText - State setter for the composer text; each finalized
 *   transcript is appended to the previous text, separated by a space.
 * @returns `isDictating` (a session is active), `start` / `stop` controls and
 *   `supported` (the browser exposes SpeechRecognition or its webkit-prefixed
 *   variant). `start` is a no-op when the API is unavailable.
 */
function useDictation(
  setText: (value: string | ((prev: string) => string)) => void,
) {
  const [isDictating, setIsDictating] = useState(false);
  const recognitionRef = useRef<SpeechRecognition | null>(null);

  const start = useCallback(() => {
    const SpeechRecognitionAPI =
      typeof window !== "undefined" &&
      (window.SpeechRecognition ?? (window as unknown as { webkitSpeechRecognition?: typeof SpeechRecognition }).webkitSpeechRecognition);
    if (!SpeechRecognitionAPI) {
      return;
    }

    // Starting again while a session is active would otherwise orphan the old
    // recognizer and leave it listening; abort it before replacing it.
    if (recognitionRef.current) {
      recognitionRef.current.abort();
      recognitionRef.current = null;
    }

    const recognition = new SpeechRecognitionAPI() as SpeechRecognition;
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = "en-US";

    recognition.onresult = (event: SpeechRecognitionEvent) => {
      // One event can carry several changed results starting at resultIndex;
      // collect every finalized one instead of only the first.
      const finalized: string[] = [];
      for (let i = event.resultIndex; i < event.results.length; i += 1) {
        const result = event.results[i];
        if (!result?.isFinal) continue;
        const transcript = result[0]?.transcript?.trim();
        if (transcript) finalized.push(transcript);
      }
      if (finalized.length === 0) return;
      const addition = finalized.join(" ");
      setText((prev) => (prev ? `${prev} ${addition}` : addition));
    };

    // Ignore events from a session that has already been replaced or stopped,
    // so a stale recognizer cannot reset the state of the active one.
    const handleSessionOver = () => {
      if (recognitionRef.current !== recognition) return;
      recognitionRef.current = null;
      setIsDictating(false);
    };
    recognition.onerror = handleSessionOver;
    recognition.onend = handleSessionOver;

    recognitionRef.current = recognition;
    recognition.start();
    setIsDictating(true);
  }, [setText]);

  const stop = useCallback(() => {
    if (recognitionRef.current) {
      recognitionRef.current.stop();
      recognitionRef.current = null;
    }
    setIsDictating(false);
  }, []);

  // Abort any session still active when the owning component unmounts.
  useEffect(() => {
    return () => {
      if (recognitionRef.current) {
        recognitionRef.current.abort();
      }
    };
  }, []);

  const supported =
    typeof window !== "undefined" &&
    !!(window.SpeechRecognition ?? (window as unknown as { webkitSpeechRecognition?: unknown }).webkitSpeechRecognition);

  return { isDictating, start, stop, supported };
}

/** Mutable ref holding the registered compare handles, keyed by name. */
export type CompareHandles = MutableRefObject<Record<string, CompareHandle>>;

// Context carrying the shared handles ref; `null` when no provider is mounted.
const CompareHandlesContext = createContext<CompareHandles | null>(null);

/** Makes `handlesRef` available to descendants through `CompareHandlesContext`. */
export function CompareHandlesProvider(props: {
  handlesRef: CompareHandles;
  children: ReactNode;
}): ReactElement {
  const { handlesRef, children } = props;
  const { Provider } = CompareHandlesContext;
  return <Provider value={handlesRef}>{children}</Provider>;
}

/**
 * Render-less component that registers a `CompareHandle` for the surrounding
 * thread under `name` in the handles ref from `CompareHandlesContext`, and
 * removes it again on unmount or when its inputs change.
 *
 * Does nothing when no `CompareHandlesProvider` is mounted above it.
 */
export function RegisterCompareHandle({
  name,
}: {
  name: string;
}): ReactElement | null {
  const handlesRef = useContext(CompareHandlesContext);
  const aui = useAui();

  useEffect(() => {
    if (!handlesRef) {
      return;
    }
    const currentHandles = handlesRef.current;
    currentHandles[name] = {
      // fixes occasional reorder on reload.
      append: (content) =>
        aui.thread().append({ role: "user", content, createdAt: new Date() } as never),
      appendMessage: (content) =>
        aui.thread().append({ role: "user", content, createdAt: new Date(), startRun: false } as never),
      startRun: () => {
        // Continue from the last message of the thread (or from the root).
        const msgs = aui.thread().getState().messages;
        const lastId = msgs.length > 0 ? msgs[msgs.length - 1].id : null;
        aui.thread().startRun({ parentId: lastId });
      },
      cancel: () => aui.thread().cancelRun(),
      isRunning: () => aui.thread().getState().isRunning,
      waitForRunEnd: () =>
        new Promise<void>((resolve) => {
          // Seed from the current store state: subscribe() only reports later
          // changes, so a run already in progress would otherwise go unseen
          // unless another store update happened to arrive while it ran.
          let wasRunning =
            Object.keys(useChatRuntimeStore.getState().runningByThreadId).length > 0;
          const unsub = useChatRuntimeStore.subscribe((state) => {
            const anyRunning = Object.keys(state.runningByThreadId).length > 0;
            if (anyRunning) wasRunning = true;
            if (wasRunning && !anyRunning) {
              unsub();
              resolve();
            }
          });
        }),
    };
    return () => {
      delete currentHandles[name];
    };
  }, [handlesRef, name, aui]);

  return null;
}

// An image file paired with an id, as held in the composer's `pendingImages` state.
type PendingImage = { id: string; file: File };

/**
 * Square thumbnail preview of a pending image file with a remove button.
 * Shows a pulsing placeholder until the preview URL has been created.
 */
function PendingImageThumb(props: {
  file: File;
  onRemove: () => void;
}): ReactElement {
  const { file, onRemove } = props;
  const [previewUrl, setPreviewUrl] = useState<string | null>(null);

  // Create an object URL for the file and revoke it when the file changes or
  // the thumbnail unmounts.
  useEffect(() => {
    const objectUrl = URL.createObjectURL(file);
    setPreviewUrl(objectUrl);
    return () => {
      URL.revokeObjectURL(objectUrl);
    };
  }, [file]);

  if (!previewUrl) {
    return <div className="size-14 animate-pulse rounded-[14px] bg-muted" />;
  }

  return (
    <div className="relative size-14 shrink-0 overflow-hidden rounded-[14px] border border-foreground/20 bg-muted">
      <img src={previewUrl} alt={file.name} className="h-full w-full object-cover" />
      <button
        type="button"
        onClick={onRemove}
        className="absolute top-1 right-1 flex size-5 items-center justify-center rounded-full bg-white text-muted-foreground shadow-sm hover:bg-destructive hover:text-destructive-foreground"
        aria-label="Remove attachment"
      >
        <XIcon className="size-3" />
      </button>
    </div>
  );
}

// Shape of the optional `model1` / `model2` props accepted by SharedComposer.
type CompareModelSelection = {
  id: string;
  isLora: boolean;
  // Optional; presumably the GGUF variant to use for this model — TODO confirm.
  ggufVariant?: string;
};

/**
 * Composer shared by both sides of the compare view.
 *
 * Collects text, image attachments and an optional audio attachment, then
 * dispatches the message to every registered compare handle.
 *
 * Two dispatch modes exist:
 * - Generalized compare (handles named "model1"/"model2" are registered and at
 *   least one of `model1` / `model2` has an id): the user message is appended
 *   to both sides immediately, then each side is processed sequentially as
 *   load model -> start run -> wait for the run to end.
 * - Otherwise: the message is appended to every handle at once.
 *
 * @param handlesRef Ref holding the per-side control handles (append, startRun, cancel, ...).
 * @param model1 Optional model selection for the first side.
 * @param model2 Optional model selection for the second side.
 */
export function SharedComposer({
  handlesRef,
  model1,
  model2,
}: {
  handlesRef: CompareHandles;
  model1?: CompareModelSelection;
  model2?: CompareModelSelection;
}): ReactElement {
  const [text, setText] = useState("");
  const [running, setRunning] = useState(false);
  const [comparing, setComparing] = useState(false);
  const [pendingImages, setPendingImages] = useState<PendingImage[]>([]);
  const [pendingAudio, setPendingAudio] = useState<{ name: string; base64: string } | null>(null);
  const [dragging, setDragging] = useState(false);
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);
  const audioInputRef = useRef<HTMLInputElement>(null);
  // Set by stop() so an in-flight sequential compare does not continue with
  // further load/generate stages after the user pressed Stop.
  const stopRequestedRef = useRef(false);

  const activeModel = useChatRuntimeStore((s) => {
    const checkpoint = s.params.checkpoint;
    return s.models.find((m) => m.id === checkpoint);
  });
  const supportsReasoning = useChatRuntimeStore((s) => s.supportsReasoning);
  const reasoningEnabled = useChatRuntimeStore((s) => s.reasoningEnabled);
  const setReasoningEnabled = useChatRuntimeStore((s) => s.setReasoningEnabled);
  const supportsTools = useChatRuntimeStore((s) => s.supportsTools);
  const toolsEnabled = useChatRuntimeStore((s) => s.toolsEnabled);
  const setToolsEnabled = useChatRuntimeStore((s) => s.setToolsEnabled);
  const codeToolsEnabled = useChatRuntimeStore((s) => s.codeToolsEnabled);
  const setCodeToolsEnabled = useChatRuntimeStore((s) => s.setCodeToolsEnabled);
  const setPendingAudioStore = useChatRuntimeStore((s) => s.setPendingAudio);
  const clearPendingAudioStore = useChatRuntimeStore((s) => s.clearPendingAudio);

  const { isDictating, start: startDictation, stop: stopDictation, supported: dictationSupported } = useDictation(
    setText,
  );

  // Poll the handles every 200 ms to mirror "is any side running" into state.
  useEffect(() => {
    const id = setInterval(() => {
      const handles = handlesRef.current;
      const any = Object.values(handles).some((h) => h.isRunning());
      setRunning(any);
    }, 200);
    return () => clearInterval(id);
  }, [handlesRef]);

  // Sort picked/dropped files into the audio slot or the image queue.
  // Files that match neither filter, or exceed the size limits, are ignored.
  const addFiles = useCallback((files: FileList | null) => {
    if (!files?.length) return;
    const next: PendingImage[] = [];
    for (let i = 0; i < files.length; i++) {
      const file = files[i];
      if (!file) continue;
      // Handle audio files
      if (file.type.match(/^audio\//i) && file.size <= MAX_AUDIO_SIZE) {
        fileToBase64(file)
          .then((base64) => {
            setPendingAudio({ name: file.name, base64 });
            setPendingAudioStore(base64, file.name);
          })
          .catch((err) => {
            // Surface read failures instead of leaving an unhandled rejection.
            toast.error("Failed to read audio file", {
              description: err instanceof Error ? err.message : file.name,
            });
          });
        continue;
      }
      // Handle image files
      if (!file.type.match(/^image\/(jpeg|png|webp|gif)$/i)) continue;
      if (file.size > MAX_IMAGE_SIZE) continue;
      next.push({ id: crypto.randomUUID(), file });
    }
    setPendingImages((prev) => [...prev, ...next]);
  }, [setPendingAudioStore]);

  const removePendingImage = useCallback((id: string) => {
    setPendingImages((prev) => prev.filter((p) => p.id !== id));
  }, []);

  // Build the message parts (images, audio, text - in that order), reset the
  // composer, then dispatch using one of the two modes described above.
  async function send() {
    const msg = text.trim();
    if (!msg && pendingImages.length === 0 && !pendingAudio) return;

    const content: CompareMessagePart[] = [];
    for (const { file } of pendingImages) {
      try {
        const image = await fileToBase64DataURL(file);
        content.push({ type: "image", image });
      } catch {
        // skip failed image
      }
    }
    if (pendingAudio) {
      content.push({ type: "audio", audio: pendingAudio.base64 });
    }
    if (msg) {
      content.push({ type: "text", text: msg });
    }
    // Every image may have failed to encode; nothing left to send then.
    if (content.length === 0) return;

    setText("");
    setPendingImages([]);
    setPendingAudio(null);
    clearPendingAudioStore();
    textareaRef.current?.focus();

    // Generalized compare: load each model before dispatching to its side
    const hasCompareHandles = Boolean(handlesRef.current["model1"] || handlesRef.current["model2"]);
    const isGeneralizedCompare = hasCompareHandles && Boolean(model1?.id || model2?.id);
    if (isGeneralizedCompare) {
      const store = useChatRuntimeStore.getState();
      const maxSeqLength = store.params.maxSeqLength;
      const trustRemoteCode = store.params.trustRemoteCode ?? false;
      const chatTemplateOverride = store.chatTemplateOverride;

      // Last path segment of the model id, falling back to the full id.
      function modelDisplayName(id: string): string {
        const parts = id.split("/");
        return parts[parts.length - 1] || id;
      }

      // Helper: load a model and update store checkpoint
      async function ensureModelLoaded(sel: CompareModelSelection): Promise<string> {
        const resp = await loadModel({
          model_path: sel.id,
          hf_token: null,
          max_seq_length: maxSeqLength,
          load_in_4bit: true,
          is_lora: sel.isLora,
          gguf_variant: sel.ggufVariant ?? null,
          trust_remote_code: trustRemoteCode,
          chat_template_override: chatTemplateOverride,
        });
        useChatRuntimeStore.getState().setCheckpoint(
          resp.model,
          resp.is_gguf ? (sel.ggufVariant ?? undefined) : null,
        );
        return resp.status;
      }

      const handle1 = handlesRef.current["model1"];
      const handle2 = handlesRef.current["model2"];

      // Show user messages immediately on both sides
      if (handle1) handle1.appendMessage(content);
      if (handle2) handle2.appendMessage(content);

      const name1 = model1?.id ? modelDisplayName(model1.id) : "";
      const name2 = model2?.id ? modelDisplayName(model2.id) : "";
      const toastId = toast("Comparing models…", { duration: Infinity });

      // A fresh compare starts with no pending stop request.
      stopRequestedRef.current = false;
      setComparing(true);
      try {
        // Side 1: load → generate → wait
        if (handle1 && model1?.id) {
          toast("Loading Model 1…", { id: toastId, description: name1, duration: Infinity });
          const status1 = await ensureModelLoaded(model1);
          // Stop pressed while loading: do not start the run.
          if (!stopRequestedRef.current) {
            toast("Generating with Model 1…", { id: toastId, description: `${name1} (${status1})`, duration: Infinity });
            // Subscribe before starting so the run's start is not missed.
            const done = handle1.waitForRunEnd();
            handle1.startRun();
            await done;
          }
        }

        // Side 2: load → generate → wait (skipped entirely once Stop was pressed)
        if (!stopRequestedRef.current && handle2 && model2?.id) {
          const needsLoad = model2.id.toLowerCase() !== (model1?.id || "").toLowerCase()
            || (model2.ggufVariant ?? "") !== (model1?.ggufVariant ?? "");
          // Only the "Loading" toast is gated on needsLoad; the load call below
          // always runs and also updates the store checkpoint.
          if (needsLoad) {
            toast("Loading Model 2…", { id: toastId, description: name2, duration: Infinity });
          }
          const status2 = await ensureModelLoaded(model2);
          if (!stopRequestedRef.current) {
            toast("Generating with Model 2…", { id: toastId, description: `${name2} (${status2})`, duration: Infinity });
            const done = handle2.waitForRunEnd();
            handle2.startRun();
            await done;
          }
        }

        if (stopRequestedRef.current) {
          toast("Compare stopped", { id: toastId, duration: 2000 });
        } else {
          toast.success("Compare complete", { id: toastId, duration: 2000 });
        }
      } catch (err) {
        toast.error("Compare failed", {
          id: toastId,
          description: err instanceof Error ? err.message : "Unknown error",
          duration: 4000,
        });
      } finally {
        setComparing(false);
      }
    } else {
      // Original behavior: fire all handles simultaneously
      for (const handle of Object.values(handlesRef.current)) {
        handle.append(content);
      }
    }
  }

  // Stop dictation and cancel every side's run. The flag is raised first so a
  // sequential compare that is between stages (e.g. still loading) also halts.
  function stop() {
    stopRequestedRef.current = true;
    if (isDictating) stopDictation();
    for (const handle of Object.values(handlesRef.current)) {
      handle.cancel();
    }
  }

  const busy = running || comparing;

  // Enter sends, Shift+Enter inserts a newline; sending is blocked while busy.
  function onKeyDown(e: KeyboardEvent) {
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
      if (!busy) {
        send();
      }
    }
  }

  const canSend = (text.trim().length > 0 || pendingImages.length > 0 || pendingAudio !== null) && !busy;

  return (
    <div
      className={`shadow-border ring-1 ring-border relative flex w-full flex-col rounded-2xl bg-background px-1 pt-2 transition-shadow outline-none ${dragging ? "ring-ring bg-accent/50" : ""}`}
      onDragOver={(e) => {
        e.preventDefault();
        setDragging(true);
      }}
      onDragLeave={() => setDragging(false)}
      onDrop={(e) => {
        e.preventDefault();
        setDragging(false);
        addFiles(e.dataTransfer.files);
      }}
    >
      {(pendingImages.length > 0 || pendingAudio) && (
        <div className="mb-2 flex w-full flex-row flex-wrap items-center gap-2 px-1.5 pt-0.5 pb-1">
          {pendingImages.map(({ id, file }) => (
            <PendingImageThumb
              key={id}
              file={file}
              onRemove={() => removePendingImage(id)}
            />
          ))}
          {pendingAudio && (
            <div className="flex items-center gap-2 rounded-lg border border-foreground/20 bg-muted px-3 py-1.5 text-xs">
              <HeadphonesIcon className="size-3.5 text-muted-foreground" />
              <span className="max-w-48 truncate">{pendingAudio.name}</span>
              <button
                type="button"
                onClick={() => { setPendingAudio(null); clearPendingAudioStore(); }}
                className="flex size-4 items-center justify-center rounded-full hover:bg-destructive hover:text-destructive-foreground"
                aria-label="Remove audio"
              >
                <XIcon className="size-3" />
              </button>
            </div>
          )}
        </div>
      )}
      <textarea
        ref={textareaRef}
        value={text}
        onChange={(e) => setText(e.target.value)}
        onKeyDown={onKeyDown}
        placeholder="Send to both models..."
        className="mb-1 max-h-32 min-h-14 w-full resize-none bg-transparent px-4 pt-2 pb-3 text-sm outline-none placeholder:text-muted-foreground"
        rows={1}
      />
      <div className="relative mx-2 mb-2 flex items-center justify-between">
        <div className="flex items-center gap-1">
          <input
            ref={fileInputRef}
            type="file"
            accept={IMAGE_ACCEPT}
            multiple
            className="hidden"
            onChange={(e) => {
              addFiles(e.target.files);
              e.target.value = "";
            }}
          />
          <TooltipIconButton
            tooltip="Add attachment"
            side="bottom"
            variant="ghost"
            size="icon"
            className="size-8 rounded-full text-muted-foreground hover:bg-muted-foreground/15"
            onClick={() => fileInputRef.current?.click()}
            aria-label="Add attachment"
          >
            <PlusIcon className="size-5 stroke-[1.5px]" />
          </TooltipIconButton>
          {activeModel?.hasAudioInput && (
            <>
              <input
                ref={audioInputRef}
                type="file"
                accept={AUDIO_ACCEPT}
                className="hidden"
                onChange={(e) => {
                  addFiles(e.target.files);
                  e.target.value = "";
                }}
              />
              <TooltipIconButton
                tooltip="Upload audio"
                side="bottom"
                variant="ghost"
                size="icon"
                className="size-8 rounded-full text-muted-foreground hover:bg-muted-foreground/15"
                onClick={() => audioInputRef.current?.click()}
                aria-label="Upload audio"
              >
                <HeadphonesIcon className="size-4 stroke-[1.5px]" />
              </TooltipIconButton>
            </>
          )}
          {supportsReasoning && (
            <button
              type="button"
              onClick={() => {
                const next = !reasoningEnabled;
                setReasoningEnabled(next);
                // Qwen3/3.5: adjust params for thinking on/off
                const store = useChatRuntimeStore.getState();
                const cp = store.params.checkpoint?.toLowerCase() ?? "";
                if (cp.includes("qwen3")) {
                  const p = next
                    ? { temperature: 0.6, topP: 0.95, topK: 20, minP: 0.0 }
                    : { temperature: 0.7, topP: 0.8, topK: 20, minP: 0.0 };
                  store.setParams({ ...store.params, ...p });
                }
              }}
              className={cn(
                "flex items-center gap-0.5 rounded-full px-2 py-0.5 text-xs font-medium transition-colors",
                reasoningEnabled
                  ? "bg-primary/10 text-primary hover:bg-primary/20"
                  : "bg-muted text-muted-foreground hover:bg-muted-foreground/15",
              )}
              aria-label={reasoningEnabled ? "Disable thinking" : "Enable thinking"}
            >
              {reasoningEnabled ? (
                <LightbulbIcon className="size-3" />
              ) : (
                <LightbulbOffIcon className="size-3" />
              )}
              <span>Think</span>
            </button>
          )}
          {supportsTools && (
            <button
              type="button"
              onClick={() => setToolsEnabled(!toolsEnabled)}
              className={cn(
                "flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-colors",
                toolsEnabled
                  ? "bg-primary/10 text-primary hover:bg-primary/20"
                  : "bg-muted text-muted-foreground hover:bg-muted-foreground/15",
              )}
              aria-label={toolsEnabled ? "Disable web search" : "Enable web search"}
            >
              <GlobeIcon className="size-3.5" />
              <span>Search</span>
            </button>
          )}
          {supportsTools && (
            <button
              type="button"
              onClick={() => setCodeToolsEnabled(!codeToolsEnabled)}
              className={cn(
                "flex items-center gap-1.5 rounded-full px-2.5 py-1 text-xs font-medium transition-colors",
                codeToolsEnabled
                  ? "bg-primary/10 text-primary hover:bg-primary/20"
                  : "bg-muted text-muted-foreground hover:bg-muted-foreground/15",
              )}
              aria-label={codeToolsEnabled ? "Disable code execution" : "Enable code execution"}
            >
              <TerminalIcon className="size-3.5" />
              <span>Code</span>
            </button>
          )}
        </div>
        <div className="flex items-center gap-1">
          {dictationSupported && (
            <>
              {!isDictating ? (
                <TooltipIconButton
                  tooltip="Dictate"
                  side="bottom"
                  variant="ghost"
                  size="icon"
                  className="size-8 rounded-full text-muted-foreground hover:bg-muted-foreground/15"
                  onClick={startDictation}
                  aria-label="Dictate"
                >
                  <MicIcon className="size-4" />
                </TooltipIconButton>
              ) : (
                <TooltipIconButton
                  tooltip="Stop dictation"
                  side="bottom"
                  variant="ghost"
                  size="icon"
                  className="size-8 rounded-full text-destructive"
                  onClick={stopDictation}
                  aria-label="Stop dictation"
                >
                  <SquareIcon className="size-3 animate-pulse fill-current" />
                </TooltipIconButton>
              )}
            </>
          )}
          {busy ? (
            <Button
              type="button"
              variant="default"
              size="icon"
              className="size-8 rounded-full"
              onClick={stop}
            >
              <SquareIcon className="size-3 fill-current" />
            </Button>
          ) : (
            <TooltipIconButton
              tooltip="Send message"
              side="bottom"
              variant="default"
              size="icon"
              className="size-8 rounded-full"
              onClick={send}
              disabled={!canSend}
            >
              <ArrowUpIcon className="size-4" />
            </TooltipIconButton>
          )}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/chat/stores/chat-runtime-store.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { create } from "zustand";
import {
  DEFAULT_INFERENCE_PARAMS,
  type ChatLoraSummary,
  type ChatModelSummary,
  type InferenceParams,
} from "../types/runtime";

// localStorage keys under which the persisted chat preferences are stored
// (read via loadBool/loadInt when the store is created, written by the setters).
const AUTO_TITLE_KEY = "unsloth_chat_auto_title";
const AUTO_HEAL_TOOL_CALLS_KEY = "unsloth_auto_heal_tool_calls";
const MAX_TOOL_CALLS_KEY = "unsloth_max_tool_calls_per_message";
const TOOL_CALL_TIMEOUT_KEY = "unsloth_tool_call_timeout";

/** True when a global `window` exists; the storage helpers bail out otherwise. */
function canUseStorage(): boolean {
  const hasWindow = typeof window !== "undefined";
  return hasWindow;
}

/**
 * Read a boolean preference from localStorage.
 *
 * Returns `fallback` when storage is unavailable, the key is absent, or the
 * read throws. Any stored value other than the string "true" yields false.
 */
function loadBool(key: string, fallback: boolean): boolean {
  if (canUseStorage()) {
    try {
      const stored = localStorage.getItem(key);
      return stored === null ? fallback : stored === "true";
    } catch {
      // fall through to the fallback below
    }
  }
  return fallback;
}

/** Persist a boolean preference as "true"/"false"; write failures are ignored. */
function saveBool(key: string, value: boolean): void {
  if (canUseStorage()) {
    try {
      localStorage.setItem(key, String(value));
    } catch {
      // ignore
    }
  }
}

/**
 * Read a base-10 integer preference from localStorage.
 *
 * Returns `fallback` when storage is unavailable, the key is absent, the
 * stored text does not parse to a number, or the read throws.
 */
function loadInt(key: string, fallback: number): number {
  if (canUseStorage()) {
    try {
      const stored = localStorage.getItem(key);
      if (stored !== null) {
        const value = Number.parseInt(stored, 10);
        if (!Number.isNaN(value)) {
          return value;
        }
      }
    } catch {
      // fall through to the fallback below
    }
  }
  return fallback;
}

/** Persist a numeric preference as its decimal string; write failures are ignored. */
function saveInt(key: string, value: number): void {
  if (canUseStorage()) {
    try {
      localStorage.setItem(key, `${value}`);
    } catch {
      // ignore
    }
  }
}

/**
 * Shape of the chat runtime zustand store: inference parameters, model/LoRA
 * lists, per-thread running flags, capability/feature toggles and the setters
 * that mutate them.
 */
type ChatRuntimeStore = {
  // Current inference parameters; `params.checkpoint` holds the active model id.
  params: InferenceParams;
  models: ChatModelSummary[];
  loras: ChatLoraSummary[];
  // Thread ids that are currently running; a key is removed when its run stops.
  runningByThreadId: Record<string, boolean>;
  // Persisted to localStorage by setAutoTitle.
  autoTitle: boolean;
  modelsError: string | null;
  // GGUF variant recorded by setCheckpoint; null when none was supplied.
  activeGgufVariant: string | null;
  ggufContextLength: number | null;
  supportsReasoning: boolean;
  reasoningEnabled: boolean;
  supportsTools: boolean;
  toolsEnabled: boolean;
  codeToolsEnabled: boolean;
  toolStatus: string | null;
  generatingStatus: string | null;
  // The next three are persisted to localStorage by their setters.
  autoHealToolCalls: boolean;
  maxToolCallsPerMessage: number;
  toolCallTimeout: number; // NOTE(review): unit not visible here — confirm
  kvCacheDtype: string | null;
  defaultChatTemplate: string | null;
  chatTemplateOverride: string | null;
  activeThreadId: string | null;
  // Audio attachment staged by the composer (set/cleared together).
  pendingAudioBase64: string | null;
  pendingAudioName: string | null;
  modelLoading: boolean;
  setModelLoading: (loading: boolean) => void;
  // Replaces the whole params object (no merge).
  setParams: (params: InferenceParams) => void;
  setModels: (models: ChatModelSummary[]) => void;
  setLoras: (loras: ChatLoraSummary[]) => void;
  setThreadRunning: (threadId: string, running: boolean) => void;
  setAutoTitle: (enabled: boolean) => void;
  setModelsError: (error: string | null) => void;
  // Sets params.checkpoint and activeGgufVariant (undefined/null variant -> null).
  setCheckpoint: (modelId: string, ggufVariant?: string | null) => void;
  setActiveThreadId: (threadId: string | null) => void;
  // Empties params.checkpoint and resets the model-dependent fields to defaults.
  clearCheckpoint: () => void;
  setReasoningEnabled: (enabled: boolean) => void;
  setToolsEnabled: (enabled: boolean) => void;
  setCodeToolsEnabled: (enabled: boolean) => void;
  setToolStatus: (status: string | null) => void;
  setGeneratingStatus: (status: string | null) => void;
  setAutoHealToolCalls: (enabled: boolean) => void;
  setMaxToolCallsPerMessage: (value: number) => void;
  setToolCallTimeout: (value: number) => void;
  setKvCacheDtype: (dtype: string | null) => void;
  setChatTemplateOverride: (template: string | null) => void;
  setPendingAudio: (base64: string, name: string) => void;
  clearPendingAudio: () => void;
};

/**
 * Global chat runtime store.
 *
 * Preferences backed by localStorage (auto-title, auto-heal, tool-call limits)
 * are read once when the store is created and written back by their setters.
 */
export const useChatRuntimeStore = create<ChatRuntimeStore>((set) => ({
  // ---- initial state ----
  params: DEFAULT_INFERENCE_PARAMS,
  models: [],
  loras: [],
  runningByThreadId: {},
  autoTitle: loadBool(AUTO_TITLE_KEY, false),
  modelsError: null,
  activeGgufVariant: null,
  ggufContextLength: null,
  supportsReasoning: false,
  reasoningEnabled: true,
  supportsTools: false,
  toolsEnabled: false,
  codeToolsEnabled: false,
  toolStatus: null,
  generatingStatus: null,
  autoHealToolCalls: loadBool(AUTO_HEAL_TOOL_CALLS_KEY, true),
  maxToolCallsPerMessage: loadInt(MAX_TOOL_CALLS_KEY, 10),
  toolCallTimeout: loadInt(TOOL_CALL_TIMEOUT_KEY, 5),
  kvCacheDtype: null,
  defaultChatTemplate: null,
  chatTemplateOverride: null,
  activeThreadId: null,
  pendingAudioBase64: null,
  pendingAudioName: null,
  modelLoading: false,

  // ---- actions ----
  setModelLoading: (modelLoading) => set({ modelLoading }),
  setParams: (nextParams) => set({ params: nextParams }),
  setModels: (nextModels) => set({ models: nextModels }),
  setLoras: (nextLoras) => set({ loras: nextLoras }),
  // Running threads are tracked by key presence: add on start, remove on stop.
  setThreadRunning: (threadId, running) =>
    set(({ runningByThreadId }) => {
      const updated = { ...runningByThreadId };
      if (!running) {
        delete updated[threadId];
      } else {
        updated[threadId] = true;
      }
      return { runningByThreadId: updated };
    }),
  setAutoTitle: (enabled) =>
    set(() => {
      saveBool(AUTO_TITLE_KEY, enabled);
      return { autoTitle: enabled };
    }),
  setModelsError: (error) => set({ modelsError: error }),
  setCheckpoint: (modelId, ggufVariant) =>
    set(({ params }) => ({
      params: { ...params, checkpoint: modelId },
      activeGgufVariant: ggufVariant ?? null,
    })),
  setActiveThreadId: (threadId) => set({ activeThreadId: threadId }),
  // Drop the active checkpoint and reset everything derived from the loaded model.
  clearCheckpoint: () =>
    set(({ params }) => ({
      params: { ...params, checkpoint: "" },
      activeGgufVariant: null,
      ggufContextLength: null,
      supportsReasoning: false,
      reasoningEnabled: true,
      supportsTools: false,
      toolsEnabled: false,
      codeToolsEnabled: false,
      toolStatus: null,
      kvCacheDtype: null,
      defaultChatTemplate: null,
      chatTemplateOverride: null,
    })),
  setReasoningEnabled: (enabled) => set({ reasoningEnabled: enabled }),
  setToolsEnabled: (enabled) => set({ toolsEnabled: enabled }),
  setCodeToolsEnabled: (enabled) => set({ codeToolsEnabled: enabled }),
  setToolStatus: (status) => set({ toolStatus: status }),
  setGeneratingStatus: (status) => set({ generatingStatus: status }),
  setAutoHealToolCalls: (enabled) =>
    set(() => {
      saveBool(AUTO_HEAL_TOOL_CALLS_KEY, enabled);
      return { autoHealToolCalls: enabled };
    }),
  setMaxToolCallsPerMessage: (value) =>
    set(() => {
      saveInt(MAX_TOOL_CALLS_KEY, value);
      return { maxToolCallsPerMessage: value };
    }),
  setToolCallTimeout: (value) =>
    set(() => {
      saveInt(TOOL_CALL_TIMEOUT_KEY, value);
      return { toolCallTimeout: value };
    }),
  setKvCacheDtype: (dtype) => set({ kvCacheDtype: dtype }),
  setChatTemplateOverride: (template) => set({ chatTemplateOverride: template }),
  setPendingAudio: (base64, name) =>
    set({ pendingAudioBase64: base64, pendingAudioName: name }),
  clearPendingAudio: () =>
    set({ pendingAudioBase64: null, pendingAudioName: null }),
}));


================================================
FILE: studio/frontend/src/features/chat/thread-sidebar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  SidebarContent,
  SidebarGroup,
  SidebarGroupContent,
  SidebarGroupLabel,
  SidebarHeader,
  SidebarMenu,
  SidebarMenuAction,
  SidebarMenuButton,
  SidebarMenuItem,
} from "@/components/ui/sidebar";
import {
  ColumnInsertIcon,
  Delete02Icon,
  PencilEdit02Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { db, useLiveQuery } from "./db";
import type { ChatView, ThreadRecord } from "./types";

/**
 * One row in the thread sidebar.
 * For "single" rows `id` is the thread id; for "compare" rows it is the shared
 * pair id, and `title`/`createdAt` come from the first thread seen for that pair.
 */
interface SidebarItem {
  type: "single" | "compare";
  id: string;
  title: string;
  createdAt: number;
}

/**
 * Collapse thread records into sidebar rows, newest first.
 *
 * Archived threads are dropped. Threads sharing a `pairId` produce a single
 * "compare" row taken from the first such thread encountered; all other
 * threads produce one "single" row each.
 */
function groupThreads(threads: ThreadRecord[]): SidebarItem[] {
  const emittedPairIds = new Set<string>();
  const rows: SidebarItem[] = [];

  threads.forEach((thread) => {
    if (thread.archived) return;

    const { pairId, title, createdAt } = thread;
    if (!pairId) {
      rows.push({ type: "single", id: thread.id, title, createdAt });
      return;
    }

    // Only the first thread of a pair contributes a row.
    if (emittedPairIds.has(pairId)) return;
    emittedPairIds.add(pairId);
    rows.push({ type: "compare", id: pairId, title, createdAt });
  });

  rows.sort((left, right) => right.createdAt - left.createdAt);
  return rows;
}

/**
 * Sidebar listing chat threads and compare sessions.
 *
 * Shows "New Chat" (and optionally "Compare") actions followed by the grouped
 * thread list. Deleting a row removes its messages and thread records, and
 * resets the view to an empty single chat when the deleted row was active.
 */
export function ThreadSidebar({
  view,
  onSelect,
  onNewThread,
  onNewCompare,
  showCompare,
}: {
  view: ChatView;
  onSelect: (view: ChatView) => void;
  onNewThread: () => void;
  onNewCompare: () => void;
  showCompare: boolean;
}) {
  const storedThreads = useLiveQuery(
    () => db.threads.orderBy("createdAt").reverse().toArray(),
    [],
  );
  // The live query yields undefined until its first result arrives.
  const items = groupThreads(storedThreads ?? []);
  const activeId = view.mode === "single" ? view.threadId : view.pairId;

  function viewForItem(item: SidebarItem): ChatView {
    if (item.type === "single") {
      return { mode: "single", threadId: item.id };
    }
    return { mode: "compare", pairId: item.id };
  }

  async function handleDelete(item: SidebarItem) {
    // A single row maps to one thread; a compare row to every thread of the pair.
    const threadIds =
      item.type === "single"
        ? [item.id]
        : (await db.threads.where("pairId").equals(item.id).toArray()).map(
            (paired) => paired.id,
          );

    // Messages first, then the thread record, one thread at a time.
    for (const threadId of threadIds) {
      await db.messages.where("threadId").equals(threadId).delete();
      await db.threads.delete(threadId);
    }

    if (activeId === item.id) {
      onSelect({ mode: "single" });
    }
  }

  return (
    <>
      <SidebarHeader className="px-4 py-3">
        <span className="text-base font-semibold tracking-tight">Playground</span>
      </SidebarHeader>
      <SidebarContent>
        <SidebarGroup className="px-4 pt-1">
          <SidebarGroupContent>
            <SidebarMenu>
              <SidebarMenuItem>
                <SidebarMenuButton onClick={onNewThread}>
                  <HugeiconsIcon icon={PencilEdit02Icon} />
                  <span>New Chat</span>
                </SidebarMenuButton>
              </SidebarMenuItem>
              {showCompare && (
                <SidebarMenuItem>
                  <SidebarMenuButton data-tour="chat-compare" onClick={onNewCompare}>
                    <HugeiconsIcon icon={ColumnInsertIcon} />
                    <span>Compare</span>
                  </SidebarMenuButton>
                </SidebarMenuItem>
              )}
            </SidebarMenu>
          </SidebarGroupContent>
        </SidebarGroup>
        <SidebarGroup className="flex-1 px-4">
          <SidebarGroupLabel className="text-xs font-medium text-muted-foreground/80">Your Chats</SidebarGroupLabel>
          <SidebarGroupContent>
            <SidebarMenu>
              {items.map((item) => {
                const isActive = activeId === item.id;
                return (
                  <SidebarMenuItem key={item.id}>
                    <SidebarMenuButton
                      isActive={isActive}
                      onClick={() => onSelect(viewForItem(item))}
                    >
                      <span>{item.title}</span>
                    </SidebarMenuButton>
                    <SidebarMenuAction
                      showOnHover
                      onClick={() => handleDelete(item)}
                      title="Delete"
                    >
                      <HugeiconsIcon icon={Delete02Icon} />
                    </SidebarMenuAction>
                  </SidebarMenuItem>
                );
              })}
            </SidebarMenu>
            {items.length === 0 ? (
              <p className="px-2 py-6 text-center text-xs text-muted-foreground">
                No threads yet
              </p>
            ) : null}
          </SidebarGroupContent>
        </SidebarGroup>
      </SidebarContent>
    </>
  );
}


================================================
FILE: studio/frontend/src/features/chat/tour/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { buildChatTourSteps } from "./steps";


================================================
FILE: studio/frontend/src/features/chat/tour/steps.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TourStep } from "@/features/tour";

/**
 * Build the guided-tour steps for the chat page.
 *
 * The model-selector and settings steps are always present; the two compare
 * steps are appended only when `canCompare` is true. The open/close callbacks
 * are wired to the steps' `onEnter` / `onExit` hooks.
 */
export function buildChatTourSteps({
  canCompare,
  openModelSelector,
  closeModelSelector,
  openSettings,
  closeSettings,
  openSidebar,
  enterCompare,
  exitCompare,
}: {
  canCompare: boolean;
  openModelSelector: () => void;
  closeModelSelector: () => void;
  openSettings: () => void;
  closeSettings: () => void;
  openSidebar: () => void;
  enterCompare: () => void;
  exitCompare: () => void;
}): TourStep[] {
  const coreSteps: TourStep[] = [
    {
      id: "model",
      target: "chat-model-selector",
      title: "Pick a model",
      body: (
        <>
          This selects what’s loaded for inference. Hub = base models. Fine-tuned
          = your LoRA adapters from Studio.
        </>
      ),
    },
    {
      id: "model-tabs",
      target: "chat-model-selector-popover",
      title: "Two tabs",
      body: (
        <>
          Hub: search Hugging Face models. Fine-tuned: adapters (LoRA) you’ve
          trained locally. If results look off, compare base vs LoRA to see what
          changed.
        </>
      ),
      onEnter: openModelSelector,
      onExit: closeModelSelector,
    },
    {
      id: "settings",
      target: "chat-settings",
      title: "Settings sidebar",
      body: (
        <>
          Sampling (temperature/top-p/top-k) + system prompt live here. If you
          want more deterministic outputs, lower temperature first.
        </>
      ),
      onEnter: openSettings,
      onExit: closeSettings,
    },
  ];

  if (!canCompare) {
    return coreSteps;
  }

  // Compare-only steps, shown after the core steps.
  const compareSteps: TourStep[] = [
    {
      id: "compare-btn",
      target: "chat-compare",
      title: "Compare mode",
      body: (
        <>
          Compare any two models side-by-side.
          Pick a different model for each side and see how they respond to the same prompt.
        </>
      ),
      onEnter: openSidebar,
    },
    {
      id: "compare-view",
      target: "chat-compare-view",
      title: "Side-by-side threads",
      body: (
        <>
          Same prompt, 2 threads. If LoRA is worse than base, it’s usually
          data formatting, too many epochs, or a bad checkpoint choice.
        </>
      ),
      onEnter: enterCompare,
      onExit: exitCompare,
    },
  ];

  return [...coreSteps, ...compareSteps];
}


================================================
FILE: studio/frontend/src/features/chat/types/api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * A single model entry as returned in `ListModelsResponse.models`.
 * Only `id` is required; the capability flags are optional.
 */
export interface BackendModelDetails {
  id: string;
  name?: string | null;
  is_vision?: boolean;
  is_lora?: boolean;
  is_gguf?: boolean;
  is_audio?: boolean;
  // NOTE(review): allowed values are not visible in this file — confirm against the backend.
  audio_type?: string | null;
  has_audio_input?: boolean;
}

/** Model listing payload: detailed entries plus a list of default model names/ids. */
export interface ListModelsResponse {
  models: BackendModelDetails[];
  // presumably model identifiers suggested by default — verify against the backend
  default_models: string[];
}

/** A single adapter/export entry as returned in `ListLorasResponse.loras`. */
export interface BackendLoraInfo {
  display_name: string;
  adapter_path: string;
  base_model?: string | null;
  // Where the entry came from: a training run or an export.
  source?: "training" | "exported" | null;
  // Kind of artifact when the entry is an export.
  export_type?: "lora" | "merged" | "gguf" | null;
}

/** LoRA listing payload: the adapter entries and the outputs directory reported with them. */
export interface ListLorasResponse {
  loras: BackendLoraInfo[];
  outputs_dir: string;
}

/**
 * Shape of a load-model request.
 * The first five fields are always required (`hf_token` must be present but
 * may be null); the remaining fields are optional.
 */
export interface LoadModelRequest {
  model_path: string;
  hf_token: string | null;
  max_seq_length: number;
  load_in_4bit: boolean;
  is_lora: boolean;
  gguf_variant?: string | null;
  /** Allow loading models with custom code (e.g. NVIDIA Nemotron). Only enable for repos you trust. */
  trust_remote_code?: boolean;
  chat_template_override?: string | null;
  cache_type_kv?: string | null;
}

/**
 * Shape of a model-validation response.
 * `valid` and `message` are always present; the identifying fields and the
 * boolean flags are optional.
 */
export interface ValidateModelResponse {
  valid: boolean;
  message: string;
  identifier?: string | null;
  display_name?: string | null;
  is_gguf?: boolean;
  is_lora?: boolean;
  is_vision?: boolean;
}

/** One GGUF variant entry, as carried in `GgufVariantsResponse.variants`. */
export interface GgufVariantDetail {
  filename: string;
  quant: string;
  size_bytes: number;
  // Optional; absent is distinct from `false` at the type level.
  downloaded?: boolean;
}

/**
 * Response shape listing the GGUF variants of one repo.
 * `default_variant` is always present but may be null.
 */
export interface GgufVariantsResponse {
  repo_id: string;
  variants: GgufVariantDetail[];
  has_vision: boolean;
  default_variant: string | null;
}

/**
 * Shape of a load-model response.
 * `status`, `model`, `display_name`, `is_vision` and `is_lora` are required;
 * everything else is optional.
 */
export interface LoadModelResponse {
  status: string;
  model: string;
  display_name: string;
  is_vision: boolean;
  is_lora: boolean;
  is_gguf?: boolean;
  is_audio?: boolean;
  audio_type?: string | null;
  has_audio_input?: boolean;
  // Optional inference settings; every member is itself optional.
  // The same inline shape is declared on `InferenceStatusResponse.inference`.
  inference?: {
    temperature?: number;
    top_p?: number;
    top_k?: number;
    min_p?: number;
    presence_penalty?: number;
    trust_remote_code?: boolean;
  };
  context_length?: number | null;
  supports_reasoning?: boolean;
  supports_tools?: boolean;
  cache_type_kv?: string | null;
  chat_template?: string | null;
}

/** Shape of an unload-model request; it carries only the model path. */
export interface UnloadModelRequest {
  model_path: string;
}

/**
 * Shape of an inference-status response.
 * `active_model` is always present but may be null; `loading` and `loaded`
 * are string lists.
 */
export interface InferenceStatusResponse {
  active_model: string | null;
  is_vision: boolean;
  is_gguf?: boolean;
  gguf_variant?: string | null;
  is_audio?: boolean;
  audio_type?: string | null;
  has_audio_input?: boolean;
  // NOTE(review): presumably model identifiers — confirm against the backend.
  loading: string[];
  loaded: string[];
  // Optional inference settings; every member is itself optional.
  // The same inline shape is declared on `LoadModelResponse.inference`.
  inference?: {
    temperature?: number;
    top_p?: number;
    top_k?: number;
    min_p?: number;
    presence_penalty?: number;
    trust_remote_code?: boolean;
  };
  supports_reasoning?: boolean;
  supports_tools?: boolean;
  context_length?: number | null;
}

/**
 * Shape of an audio-generation response: identifying fields, an `audio`
 * payload, and a `choices` list of messages with a finish reason.
 */
export interface AudioGenerationResponse {
  id: string;
  object: string;
  model: string;
  audio: {
    // NOTE(review): presumably encoded audio (e.g. base64) — the encoding is not declared here; confirm.
    data: string;
    format: string;
    sample_rate: number;
  };
  choices: Array<{
    index: number;
    message: { role: string; content: string };
    finish_reason: string;
  }>;
}

/** A single chat message: one of three fixed roles plus plain string content. */
export interface OpenAIChatMessage {
  role: "system" | "user" | "assistant";
  content: string;
}

/**
 * Shape of a chat-completions request.
 * `model`, `messages`, `stream` and all sampling fields are required; the
 * media, adapter, thinking, tool and session fields are optional.
 */
export interface OpenAIChatCompletionsRequest {
  model: string;
  messages: OpenAIChatMessage[];
  stream: boolean;
  // Sampling fields (all required).
  temperature: number;
  top_p: number;
  max_tokens: number;
  top_k: number;
  min_p: number;
  repetition_penalty: number;
  presence_penalty: number;
  // Optional media attachments, passed as strings.
  image_base64?: string;
  audio_base64?: string;
  // Accepts a boolean, a string, or null; the meaning of each form is not declared here.
  use_adapter?: boolean | string | null;
  enable_thinking?: boolean | null;
  // Optional tool-related settings.
  enable_tools?: boolean | null;
  enabled_tools?: string[];
  auto_heal_tool_calls?: boolean;
  max_tool_calls_per_message?: number;
  // NOTE(review): unit (seconds vs. milliseconds) is not declared here — confirm.
  tool_call_timeout?: number;
  session_id?: string;
}

/** Delta carried by a chunk choice; both fields are optional. */
export interface OpenAIChatDelta {
  role?: string;
  content?: string;
}

/** One entry of `OpenAIChatChunk.choices`: an optional delta and an optional, nullable finish reason. */
export interface OpenAIChatChunkChoice {
  delta?: OpenAIChatDelta;
  finish_reason?: string | null;
}

/** A chat chunk; `choices` may be absent, so consumers must handle `undefined`. */
export interface OpenAIChatChunk {
  choices?: OpenAIChatChunkChoice[];
}


================================================
FILE: studio/frontend/src/features/chat/types/runtime.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Inference parameters in camelCase form.
 * Every field is required except `trustRemoteCode`.
 */
export interface InferenceParams {
  temperature: number;
  topP: number;
  topK: number;
  minP: number;
  repetitionPenalty: number;
  presencePenalty: number;
  maxSeqLength: number;
  maxTokens: number;
  systemPrompt: string;
  checkpoint: string;
  /** Allow loading models with custom code (e.g. NVIDIA Nemotron). Only enable for repos you trust. */
  trustRemoteCode?: boolean;
}

/**
 * Default value for every `InferenceParams` field, including the optional
 * `trustRemoteCode` (off by default).
 */
export const DEFAULT_INFERENCE_PARAMS: InferenceParams = {
  temperature: 0.6,
  topP: 0.95,
  topK: 20,
  minP: 0.01,
  repetitionPenalty: 1.0,
  presencePenalty: 0.0,
  // NOTE(review): the default `maxTokens` (8192) exceeds the default `maxSeqLength` (4096) — confirm this is intended.
  maxSeqLength: 4096,
  maxTokens: 8192,
  // Empty strings: no system prompt and no checkpoint selected by default.
  systemPrompt: "",
  checkpoint: "",
  trustRemoteCode: false,
};

/**
 * camelCase summary of a chat model.
 * `id`, `name`, `isVision` and `isLora` are required; the rest is optional.
 */
export interface ChatModelSummary {
  id: string;
  name: string;
  description?: string;
  isVision: boolean;
  isLora: boolean;
  isGguf?: boolean;
  isAudio?: boolean;
  audioType?: string | null;
  hasAudioInput?: boolean;
}

/**
 * camelCase summary of a LoRA entry.
 * Unlike `source`/`exportType` here, which are optional but never null,
 * `baseModel` is a required string.
 */
export interface ChatLoraSummary {
  id: string;
  name: string;
  baseModel: string;
  // NOTE(review): presumably an epoch timestamp — unit is not declared here; confirm.
  updatedAt?: number;
  source?: "training" | "exported";
  exportType?: "lora" | "merged" | "gguf";
}


================================================
FILE: studio/frontend/src/features/chat/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/** Closed set of model-type tags; used as the type of `ThreadRecord.modelType`. */
export type ModelType = "base" | "lora" | "model1" | "model2";

/**
 * Chat view state, discriminated on `mode`:
 * - "single": both `threadId` and `newThreadNonce` are optional.
 * - "compare": `pairId` is required.
 */
export type ChatView =
  | { mode: "single"; threadId?: string; newThreadNonce?: string }
  | { mode: "compare"; pairId: string };

/**
 * A chat thread record.
 * `modelId` and `pairId` are optional; all other fields are required.
 */
export interface ThreadRecord {
  id: string;
  title: string;
  modelType: ModelType;
  modelId?: string;
  // NOTE(review): presumably matches `pairId` of the "compare" `ChatView` — confirm.
  pairId?: string;
  archived: boolean;
  // NOTE(review): presumably an epoch timestamp — unit is not declared here; confirm.
  createdAt: number;
}

/**
 * A chat message record tied to a thread via `threadId`.
 * `role`, `content` and `attachments` reuse the corresponding member types of
 * `ThreadMessage` from "@assistant-ui/react" through inline import types, so
 * this file needs no top-level import.
 */
export interface MessageRecord {
  id: string;
  threadId: string;
  role: import("@assistant-ui/react").ThreadMessage["role"];
  content: import("@assistant-ui/react").ThreadMessage["content"];
  attachments?: import("@assistant-ui/react").ThreadMessage["attachments"];
  // Open-ended bag of extra values; no schema is declared here.
  metadata?: Record<string, unknown>;
  createdAt: number;
}


================================================
FILE: studio/frontend/src/features/chat/utils/parse-assistant-content.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ChatModelRunResult } from "@assistant-ui/react";

// Element type of the (non-null) `content` array of a ChatModelRunResult.
type ContentPart = NonNullable<ChatModelRunResult["content"]>[number];

// Literal delimiters that wrap reasoning text inside raw assistant output.
const THINK_OPEN_TAG = "<think>";
const THINK_CLOSE_TAG = "</think>";

/** Pushes a `text` part onto `parts`; an empty string is ignored. */
function appendTextPart(parts: ContentPart[], text: string): void {
  if (!text) {
    return;
  }
  parts.push({ type: "text", text });
}

/** Pushes a `reasoning` part onto `parts`; an empty string is ignored. */
function appendReasoningPart(parts: ContentPart[], text: string): void {
  if (!text) {
    return;
  }
  parts.push({ type: "reasoning", text });
}

/**
 * Splits raw assistant output into ordered `text` and `reasoning` parts.
 *
 * Everything between `<think>` and the next `</think>` becomes a reasoning
 * part; everything else becomes a text part. Empty segments are dropped.
 * An opening tag without a closing tag turns the rest of the string into
 * reasoning. A closing tag without a preceding opening tag is left in the
 * text as-is.
 *
 * @param raw Raw assistant output; a falsy value yields an empty array.
 * @returns The parts in the order they occur in `raw`.
 */
export function parseAssistantContent(
  raw: string,
): ContentPart[] {
  const parts: ContentPart[] = [];
  if (raw) {
    let position = 0;
    while (position < raw.length) {
      // Text runs up to the next opening tag, or to the end if there is none.
      const thinkStart = raw.indexOf(THINK_OPEN_TAG, position);
      const sawOpenTag = thinkStart !== -1;
      appendTextPart(
        parts,
        raw.slice(position, sawOpenTag ? thinkStart : raw.length),
      );
      if (!sawOpenTag) {
        break;
      }

      // Reasoning runs up to the matching closing tag, or to the end when
      // the block was never closed.
      const bodyStart = thinkStart + THINK_OPEN_TAG.length;
      const thinkEnd = raw.indexOf(THINK_CLOSE_TAG, bodyStart);
      const sawCloseTag = thinkEnd !== -1;
      appendReasoningPart(
        parts,
        raw.slice(bodyStart, sawCloseTag ? thinkEnd : raw.length),
      );
      if (!sawCloseTag) {
        break;
      }

      position = thinkEnd + THINK_CLOSE_TAG.length;
    }
  }
  return parts;
}

/**
 * Reports whether `raw` contains the literal closing think tag anywhere.
 * It does not check that a matching opening tag precedes it.
 */
export function hasClosedThinkTag(raw: string): boolean {
  return raw.includes(THINK_CLOSE_TAG);
}


================================================
FILE: studio/frontend/src/features/data-recipes/data/recipes-db.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { createEmptyRecipePayload } from "@/features/recipe-studio";
import { normalizeNonEmptyName } from "@/utils";
import Dexie, { type EntityTable, liveQuery } from "dexie";
import { useEffect, useState } from "react";
import type { RecipeRecord, SaveRecipeInput } from "../types";

// Dexie database "unsloth-data-recipes", typed with a single `recipes` table
// whose key property is `id`.
const db = new Dexie("unsloth-data-recipes") as Dexie & {
  recipes: EntityTable<RecipeRecord, "id">;
};

// Schema version 1. In Dexie's schema syntax the first entry (`id`) is the
// primary key and the remaining entries are indexed properties.
db.version(1).stores({
  recipes: "id, name, updatedAt, createdAt",
});

// In-memory map of recipe id -> record. Entries are added by
// `writeRecipeCache` and removed per id by `deleteRecipe`; nothing else
// evicts them.
const recentRecipeCache = new Map<string, RecipeRecord>();

/** Resolves with all stored recipes, ordered by `updatedAt` descending. */
export function listRecipes(): Promise<RecipeRecord[]> {
  return db.recipes.orderBy("updatedAt").reverse().toArray();
}

/** Looks up one recipe by id in the database; resolves with `undefined` when no record matches. */
export function getRecipe(id: string): Promise<RecipeRecord | undefined> {
  return db.recipes.get(id);
}

/** Stores `record` in the in-memory cache under its id, replacing any previous entry. */
function writeRecipeCache(record: RecipeRecord): void {
  recentRecipeCache.set(record.id, record);
}

/**
 * Synchronous lookup in the in-memory cache only; the database is not read.
 * Returns `null` when the id is not cached.
 */
export function getCachedRecipe(id: string): RecipeRecord | null {
  return recentRecipeCache.get(id) ?? null;
}

/** Exported entry point for placing a record in the in-memory cache; the database is not written. */
export function primeRecipeCache(record: RecipeRecord): void {
  writeRecipeCache(record);
}

/**
 * Creates or updates a recipe, then mirrors it into the in-memory cache.
 *
 * When `input.id` is given, the stored record (if any) supplies `createdAt`
 * and the learning-recipe fields the input leaves unset. Without an id a
 * fresh UUID is generated. `updatedAt` is always set to the current time.
 *
 * @param input Recipe fields to persist.
 * @returns The record exactly as written to the database.
 */
export async function saveRecipe(
  input: SaveRecipeInput,
): Promise<RecipeRecord> {
  const timestamp = Date.now();
  const recipeId = input.id ?? crypto.randomUUID();

  // Only look up a previous version when the caller supplied an id.
  let previous: RecipeRecord | undefined;
  if (input.id) {
    previous = await db.recipes.get(input.id);
  }

  const learningRecipeId = input.learningRecipeId ?? previous?.learningRecipeId;
  const learningRecipeTitle =
    input.learningRecipeTitle ?? previous?.learningRecipeTitle;

  const record: RecipeRecord = {
    id: recipeId,
    name: normalizeNonEmptyName(input.name),
    payload: input.payload,
    createdAt: previous?.createdAt ?? timestamp,
    updatedAt: timestamp,
    learningRecipeId,
    learningRecipeTitle,
  };

  await db.recipes.put(record);
  writeRecipeCache(record);
  return record;
}

/**
 * Deletes the recipe from the database, then drops it from the in-memory cache.
 * The cache entry is only removed after the database delete has resolved.
 */
export async function deleteRecipe(id: string): Promise<void> {
  await db.recipes.delete(id);
  recentRecipeCache.delete(id);
}

/** Saves a new recipe named "Unnamed" with an empty payload and resolves with the stored record. */
export function createRecipeDraft(): Promise<RecipeRecord> {
  return saveRecipe({
    name: "Unnamed",
    payload: createEmptyRecipePayload(),
  });
}

/**
 * Saves a new recipe from a learning-recipe template.
 * The template title is used both as the recipe name and as
 * `learningRecipeTitle`; the template id is recorded as `learningRecipeId`.
 */
export function createRecipeFromLearningRecipe(input: {
  templateId: string;
  templateTitle: string;
  payload: RecipeRecord["payload"];
}): Promise<RecipeRecord> {
  return saveRecipe({
    name: input.templateTitle,
    payload: input.payload,
    learningRecipeId: input.templateId,
    learningRecipeTitle: input.templateTitle,
  });
}

/**
 * React hook that exposes the live list of stored recipes.
 *
 * Subscribes to a Dexie live query on mount and unsubscribes on unmount.
 * Each emission refreshes the in-memory cache and the returned list.
 * `ready` becomes true after the first emission or the first error.
 */
export function useRecipes(): {
  recipes: RecipeRecord[];
  ready: boolean;
} {
  const [recipes, setRecipes] = useState<RecipeRecord[]>([]);
  const [ready, setReady] = useState(false);

  useEffect(() => {
    const handleRecipes = (latest: RecipeRecord[]): void => {
      latest.forEach((recipe) => {
        writeRecipeCache(recipe);
      });
      setRecipes(latest);
      setReady(true);
    };

    const handleError = (error: unknown): void => {
      console.error("data-recipes liveQuery:", error);
      // Still flip `ready` so consumers are not left waiting forever.
      setReady(true);
    };

    const subscription = liveQuery(() => listRecipes()).subscribe({
      next: handleRecipes,
      error: handleError,
    });

    return () => {
      subscription.unsubscribe();
    };
  }, []);

  return { recipes, ready };
}


================================================
FILE: studio/frontend/src/features/data-recipes/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { DataRecipesPage } from "./pages/data-recipes-page";
export { EditRecipePage } from "./pages/edit-recipe-page";


================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/conversation.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "provider_1",
        "endpoint": "https://openrouter.ai/api/v1",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "model_1",
        "model": "mistralai/ministral-8b-2512",
        "provider": "provider_1",
        "inference_parameters": {
          "temperature": 0.7,
          "max_tokens": 2048
        }
      }
    ],
    "tool_configs": [],
    "columns": [
      {
        "column_type": "sampler",
        "name": "domain",
        "drop": true,
        "sampler_type": "category",
        "params": {
          "values": [
            "Tech Support",
            "Personal Finance",
            "Learning"
          ]
        }
      },
      {
        "column_type": "sampler",
        "name": "topic",
        "drop": true,
        "sampler_type": "subcategory",
        "params": {
          "category": "domain",
          "values": {
            "Tech Support": [
              "Wi-Fi keeps disconnecting",
              "Laptop running very slow",
              "Cannot install app update"
            ],
            "Personal Finance": [
              "Monthly budget planning",
              "Credit card debt payoff",
              "Emergency fund setup"
            ],
            "Learning": [
              "Exam study plan",
              "Learn Python basics",
              "Improve English writing"
            ]
          }
        }
      },
      {
        "column_type": "sampler",
        "name": "conversation_length",
        "drop": true,
        "sampler_type": "category",
        "params": {
          "values": [
            "4",
            "6"
          ]
        }
      },
      {
        "column_type": "llm-text",
        "name": "user_goal",
        "drop": false,
        "model_alias": "model_1",
        "prompt": "Write one user goal for a chat assistant.\nDomain: {{ domain }}\nTopic: {{ topic }}\nConversation length target: {{ conversation_length }} messages total.\nRules:\n- 1 sentence.\n- Specific and practical.\n- Output only the goal text.",
        "system_prompt": "You write realistic user goals for assistant conversations.\n",
        "with_trace": "none"
      },
      {
        "column_type": "llm-structured",
        "name": "output_format",
        "drop": false,
        "model_alias": "model_1",
        "prompt": "Generate a realistic multi-turn conversation.\nUser goal:\n{{ user_goal }}\nConstraints:\n- Exactly {{ conversation_length }} messages total.\n- Alternate roles strictly: user, assistant, user, assistant...\n- First message must be user.\n- Last message must be assistant.\n- Keep responses grounded in {{ domain }} / {{ topic }}.\n- End naturally with resolution or clear next step.\n- No markdown, no extra keys.",
        "output_format": {
          "type": "object",
          "properties": {
            "conversation": {
              "type": "array",
              "minItems": 4,
              "maxItems": 6,
              "items": {
                "type": "object",
                "properties": {
                  "role": {
                    "type": "string",
                    "enum": [
                      "user",
                      "assistant"
                    ]
                  },
                  "content": {
                    "type": "string",
                    "minLength": 1
                  }
                },
                "required": [
                  "role",
                  "content"
                ],
                "additionalProperties": false
              }
            }
          },
          "required": [
            "conversation"
          ],
          "additionalProperties": false
        }
      }
    ],
    "processors": []
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": [
      "jsonl"
    ]
  },
  "ui": {
    "nodes": [
      {
        "id": "provider_1",
        "x": -1056.848383841495,
        "y": 519.6373927070263,
        "width": 400
      },
      {
        "id": "model_1",
        "x": -543.7221365246206,
        "y": 488.2975724283656,
        "width": 400
      },
      {
        "id": "domain",
        "x": 0,
        "y": 140,
        "width": 400
      },
      {
        "id": "topic",
        "x": 0,
        "y": 280,
        "width": 400
      },
      {
        "id": "conversation_length",
        "x": 466.61510192672256,
        "y": 139.68271861864798,
        "width": 400
      },
      {
        "id": "user_goal",
        "x": 1.412158386197035,
        "y": 508.77123580445596,
        "width": 400
      },
      {
        "id": "output_format",
        "x": 1.1486983549970375,
        "y": 754.4221089431811,
        "width": 400
      },
      {
        "id": "note_1",
        "x": 210.01377182764494,
        "y": -262.9440547613487,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "###  Start with controlled chat context\nThis recipe uses sampler columns to shape each conversation:\n\n- `domain`\n- `topic`\n- `conversation_length` (4 or 6 messages)\n\n**Why this helps**:\n\n- You get varied conversations without manual writing\n- Each row stays grounded in a clear scenario\n- You can scale quickly while keeping data quality consistent",
        "note_color": "#FFE4E6",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": 515.9369583007435,
        "y": 454.3936030274385,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "The **LLM Text** block (`user_goal`) creates one realistic user intent from sampler context.\n\n**It should be**:\n\n- **specific**\n- **practical**\n- **short**\n\nThis goal becomes the anchor for the full multi-turn conversation.",
        "note_color": "#FFE4E6",
        "note_opacity": "35"
      },
      {
        "id": "note_3",
        "x": -12.952616065779239,
        "y": 912.1316336111515,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_3",
        "markdown": "The **LLM Structured** block (`output_format`) generates the conversation as strict JSON.\n\nIn this recipe, schema enforces:\n\n- `conversation` array\n- message objects with `role` + `content`\n- role enum: `user` / `assistant`\n- no extra keys\n\nPrompt constraints also enforce:\n\n- exact length (`{{ conversation_length }}`)\n- alternating roles\n- first user message, last assistant message\n- natural ending\n\nThis is key for training data: same shape, less cleanup.",
        "note_color": "#FFE4E6",
        "note_opacity": "35"
      },
      {
        "id": "note_4",
        "x": -519.9585237323188,
        "y": 81.84144119564277,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_4",
        "markdown": "Sampler columns are useful during generation but usually noisy in final export.\n\nSet helper columns to `drop=true`, keep only core outputs such as:\n\n- `user_goal`\n- `output_format`\n\nTip: Keep final schema close to your training format, not your generation scaffolding.\n",
        "note_color": "#FFE4E6",
        "note_opacity": "35"
      }
    ],
    "edges": [
      {
        "from": "domain",
        "to": "topic",
        "type": "canvas",
        "source_handle": "data-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "domain",
        "to": "conversation_length",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "topic",
        "to": "user_goal",
        "type": "canvas",
        "source_handle": "data-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "user_goal",
        "to": "output_format",
        "type": "canvas",
        "source_handle": "data-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "provider_1",
        "to": "model_1",
        "type": "semantic",
        "source_handle": "semantic-out",
        "target_handle": "semantic-in"
      },
      {
        "from": "model_1",
        "to": "user_goal",
        "type": "semantic",
        "source_handle": "semantic-out",
        "target_handle": "data-in"
      },
      {
        "from": "model_1",
        "to": "output_format",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in"
      }
    ],
    "layout_direction": "LR"
  }
}


================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipePayload } from "@/features/recipe-studio";

// Absolute URLs of the bundled template JSON files, resolved relative to this module.
// NOTE(review): each call is presumably kept as a literal
// `new URL("<path>", import.meta.url)` so the bundler can detect the asset
// statically — confirm before factoring these into a shared helper.
const structuredOutputsJinjaUrl = new URL(
  "./structured-outputs-jinja.json",
  import.meta.url,
).href;
const pdfGroundedQaUrl = new URL("./pdf-grounded-qa.json", import.meta.url)
  .href;
const instructionFromAnswerUrl = new URL(
  "./instruction-from-answer.json",
  import.meta.url,
).href;
const textToPythonUrl = new URL("./text-to-python.json", import.meta.url).href;
const textToSqlUrl = new URL("./text-to-sql.json", import.meta.url).href;
const ocrDocumentExtractionUrl = new URL(
  "./ocr-document-extraction.json",
  import.meta.url,
).href;

/** Type guard: true for any non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}

/**
 * Returns the plain-object items of `value`.
 * Non-array input yields an empty array; non-object items are dropped.
 */
function toRecordArray(value: unknown): Record<string, unknown>[] {
  return Array.isArray(value) ? value.filter(isRecord) : [];
}

/**
 * Normalizes parsed template JSON into a `RecipePayload`.
 *
 * Two input shapes are accepted: a wrapped payload (`{ recipe, run, ui }`)
 * and a bare recipe object. A wrapped payload whose three sections are all
 * objects is returned unchanged. Anything else is rebuilt from the recipe
 * fields with default `run` settings and an empty `ui` graph.
 *
 * @param value Parsed JSON of unknown shape.
 * @returns The payload to load into the editor.
 * @throws Error when `value` is not an object, or when the recipe section
 *   has no `columns` array.
 */
function coerceRecipePayload(value: unknown): RecipePayload {
  if (!isRecord(value)) {
    throw new Error("Template payload is invalid JSON object.");
  }

  // Fall back to treating the whole object as the recipe when there is no
  // `recipe` wrapper.
  const recipeSource = isRecord(value.recipe) ? value.recipe : value;
  if (!Array.isArray(recipeSource.columns)) {
    throw new Error("Template payload must include recipe.columns.");
  }

  const isCompletePayload =
    isRecord(value.recipe) && isRecord(value.run) && isRecord(value.ui);
  if (isCompletePayload) {
    return value as unknown as RecipePayload;
  }

  const seedConfig = isRecord(recipeSource.seed_config)
    ? recipeSource.seed_config
    : undefined;
  const listOf = (key: string): Record<string, unknown>[] =>
    toRecordArray(recipeSource[key]);

  return {
    recipe: {
      // biome-ignore lint/style/useNamingConvention: api schema
      model_providers: listOf("model_providers"),
      // biome-ignore lint/style/useNamingConvention: api schema
      mcp_providers: listOf("mcp_providers"),
      // biome-ignore lint/style/useNamingConvention: api schema
      model_configs: listOf("model_configs"),
      // biome-ignore lint/style/useNamingConvention: api schema
      seed_config: seedConfig,
      // biome-ignore lint/style/useNamingConvention: api schema
      tool_configs: listOf("tool_configs"),
      columns: listOf("columns"),
      processors: listOf("processors"),
    },
    run: {
      rows: 5,
      preview: true,
      // biome-ignore lint/style/useNamingConvention: api schema
      output_formats: ["jsonl"],
    },
    ui: {
      nodes: [],
      edges: [],
    },
  };
}

/**
 * Fetches a template JSON file and normalizes it into a `RecipePayload`.
 *
 * @param url Location of the template JSON.
 * @throws Error when the response status is not OK, or when the payload
 *   fails normalization.
 */
async function loadPayloadFromUrl(url: string): Promise<RecipePayload> {
  const response = await fetch(url);
  if (response.ok) {
    const parsed: unknown = await response.json();
    return coerceRecipePayload(parsed);
  }
  throw new Error(`Failed to fetch template payload (${response.status})`);
}

/**
 * Descriptor of one learning recipe: display metadata plus a loader that
 * resolves with the recipe payload when called.
 */
export type LearningRecipeDef = {
  id: string;
  title: string;
  description: string;
  loadPayload: () => Promise<RecipePayload>;
};

/**
 * The learning recipes, in declaration order.
 * Each `loadPayload` fetches its JSON file only when invoked, so nothing is
 * downloaded until a recipe's loader is called.
 */
export const LEARNING_RECIPES: LearningRecipeDef[] = [
  {
    id: "structured-outputs-jinja",
    title: "Structured Outputs + Jinja Expressions",
    description:
      "Support ticket triage with structured JSON outputs and Jinja conditionals.",
    loadPayload: () => loadPayloadFromUrl(structuredOutputsJinjaUrl),
  },
  {
    id: "pdf-grounded-qa",
    title: "PDF Document QA",
    description: "Build grounded question-answer examples from PDF chunks.",
    loadPayload: () => loadPayloadFromUrl(pdfGroundedQaUrl),
  },
  {
    id: "instruction-from-answer",
    title: "Instruction from Answer",
    description:
      "Use seed answer columns to generate high-quality instruction targets.",
    loadPayload: () => loadPayloadFromUrl(instructionFromAnswerUrl),
  },
  {
    id: "text-to-python",
    title: "Text to Python",
    description:
      "Generate instruction-to-code data with category sampling and LLM judging.",
    loadPayload: () => loadPayloadFromUrl(textToPythonUrl),
  },
  {
    id: "text-to-sql",
    title: "Text to SQL",
    description:
      "Generate SQL tasks and runnable SQL outputs with prompt-driven generation.",
    loadPayload: () => loadPayloadFromUrl(textToSqlUrl),
  },
  {
    id: "ocr-document-extraction",
    title: "OCR Document Extraction",
    description:
      "Use image context to generate OCR-style document extraction output.",
    loadPayload: () => loadPayloadFromUrl(ocrDocumentExtractionUrl),
  },
];


================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/instruction-from-answer.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "openai_provider",
        "endpoint": "",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "ministral",
        "model": "",
        "provider": "openai_provider",
        "inference_parameters": {
          "temperature": 0.7,
          "max_tokens": 2048
        }
      }
    ],
    "seed_config": {
      "source": {
        "seed_type": "hf",
        "path": "unsloth/alpaca-cleaned",
        "endpoint": "https://huggingface.co"
      },
      "sampling_strategy": "ordered",
      "selection_strategy": {
        "start": 1,
        "end": 100
      }
    },
    "tool_configs": [],
    "columns": [
      {
        "column_type": "llm-text",
        "name": "generated_instruction",
        "drop": false,
        "model_alias": "ministral",
        "prompt": "Based on this target answer:\n{{ output }}\n\nWrite one high-quality plain text short and brief user instruction that this answer would satisfy.\nReturn only the instruction.",
        "with_trace": "none",
        "extract_reasoning_content": false
      }
    ],
    "processors": [
      {
        "processor_type": "drop_columns",
        "name": "drop_seed_columns",
        "column_names": [
          "input",
          "instruction"
        ]
      }
    ]
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": [
      "jsonl"
    ]
  },
  "ui": {
    "nodes": [
      {
        "id": "note_1",
        "x": -567.3566303099885,
        "y": 38.88875727651093,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "#### Hugging Face seed block\nThis recipe uses a ** HuggingFace dataset ** as seed data.\nYou select a Hugging Face dataset, load columns, then generate new fields from seed columns. Each column in the Hugging Face dataset becomes a valid variable that you can reference eg. `{{ topic }}`\n\n##### Setup:\n\n1. Search for a dataset and select one in the dropdown (example: `unsloth/alpaca-cleaned`)\n2. Add token only if dataset is gated/private\n3. Load columns + preview rows so variables are available in prompts\n\n##### Why this matters:\n- Seed columns can drive generation quality\n- You can reference seed values directly in prompts (for example `{{ output }}`)",
        "note_color": "#DCFCE7",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": -74.04047072330651,
        "y": -265.3540670633283,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "##### Drop columns behavior:\n\n- You can mark specific seed columns to **drop from final output**\n- Those columns are still used during generation\n- They are removed only from exported final dataset\n\n##### Example:\n- Keep `generated_instruction` from llm-text block\n- Drop original `instruction`, `input`, `output` from the hugginface dataset from final dataset\n- Result: clean training output while still using source columns as generation context\n",
        "note_color": "#DCFCE7",
        "note_opacity": "35"
      },
      {
        "id": "seed",
        "x": -76.07288662013991,
        "y": 143.39449780463954,
        "width": 400
      },
      {
        "id": "openai_provider",
        "x": 461.00000000000006,
        "y": -489.8750000000001,
        "width": 400
      },
      {
        "id": "ministral",
        "x": 463.272022949692,
        "y": -191.13601147484601,
        "width": 400
      },
      {
        "id": "generated_instruction",
        "x": 464,
        "y": 109.00000000000003,
        "width": 400
      }
    ],
    "edges": [
      {
        "from": "seed",
        "to": "generated_instruction",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "ministral",
        "to": "generated_instruction",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "openai_provider",
        "to": "ministral",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "semantic-in-top"
      }
    ],
    "layout_direction": "LR",
    "seed_source_type": "hf",
    "seed_columns": [],
    "seed_drop_columns": [],
    "seed_preview_rows": [],
    "local_file_name": "",
    "unstructured_file_name": "",
    "unstructured_chunk_size": "1200",
    "unstructured_chunk_overlap": "200"
  }
}


================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/ocr-document-extraction.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "provider_1",
        "endpoint": "https://openrouter.ai/api/v1",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "provider_column",
        "model": "google/gemini-2.0-flash-001",
        "provider": "provider_1",
        "inference_parameters": {
          "temperature": 0.2,
          "max_tokens": 4096
        }
      }
    ],
    "seed_config": {
      "source": {
        "seed_type": "hf",
        "path": "datasets/ylecun/mnist/mnist/**/*.parquet"
      },
      "sampling_strategy": "ordered",
      "selection_strategy": null
    },
    "tool_configs": [],
    "columns": [
      {
        "column_type": "llm-text",
        "name": "ocr_text",
        "drop": false,
        "model_alias": "provider_column",
        "prompt": "Transcribe all text from this document image.",
        "multi_modal_context": [
          {
            "modality": "image",
            "column_name": "image"
          }
        ]
      }
    ],
    "processors": []
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": ["jsonl"]
  },
  "ui": {
    "nodes": [
      {
        "id": "note_1",
        "x": -180,
        "y": 43,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "This recipe uses **Gemini 2.0 Flash** via OpenRouter to transcribe document images into clean text.\n\nThe Seed block is prefilled with `ylecun/mnist` so you can run immediately. You can swap to any Hugging Face dataset that includes an `image` column.\n\nOutput: `ocr_text` column with the raw transcribed text per image.",
        "note_color": "#DCFCE7",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": 283,
        "y": -333,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "##### Setup\n\nAdd your OpenRouter API key to the **Model Provider** block — same as every other recipe.\n\nGemini 2.0 Flash is well-suited for OCR: fast, cheap, and strong on tables, receipts, forms, and multi-column layouts.\n\nWant a purpose-built OCR model? Swap the endpoint to a local vLLM server running `lightonai/LightOnOCR-2-1B` for maximum throughput.",
        "note_color": "#DCFCE7",
        "note_opacity": "35"
      },
      {
        "id": "note_3",
        "x": 303,
        "y": 299,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_3",
        "markdown": "##### Seed: HF dataset with image column\n\nThis template starts with `ylecun/mnist` so first run works without seed setup.\n\nTo use your own data: open Seed → keep **HF dataset** selected → choose a dataset that contains an `image` column → click **Load**.\n\nThen open the LLM Text block and set **Image Context** to the `image` column so each row image is sent with the prompt.\n\nTip: datasets with embedded image columns are more reliable than URL-only image fields.",
        "note_color": "#DCFCE7",
        "note_opacity": "35"
      },
      {
        "id": "seed",
        "x": 295,
        "y": 108,
        "width": 400
      },
      {
        "id": "provider_1",
        "x": 960,
        "y": -465,
        "width": 400
      },
      {
        "id": "provider_column",
        "x": 959,
        "y": -180,
        "width": 400
      },
      {
        "id": "ocr_text",
        "x": 960,
        "y": 108,
        "width": 400
      }
    ],
    "edges": [
      {
        "from": "seed",
        "to": "ocr_text",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "provider_1",
        "to": "provider_column",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "semantic-in-top"
      },
      {
        "from": "provider_column",
        "to": "ocr_text",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      }
    ],
    "layout_direction": "LR",
    "seed_source_type": "hf",
    "seed_columns": [],
    "seed_drop_columns": [],
    "seed_preview_rows": [],
    "local_file_name": "",
    "unstructured_file_name": "",
    "unstructured_chunk_size": "900",
    "unstructured_chunk_overlap": "150"
  }
}


================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/pdf-grounded-qa.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "provider_1",
        "endpoint": "",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "provider_column",
        "model": "",
        "provider": "provider_1",
        "inference_parameters": {
          "temperature": 0.7
        }
      }
    ],
    "seed_config": {
      "source": {
        "seed_type": "unstructured",
        "path": "",
        "chunk_size": 1200,
        "chunk_overlap": 200
      },
      "sampling_strategy": "ordered",
      "selection_strategy": null
    },
    "tool_configs": [],
    "columns": [
      {
        "column_type": "llm-structured",
        "name": "llm_structured_1",
        "drop": false,
        "model_alias": "provider_column",
        "prompt": "Given ONLY this chunk: {{ chunk_text }} generate one answerable question, answer, and exact supporting quote from chunk. If not answerable, skip.",
        "with_trace": "none",
        "extract_reasoning_content": false,
        "output_format": {
          "type": "object",
          "additionalProperties": false,
          "required": [
            "question",
            "answer",
            "evidence_quote"
          ],
          "properties": {
            "question": {
              "type": "string"
            },
            "answer": {
              "type": "string"
            },
            "evidence_quote": {
              "type": "string"
            }
          }
        }
      }
    ],
    "processors": []
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": [
      "jsonl"
    ]
  },
  "ui": {
    "nodes": [
      {
        "id": "note_1",
        "x": 474.6120044693708,
        "y": 1229.5810476890458,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "This recipe uses **seed data** from external documents.\nInstead of starting from empty generation, we load real source text first.\n\nIn this flow, the seed source is **Unstructured Documents**:\n\n- Upload: `.pdf`, `.docx`, `.txt`\n- Text is extracted and split on client into chunks\n- Each chunk becomes a row-like seed record (`chunk_text`) that you can reference in prompts with `{{ chunk_text }} `",
        "note_color": "#F3E8FF",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": 26.758540311329455,
        "y": 963.3465578835235,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "##### Chunking settings:\n\n- **Chunk size**: how much text per chunk\n- **Chunk overlap**: shared text between neighboring chunks to preserve context\n\n##### Sampling settings:\n\n- **Ordered**: keep original document order\n- **Shuffle**: randomize chunk order\n- **Selection index / selection settings**: choose which part/subset of seed data to use",
        "note_color": "#F3E8FF",
        "note_opacity": "35"
      },
      {
        "id": "note_3",
        "x": 473.62551435180245,
        "y": 741.5352258256931,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_3",
        "markdown": "- LLM prompt: `{{ chunk_text }}`\n- Expression block: combine/format values using `{{ chunk_text }}`\n- Processor templates: use `{{ chunk_text }}` during transforms\n\nTip:\n- Start with medium chunk size + small overlap.\n- Increase overlap only if answers lose context between chunks.",
        "note_color": "#F3E8FF",
        "note_opacity": "35"
      },
      {
        "id": "seed",
        "x": 484.36210245413577,
        "y": 1059.99180558796,
        "width": 400
      },
      {
        "id": "provider_1",
        "x": 960,
        "y": 622,
        "width": 400
      },
      {
        "id": "provider_column",
        "x": 960,
        "y": 816,
        "width": 400
      },
      {
        "id": "llm_structured_1",
        "x": 960,
        "y": 1077,
        "width": 400
      }
    ],
    "edges": [
      {
        "from": "provider_1",
        "to": "provider_column",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "semantic-in-top"
      },
      {
        "from": "provider_column",
        "to": "llm_structured_1",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "llm_structured_1",
        "to": "seed",
        "type": "canvas",
        "source_handle": "data-out-left",
        "target_handle": "data-in-right"
      }
    ],
    "layout_direction": "LR",
    "seed_source_type": "unstructured",
    "seed_columns": [],
    "seed_drop_columns": [],
    "seed_preview_rows": [],
    "local_file_name": "",
    "unstructured_file_name": "",
    "unstructured_chunk_size": "1200",
    "unstructured_chunk_overlap": "200"
  }
}

================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/structured-outputs-jinja.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "provider_column",
        "endpoint": "",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "ministral",
        "model": "",
        "provider": "provider_column",
        "inference_parameters": {
          "temperature": 0.7
        }
      }
    ],
    "tool_configs": [],
    "columns": [
      {
        "column_type": "sampler",
        "name": "user",
        "drop": true,
        "sampler_type": "person_from_faker",
        "params": {}
      },
      {
        "column_type": "sampler",
        "name": "platform",
        "drop": false,
        "sampler_type": "category",
        "params": {
          "values": [
            "web",
            "mobile",
            "cli"
          ]
        }
      },
      {
        "column_type": "sampler",
        "name": "impact_scope",
        "drop": false,
        "sampler_type": "category",
        "params": {
          "values": [
            "single_user",
            "team",
            "org_wide"
          ]
        }
      },
      {
        "column_type": "expression",
        "name": "user_first_name",
        "drop": false,
        "expr": "{{ user.first_name }}",
        "dtype": "str"
      },
      {
        "column_type": "expression",
        "name": "user_full_name",
        "drop": false,
        "expr": "{{ user.first_name }} {{ user.last_name }}",
        "dtype": "str"
      },
      {
        "column_type": "llm-structured",
        "name": "ticket",
        "drop": false,
        "model_alias": "ministral",
        "prompt": "Create a realistic support ticket from {{ user_full_name }} using the {{ platform }} platform. Impact scope is {{ impact_scope }}.\n",
        "with_trace": "none",
        "extract_reasoning_content": false,
        "output_format": {
          "type": "object",
          "additionalProperties": false,
          "required": [
            "issue_title",
            "issue_summary",
            "category",
            "priority"
          ],
          "properties": {
            "issue_title": {
              "type": "string",
              "description": "Short title of issue"
            },
            "issue_summary": {
              "type": "string",
              "description": "1-2 sentence summary"
            },
            "category": {
              "type": "string",
              "enum": [
                "account",
                "billing",
                "api",
                "infra"
              ],
              "description": "Issue category"
            },
            "priority": {
              "type": "string",
              "enum": [
                "P1",
                "P2",
                "P3"
              ],
              "description": "Urgency level"
            }
          }
        }
      },
      {
        "column_type": "expression",
        "name": "sla_target",
        "drop": false,
        "expr": "{% if impact_scope == 'org_wide' %}15m\n{% elif impact_scope == 'team' %}1h\n{% else %}4h\n{% endif %}",
        "dtype": "str"
      },
      {
        "column_type": "llm-structured",
        "name": "agent_reply",
        "drop": false,
        "model_alias": "ministral",
        "prompt": "Write a concise support reply for ticket '{{ ticket.issue_title }}'. Category: {{ ticket.category }}. Priority: {{ ticket.priority }}. SLA target: {{ sla_target }}. {% if ticket.priority == 'P1' %}Tone must be urgent and action-first.{% else %}Tone must be calm and instructional.{% endif %}",
        "with_trace": "none",
        "extract_reasoning_content": false,
        "output_format": {
          "type": "object",
          "additionalProperties": false,
          "required": [
            "response",
            "next_action"
          ],
          "properties": {
            "response": {
              "type": "string",
              "description": "Support response to user"
            },
            "next_action": {
              "type": "string",
              "enum": [
                "ask_logs",
                "reset_credentials",
                "escalate",
                "provide_steps"
              ],
              "description": "Primary next action"
            }
          }
        }
      }
    ],
    "processors": []
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": [
      "jsonl"
    ]
  },
  "ui": {
    "nodes": [
      {
        "id": "note_1",
        "x": 990.3973509933774,
        "y": 1487.5768211920529,
        "width": 782,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "## Expression columns \nAre like lightweight spreadsheet formulas.\nUse them when you want to transform existing columns quickly, without calling an LLM.\n\n### What you can do:\n\n- Use values from other columns: `{{ first_name }} {{ last_name }}`\n- Clean/format text: `{{ city | upper }}`, `{{ product_name | trim }}`\n- Conditional logic:\n  - `{% if order_total >= 100 %}VIP{% elif order_total >= 50 %}Standard{% else %}Starter{% endif %}`\n- Simple math:\n  - `{{ quantity * unit_price }}`\n  - `{{ (subtotal - discount) | round(2) }}`\n\n### Good rule:\n- If the value can be computed from existing data, use Expression first.\n- Use LLM only when you need true language generation.",
        "note_color": "#CFFAFE",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": 3217.6543046357615,
        "y": 2081.596026490066,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "### LLM Structured block\nGenerates JSON that matches your Output Format schema.\nThink of Output Format as a contract for what the model must return.\n\n#### Prompt tips:\n\n- Reference existing columns with Jinja: `{{ column_name }}`\n- You can reference nested values too: `{{ customer.first_name }}`\n- Be explicit about what each field should contain.\n\n#### Example prompt pattern:\n\n```text\nCreate a support ticket summary.\nCustomer: {{ customer_name }}\nIssue text: {{ issue_text }}\n\nReturn data for:\n- priority\n- short_title\n- resolution_steps\n```",
        "note_color": "#CFFAFE",
        "note_opacity": "35"
      },
      {
        "id": "note_3",
        "x": 2294.4516556291387,
        "y": 2399.6099337748346,
        "width": 638,
        "node_type": "markdown_note",
        "name": "note_3",
        "markdown": "## Example output format shape (concept):\n\n```json\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"priority\": { \"type\": \"string\" },\n    \"short_title\": { \"type\": \"string\" },\n    \"resolution_steps\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }\n  },\n  \"required\": [\"priority\", \"short_title\", \"resolution_steps\"]\n}\n```",
        "note_color": "#CFFAFE",
        "note_opacity": "35"
      },
      {
        "id": "note_4",
        "x": 2544.684105960265,
        "y": 1126.5490066225163,
        "width": 399,
        "node_type": "markdown_note",
        "name": "note_4",
        "markdown": "### Model provider & Config\nEvery LLM block needs a model alias.\nThat alias comes from a Model Config.\nModel Config points to a Model Provider.\n\n#### Minimum setup:\n\n1. Create **Model Provider**\n   - Set endpoint/provider type\n   - Prefer env var auth (`api_key_env`) over hardcoded keys\n\n2. Create **Model Config**\n   - Set alias (example: `model_1`)\n   - Set model id\n   - Link to provider\n   - Tune params (temperature, max_tokens, etc.)\n\n3. In each LLM block\n   - Set `model_alias` to that alias\n\nIf alias/provider link is missing, validation/run will fail.",
        "note_color": "#CFFAFE",
        "note_opacity": "35"
      },
      {
        "id": "provider_column",
        "x": 2542,
        "y": 1696,
        "width": 400
      },
      {
        "id": "ministral",
        "x": 2542,
        "y": 1890,
        "width": 400
      },
      {
        "id": "user",
        "x": 191,
        "y": 2423,
        "width": 400
      },
      {
        "id": "platform",
        "x": 858.0384105960266,
        "y": 2286.5,
        "width": 400
      },
      {
        "id": "impact_scope",
        "x": 1342,
        "y": 2286.5,
        "width": 400
      },
      {
        "id": "user_first_name",
        "x": 1822,
        "y": 2505,
        "width": 400
      },
      {
        "id": "user_full_name",
        "x": 1822,
        "y": 1959,
        "width": 400
      },
      {
        "id": "ticket",
        "x": 2302,
        "y": 2286.5,
        "width": 400
      },
      {
        "id": "sla_target",
        "x": 1822,
        "y": 2232,
        "width": 400
      },
      {
        "id": "agent_reply",
        "x": 2782,
        "y": 2151,
        "width": 400
      }
    ],
    "edges": [
      {
        "from": "platform",
        "to": "impact_scope",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "user",
        "to": "user_first_name",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "user_full_name",
        "to": "ticket",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "user",
        "to": "platform",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "user",
        "to": "user_full_name",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "user_first_name",
        "to": "ticket",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "impact_scope",
        "to": "sla_target",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "sla_target",
        "to": "ticket",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "ticket",
        "to": "agent_reply",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "provider_column",
        "to": "ministral",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "semantic-in-top"
      },
      {
        "from": "ministral",
        "to": "ticket",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "ministral",
        "to": "agent_reply",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      }
    ],
    "layout_direction": "LR"
  }
}

================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/text-to-python.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "openai-compatible",
        "endpoint": "",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "coding-model",
        "model": "",
        "provider": "openai-compatible",
        "inference_parameters": {
          "temperature": 0.7
        }
      }
    ],
    "tool_configs": [],
    "columns": [
      {
        "column_type": "sampler",
        "name": "domain",
        "drop": false,
        "sampler_type": "category",
        "params": {
          "values": [
            "Data Processing",
            "Web API",
            "Automation"
          ]
        }
      },
      {
        "column_type": "sampler",
        "name": "task_type",
        "drop": false,
        "sampler_type": "subcategory",
        "params": {
          "category": "domain",
          "values": {
            "Data Processing": [
              "CSV cleaning",
              "JSON transform",
              "deduplicate rows"
            ],
            "Web API": [
              "GET endpoint",
              "POST validation",
              "pagination helper"
            ],
            "Automation": [
              "file organizer",
              "log parser",
              "daily report script"
            ]
          }
        }
      },
      {
        "column_type": "llm-text",
        "name": "instruction",
        "drop": false,
        "model_alias": "coding-model",
        "prompt": "Write one clear Python coding instruction.\nDomain: {{ domain }}\nTask type: {{ task_type }}\n\nKeep it practical and specific.\nReturn only the instruction without any code.",
        "with_trace": "none",
        "extract_reasoning_content": false
      },
      {
        "column_type": "llm-code",
        "name": "code_implementation",
        "drop": false,
        "model_alias": "coding-model",
        "prompt": "Write Python code for:\n{{ instruction }}\n\nRequirements:\n- runnable script or function\n- include needed imports\n- short comments only where useful\n- no markdown fences",
        "with_trace": "none",
        "extract_reasoning_content": false,
        "code_lang": "python"
      },
      {
        "column_type": "llm-judge",
        "name": "code_judge_result",
        "drop": false,
        "model_alias": "coding-model",
        "prompt": "Evaluate generated Python code against the instruction.\n\nInstruction:\n{{ instruction }}\n\nCode:\n{{ code_implementation }}",
        "with_trace": "none",
        "extract_reasoning_content": false,
        "scores": [
          {
            "name": "Correctness",
            "description": "Follows instruction and is executable",
            "options": {
              "0": "bad",
              "1": "partial",
              "2": "good",
              "3": "excellent"
            }
          }
        ]
      }
    ],
    "processors": []
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": [
      "jsonl"
    ]
  },
  "ui": {
    "nodes": [
      {
        "id": "note_1",
        "x": 1526,
        "y": 1790.75,
        "width": 568,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "The **LLM Code** block is where Python code is generated from your instruction/prompt.\n\n##### How it works in this recipe:\n\n- You provide a clear prompt (often using Jinja references from earlier columns)\n- The model returns a response\n- The block extracts code content directly for the output column\n\n##### Current status:\n\n- We are **not** running Python lint/syntax validation in this recipe yet (Soon)\n- Validation support is planned and will be added\n\n##### What this means:\n\n- You may get mostly correct code, but some rows can still have syntax/style issues\n- Keep prompts specific and constrained to reduce bad outputs\n\n##### Tip:\n\n- Ask for one self-contained function/script\n- Ask for required imports\n- Ask for no markdown fences if you want cleaner extraction\n",
        "note_color": "#FEF3C7",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": 2597.376821192053,
        "y": 1233.2039735099338,
        "width": 471,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "The **LLM Judge** block evaluates generated outputs with rubric-style scores.\n\n##### Important:\n\n- A judge can have **one or many scores**\n- Each score has:\n  - a name (for example: `Correctness`)\n  - a description\n  - options (value + meaning)\n\n##### Example multi-score setup:\n\n- Correctness\n- Readability\n- Efficiency\n\n##### Why use multiple scores:\n\n- You get richer quality signals than a single pass/fail\n- Easier filtering and weighting later in training data prep\n\n##### Practical pattern:\n\n1. Generate code with LLM Code\n2. Judge with 2-4 focused scores\n3. Keep high-quality rows based on score thresholds\n",
        "note_color": "#FEF3C7",
        "note_opacity": "35"
      },
      {
        "id": "openai-compatible",
        "x": 1627.1046357615896,
        "y": 921.0301324503313,
        "width": 400
      },
      {
        "id": "coding-model",
        "x": 1627.1046357615894,
        "y": 1138.910927152318,
        "width": 400
      },
      {
        "id": "domain",
        "x": 84,
        "y": 1600.5,
        "width": 400
      },
      {
        "id": "task_type",
        "x": 648,
        "y": 1600.5,
        "width": 400
      },
      {
        "id": "instruction",
        "x": 1128,
        "y": 1567,
        "width": 400
      },
      {
        "id": "code_implementation",
        "x": 1627.1046357615894,
        "y": 1531.4728476821192,
        "width": 400
      },
      {
        "id": "code_judge_result",
        "x": 2124.617218543046,
        "y": 1567.076490066225,
        "width": 400
      }
    ],
    "edges": [
      {
        "from": "domain",
        "to": "task_type",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "task_type",
        "to": "instruction",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "openai-compatible",
        "to": "coding-model",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "semantic-in-top"
      },
      {
        "from": "instruction",
        "to": "code_implementation",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "coding-model",
        "to": "instruction",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "coding-model",
        "to": "code_implementation",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "code_implementation",
        "to": "code_judge_result",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "coding-model",
        "to": "code_judge_result",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      }
    ],
    "layout_direction": "LR"
  }
}

================================================
FILE: studio/frontend/src/features/data-recipes/learning-recipes/text-to-sql.json
================================================
{
  "recipe": {
    "model_providers": [
      {
        "name": "vllm",
        "endpoint": "",
        "provider_type": "openai",
        "extra_headers": {},
        "extra_body": {}
      }
    ],
    "mcp_providers": [],
    "model_configs": [
      {
        "alias": "sql-pro",
        "model": "",
        "provider": "vllm",
        "inference_parameters": {
          "temperature": 0.7
        }
      }
    ],
    "tool_configs": [],
    "columns": [
      {
        "column_type": "sampler",
        "name": "domain",
        "drop": true,
        "sampler_type": "category",
        "params": {
          "values": [
            "Ecommerce",
            "Customer Support",
            "Finance"
          ]
        }
      },
      {
        "column_type": "sampler",
        "name": "topic",
        "drop": true,
        "sampler_type": "subcategory",
        "params": {
          "category": "domain",
          "values": {
            "Ecommerce": [
              "Orders and Revenue",
              "Returns and Refunds",
              "Product Performance"
            ],
            "Customer Support": [
              "Ticket Resolution",
              "SLA Compliance",
              "Agent Productivity"
            ],
            "Finance": [
              "Invoices and Payments",
              "Subscription Churn",
              "Monthly Cashflow"
            ]
          }
        }
      },
      {
        "column_type": "sampler",
        "name": "sql_task_type",
        "drop": true,
        "sampler_type": "category",
        "params": {
          "values": [
            "Filtering",
            "Aggregation",
            "Join Analysis",
            "Trend Reporting"
          ]
        }
      },
      {
        "column_type": "sampler",
        "name": "instruction_phrase",
        "drop": true,
        "sampler_type": "category",
        "params": {
          "values": [
            "Write a SQL query that",
            "Create a SQL statement to",
            "Develop a SQL query to"
          ]
        }
      },
      {
        "column_type": "llm-text",
        "name": "sql_prompt",
        "drop": false,
        "model_alias": "sql-pro",
        "prompt": "Generate one natural-language SQL task.\nContext:\n- Domain: {{ domain }}\n- Topic: {{ topic }}\n- Task type: {{ sql_task_type }}\nRules:\n- Must start exactly with: \"{{ instruction_phrase }}\"\n- Make it specific and practical.\n- Mention expected business outcome.\n- Keep it 1-2 sentences.\n- Do not include SQL code.\n- Output only the instruction text.",
        "system_prompt": "You create clear, realistic business SQL tasks for training data.\n",
        "with_trace": "none",
        "extract_reasoning_content": false
      },
      {
        "column_type": "llm-code",
        "name": "sql",
        "drop": false,
        "model_alias": "sql-pro",
        "prompt": "Write SQL for this instruction:\n{{ sql_prompt }}\nReturn ONE SQL script with this exact structure:\n-- SCHEMA\n[CREATE TABLE statements]\n[INSERT statements with sample rows]\n-- QUERY\n[final SELECT query solving the instruction]\nRules:\n- Use 2-3 tables max.\n- Use realistic snake_case names.\n- Include 5-8 rows of sample data per table.\n- Query must match task type \"{{ sql_task_type }}\".\n- Use only tables/columns you created.\n- No markdown fences.\n- No explanation text outside SQL comments shown above.",
        "system_prompt": "You are an expert SQL engineer. Produce correct, runnable SQL only.\n",
        "with_trace": "none",
        "extract_reasoning_content": false,
        "code_lang": "sql:ansi"
      },
      {
        "column_type": "validation",
        "name": "sql-validator",
        "drop": false,
        "target_columns": [
          "sql"
        ],
        "validator_type": "code",
        "validator_params": {
          "code_lang": "sql:ansi"
        },
        "batch_size": 10
      }
    ],
    "processors": []
  },
  "run": {
    "rows": 5,
    "preview": true,
    "output_formats": [
      "jsonl"
    ]
  },
  "ui": {
    "nodes": [
      {
        "id": "note_1",
        "x": 338,
        "y": 1020,
        "width": 600,
        "node_type": "markdown_note",
        "name": "note_1",
        "markdown": "##### This recipe starts with **sampler columns** to create controlled SQL task context:\n\n- `domain`\n- `topic` (subcategory from `domain`)\n- `sql_task_type`\n- `instruction_phrase`\n\n##### Why this is useful:\n\n- You get diverse tasks without writing every prompt by hand\n- You can steer business context + task pattern in a predictable way\n- LLM prompts become cleaner because context is already structured",
        "note_color": "#DBEAFE",
        "note_opacity": "35"
      },
      {
        "id": "note_2",
        "x": 1675.8410596026492,
        "y": 1644.2185430463576,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_2",
        "markdown": "The **LLM Text** block (`sql_prompt`) turns sampler context into one clean natural-language SQL task.\n\n##### Prompt pattern in this recipe:\n\n- references prior columns with Jinja (`{{ domain }}`, `{{ topic }}`, etc.)\n- enforces start phrase with `{{ instruction_phrase }}`\n- returns instruction text only (no SQL yet)\n\n##### Tip:\n\n- Keep this instruction block concise and specific\n- Save implementation details for the next SQL generation block",
        "note_color": "#DBEAFE",
        "note_opacity": "35"
      },
      {
        "id": "note_3",
        "x": 2198.980132450331,
        "y": 1723.1456953642385,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_3",
        "markdown": "The **LLM Code** block (`sql`) generates SQL script from `{{ sql_prompt }}`.\n\n##### In this recipe it returns:\n\n- schema section (`CREATE TABLE`)\n- sample seed rows (`INSERT`)\n- final query (`SELECT`)\n",
        "note_color": "#DBEAFE",
        "note_opacity": "35"
      },
      {
        "id": "note_4",
        "x": 1264,
        "y": 1037,
        "width": 400,
        "node_type": "markdown_note",
        "name": "note_4",
        "markdown": "Sampler columns are useful during generation, but often noisy in final output.\n\nSet helper columns to **drop=true** (like in this recipe), keep only output columns you want to export.\n\n#### Final keep we have set here:\n\n- `sql_prompt`\n- `sql`\n\n",
        "note_color": "#DBEAFE",
        "note_opacity": "35"
      },
      {
        "id": "vllm",
        "x": 1939.5364238410598,
        "y": 781.25,
        "width": 400
      },
      {
        "id": "sql-pro",
        "x": 1939.5364238410593,
        "y": 975.25,
        "width": 400
      },
      {
        "id": "domain",
        "x": 680,
        "y": 1495,
        "width": 400
      },
      {
        "id": "topic",
        "x": 1160,
        "y": 1413,
        "width": 400
      },
      {
        "id": "sql_task_type",
        "x": 1160,
        "y": 1577,
        "width": 400
      },
      {
        "id": "instruction_phrase",
        "x": 100,
        "y": 1495,
        "width": 400
      },
      {
        "id": "sql_prompt",
        "x": 1672.6490066225165,
        "y": 1457.6854304635763,
        "width": 400
      },
      {
        "id": "sql",
        "x": 2194.9006622516554,
        "y": 1457.110927152318,
        "width": 400
      },
      {
        "id": "sql-validator",
        "x": 2682.5827814569534,
        "y": 1491.0413907284767,
        "width": 400
      }
    ],
    "edges": [
      {
        "from": "domain",
        "to": "topic",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "domain",
        "to": "sql_task_type",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "instruction_phrase",
        "to": "domain",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "topic",
        "to": "sql_prompt",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "sql_prompt",
        "to": "sql",
        "type": "canvas",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "vllm",
        "to": "sql-pro",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "semantic-in-top"
      },
      {
        "from": "sql-pro",
        "to": "sql",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "sql",
        "to": "sql-validator",
        "type": "semantic",
        "source_handle": "data-out",
        "target_handle": "data-in"
      },
      {
        "from": "sql-pro",
        "to": "sql_prompt",
        "type": "semantic",
        "source_handle": "semantic-out-bottom",
        "target_handle": "data-in-top"
      },
      {
        "from": "sql_prompt",
        "to": "sql_task_type",
        "type": "canvas",
        "source_handle": "data-out-left",
        "target_handle": "data-in-right"
      }
    ],
    "layout_direction": "LR"
  }
}

================================================
FILE: studio/frontend/src/features/data-recipes/pages/data-recipes-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import {
  DropdownMenu,
  DropdownMenuContent,
  DropdownMenuItem,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import {
  Empty,
  EmptyContent,
  EmptyDescription,
  EmptyHeader,
  EmptyMedia,
  EmptyTitle,
} from "@/components/ui/empty";
import { ShineBorder } from "@/components/ui/shine-border";
import { toastError } from "@/shared/toast";
import {
  Album02Icon,
  ArrowDown01Icon,
  CodeIcon,
  CookBookIcon,
  Database02Icon,
  Delete02Icon,
  DocumentAttachmentIcon,
  FunctionIcon,
  Plant01Icon,
  PlusSignIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useNavigate } from "@tanstack/react-router";
import type { ReactElement } from "react";
import { useEffect, useState } from "react";
import {
  createRecipeDraft,
  createRecipeFromLearningRecipe,
  deleteRecipe,
  primeRecipeCache,
  useRecipes,
} from "../data/recipes-db";
import { LEARNING_RECIPES } from "../learning-recipes";

/**
 * sessionStorage key checked once when the Data Recipes page mounts: if its
 * value is "1", the page removes the key and opens the Learning Recipes dialog.
 * Whatever writes this flag lives outside this file.
 */
const OPEN_LEARNING_RECIPES_ON_ARRIVAL_KEY =
  "data-recipes:open-learning-recipes";

/** Presentation data for one learning-recipe card in the gallery grid. */
type TemplateCard = {
  // Card heading; also used as the React key for the card.
  title: string;
  // Short blurb rendered under the title (clamped to two lines).
  description: string;
  // Hugeicons icon definition rendered in the card's icon tile.
  icon: typeof CookBookIcon;
  // Label of the corner badge; "Advanced" uses the "secondary" badge variant.
  difficulty: "Easy" | "Starter" | "Intermediate" | "Advanced";
  // Concept tags shown along the bottom; only the first four are rendered,
  // the remainder is summarised as "+N".
  learningBadges: string[];
  // Gradient colour-stop classes appended to the card's className.
  surfaceClassName: string;
  // Colours handed to <ShineBorder /> for the animated border.
  shineColor: string[];
  // Id looked up in LEARNING_RECIPE_BY_ID; a card without a matching recipe
  // is disabled and shows a "Soon" badge.
  learningRecipeId?: string;
};

/**
 * Static catalogue of learning-recipe cards, rendered in array order by
 * LearningRecipeCards. Each `learningRecipeId` is resolved against
 * LEARNING_RECIPE_BY_ID at render time; an id with no matching learning
 * recipe leaves the card disabled.
 */
const TEMPLATE_CARDS: TemplateCard[] = [
  {
    title: "Instruction from Answer",
    description:
      "Start from seed answer fields and generate matching user instructions for SFT pairs.",
    icon: Plant01Icon,
    difficulty: "Easy",
    learningBadges: ["Seed Dataset", "LLM Text", "Prompting"],
    surfaceClassName:
      "from-emerald-500/15 via-green-500/5 to-transparent dark:from-emerald-400/30 dark:via-green-400/14 dark:to-emerald-950/16",
    shineColor: [
      "rgb(16 185 129 / 0.45)",
      "rgb(34 197 94 / 0.4)",
      "rgb(52 211 153 / 0.45)",
    ],
    learningRecipeId: "instruction-from-answer",
  },
  {
    title: "PDF Document QA",
    description:
      "Unstructured PDF chunks transformed into grounded question-answer training pairs.",
    icon: DocumentAttachmentIcon,
    difficulty: "Easy",
    learningBadges: ["Unstructured", "LLM Text"],
    surfaceClassName:
      "from-violet-500/15 via-fuchsia-500/5 to-transparent dark:from-violet-400/30 dark:via-fuchsia-400/14 dark:to-violet-950/16",
    shineColor: [
      "rgb(139 92 246 / 0.45)",
      "rgb(217 70 239 / 0.4)",
      "rgb(168 85 247 / 0.45)",
    ],
    learningRecipeId: "pdf-grounded-qa",
  },
  {
    title: "OCR Document Extraction",
    description:
      "Use image context from seed data to generate OCR-style extraction outputs.",
    icon: Album02Icon,
    difficulty: "Starter",
    learningBadges: ["Vision", "LLM Text", "Image Context"],
    surfaceClassName:
      "from-lime-500/15 via-emerald-500/5 to-transparent dark:from-lime-400/30 dark:via-emerald-400/14 dark:to-lime-950/16",
    shineColor: [
      "rgb(132 204 22 / 0.45)",
      "rgb(16 185 129 / 0.4)",
      "rgb(74 222 128 / 0.45)",
    ],
    learningRecipeId: "ocr-document-extraction",
  },
  {
    title: "Text to Python",
    description:
      "Instruction-to-code pairs for training models that generate clean Python implementations.",
    icon: CodeIcon,
    difficulty: "Intermediate",
    learningBadges: ["LLM Judge", "LLM Code", "Subcategory", "Category"],
    surfaceClassName:
      "from-amber-500/15 via-orange-500/5 to-transparent dark:from-amber-400/30 dark:via-orange-400/14 dark:to-amber-950/16",
    shineColor: [
      "rgb(245 158 11 / 0.45)",
      "rgb(249 115 22 / 0.4)",
      "rgb(251 146 60 / 0.45)",
    ],
    learningRecipeId: "text-to-python",
  },
  {
    title: "Text to SQL",
    description:
      "Natural language to SQL pairs, including schema-aware query construction patterns.",
    icon: Database02Icon,
    difficulty: "Intermediate",
    learningBadges: ["LLM Code", "Prompting", "Drop Columns"],
    surfaceClassName:
      "from-blue-500/15 via-indigo-500/5 to-transparent dark:from-blue-400/30 dark:via-indigo-400/14 dark:to-blue-950/16",
    shineColor: [
      "rgb(59 130 246 / 0.45)",
      "rgb(99 102 241 / 0.4)",
      "rgb(96 165 250 / 0.45)",
    ],
    learningRecipeId: "text-to-sql",
  },
  {
    title: "Structured Outputs + Jinja Expressions",
    description:
      "Support ticket triage dataset with structured JSON outputs and Jinja if/else refs.",
    icon: FunctionIcon,
    difficulty: "Advanced",
    learningBadges: ["Structured LLM", "Expression", "Jinja"],
    surfaceClassName:
      "from-cyan-500/15 via-sky-500/5 to-transparent dark:from-cyan-400/30 dark:via-sky-400/14 dark:to-cyan-950/16",
    shineColor: [
      "rgb(6 182 212 / 0.45)",
      "rgb(56 189 248 / 0.4)",
      "rgb(34 211 238 / 0.45)",
    ],
    learningRecipeId: "structured-outputs-jinja",
  },
];

// Lookup table from learning-recipe id to its definition, built once at module
// load so cards and click handlers can resolve a recipe without scanning
// LEARNING_RECIPES.
const LEARNING_RECIPE_BY_ID = new Map(
  LEARNING_RECIPES.map((recipe) => [recipe.id, recipe]),
);

/**
 * Render a millisecond timestamp as a coarse "N unit(s) ago" label relative to
 * the current time.
 *
 * Timestamps in the future are clamped to zero elapsed time and therefore
 * read "just now". Anything at least one week old is reported in weeks.
 *
 * @param value Timestamp in milliseconds, on the same clock as `Date.now()`.
 * @returns A human-readable relative-time string.
 */
function formatRelativeTime(value: number): string {
  const MINUTE_MS = 60 * 1000;
  const HOUR_MS = 60 * MINUTE_MS;
  const DAY_MS = 24 * HOUR_MS;
  const WEEK_MS = 7 * DAY_MS;

  const elapsedMs = Math.max(0, Date.now() - value);
  if (elapsedMs < MINUTE_MS) {
    return "just now";
  }

  // Bounded units, smallest first; `below` is the exclusive upper bound.
  const boundedUnits = [
    { label: "minute", sizeMs: MINUTE_MS, below: HOUR_MS },
    { label: "hour", sizeMs: HOUR_MS, below: DAY_MS },
    { label: "day", sizeMs: DAY_MS, below: WEEK_MS },
  ];
  // Weeks are the catch-all for everything that matches no bounded unit.
  const weekUnit = { label: "week", sizeMs: WEEK_MS };

  const unit =
    boundedUnits.find((candidate) => elapsedMs < candidate.below) ?? weekUnit;
  const count = Math.floor(elapsedMs / unit.sizeMs);
  const plural = count === 1 ? "" : "s";
  return `${count} ${unit.label}${plural} ago`;
}

/**
 * Grid of learning-recipe cards built from TEMPLATE_CARDS.
 *
 * A card is clickable only when its recipe exists in LEARNING_RECIPE_BY_ID and
 * no template is currently loading. The card whose id equals
 * `loadingTemplateId` swaps its badges for a "Loading..." badge.
 *
 * @param onSelect Called with the clicked card's template.
 * @param loadingTemplateId Id of the template being loaded, or null when idle.
 */
function LearningRecipeCards({
  onSelect,
  loadingTemplateId,
}: {
  onSelect: (template: TemplateCard) => void;
  loadingTemplateId: string | null;
}): ReactElement {
  const cards = TEMPLATE_CARDS.map((template) => {
    const recipeId = template.learningRecipeId;
    const hasRecipe = Boolean(
      recipeId ? LEARNING_RECIPE_BY_ID.get(recipeId) : undefined,
    );
    const isThisCardLoading =
      recipeId !== undefined && loadingTemplateId === recipeId;
    // Enabled only when the recipe exists and nothing at all is loading.
    const isEnabled = hasRecipe && !isThisCardLoading && !loadingTemplateId;

    const shownBadges = template.learningBadges.slice(0, 4);
    // The slice holds at most four entries, so this is never negative.
    const hiddenBadgeCount =
      template.learningBadges.length - shownBadges.length;
    const difficultyVariant =
      template.difficulty === "Advanced" ? "secondary" : "outline";

    return (
      <button
        key={template.title}
        type="button"
        disabled={!isEnabled}
        onClick={() => onSelect(template)}
        className={`group shadow-border relative overflow-hidden rounded-2xl bg-gradient-to-br text-left transition-transform ${template.surfaceClassName} enabled:cursor-pointer enabled:hover:-translate-y-0.5 enabled:hover:shadow-md disabled:cursor-not-allowed disabled:opacity-70`}
      >
        <ShineBorder
          borderWidth={1.2}
          duration={13}
          shineColor={template.shineColor}
        />
        <div className="relative flex h-full min-h-40 flex-col justify-between gap-3 p-4">
          <Badge className="absolute right-3 top-3" variant={difficultyVariant}>
            {template.difficulty}
          </Badge>
          <div className="inline-flex size-10 items-center justify-center rounded-xl border border-foreground/10 bg-background/80">
            <HugeiconsIcon
              icon={template.icon}
              className="size-5 text-foreground/90"
            />
          </div>
          <div className="space-y-1">
            <p className="line-clamp-2 text-sm font-semibold leading-tight text-foreground">
              {template.title}
            </p>
            <p className="line-clamp-2 text-xs text-muted-foreground">
              {template.description}
            </p>
          </div>
          <div className="flex items-center gap-1 overflow-hidden whitespace-nowrap">
            {isThisCardLoading ? (
              <Badge variant="outline">Loading...</Badge>
            ) : (
              <>
                {shownBadges.map((badge) => (
                  <Badge
                    key={`${template.title}-${badge}`}
                    variant="outline"
                    className="h-5 shrink-0 px-1.5 text-[10px]"
                  >
                    {badge}
                  </Badge>
                ))}
                {hiddenBadgeCount > 0 && (
                  <Badge
                    variant="outline"
                    className="h-5 shrink-0 px-1.5 text-[10px]"
                  >
                    +{hiddenBadgeCount}
                  </Badge>
                )}
                {!hasRecipe && (
                  <Badge
                    variant="secondary"
                    className="h-5 shrink-0 px-1.5 text-[10px]"
                  >
                    Soon
                  </Badge>
                )}
              </>
            )}
          </div>
        </div>
      </button>
    );
  });

  return (
    <div className="grid w-full gap-4 sm:grid-cols-2 xl:grid-cols-3">
      {cards}
    </div>
  );
}

/**
 * Landing page for Data Recipes.
 *
 * Lists saved recipes (or the learning-recipe gallery when there are none),
 * lets the user create an empty recipe or one seeded from a learning recipe,
 * and deletes recipes. Failures while creating, seeding or deleting are
 * reported with an error toast instead of being dropped silently.
 */
export function DataRecipesPage(): ReactElement {
  const navigate = useNavigate();
  const { recipes, ready } = useRecipes();
  const [creatingRecipe, setCreatingRecipe] = useState(false);
  const [learningDialogOpen, setLearningDialogOpen] = useState(false);
  const [loadingTemplateId, setLoadingTemplateId] = useState<string | null>(
    null,
  );

  // One-shot: open the Learning Recipes dialog when the arrival flag is set,
  // clearing the flag first so a reload does not reopen it.
  useEffect(() => {
    if (sessionStorage.getItem(OPEN_LEARNING_RECIPES_ON_ARRIVAL_KEY) !== "1") {
      return;
    }
    sessionStorage.removeItem(OPEN_LEARNING_RECIPES_ON_ARRIVAL_KEY);
    setLearningDialogOpen(true);
  }, []);

  /** Create an empty draft recipe and navigate to its editor. */
  async function openNewRecipe(): Promise<void> {
    if (creatingRecipe || loadingTemplateId) {
      return;
    }
    setCreatingRecipe(true);
    try {
      const recipe = await createRecipeDraft();
      primeRecipeCache(recipe);
      await navigate({
        to: "/data-recipes/$recipeId",
        params: { recipeId: recipe.id },
      });
    } catch (error) {
      // Callers discard the returned promise's rejection, so report here.
      toastError(
        "Failed to create recipe.",
        error instanceof Error ? error.message : undefined,
      );
    } finally {
      setCreatingRecipe(false);
    }
  }

  /** Create a recipe from a learning template and navigate to its editor. */
  async function openLearningRecipe(template: TemplateCard): Promise<void> {
    if (creatingRecipe || loadingTemplateId) {
      return;
    }
    if (!template.learningRecipeId) {
      toastError("Learning recipe not ready yet.");
      return;
    }
    const recipeTemplate = LEARNING_RECIPE_BY_ID.get(template.learningRecipeId);
    if (!recipeTemplate) {
      toastError("Learning recipe not found.");
      return;
    }

    setLoadingTemplateId(template.learningRecipeId);
    try {
      const payload = await recipeTemplate.loadPayload();
      const recipe = await createRecipeFromLearningRecipe({
        templateId: recipeTemplate.id,
        templateTitle: recipeTemplate.title,
        payload,
      });
      primeRecipeCache(recipe);
      setLearningDialogOpen(false);
      await navigate({
        to: "/data-recipes/$recipeId",
        params: { recipeId: recipe.id },
      });
    } catch (error) {
      toastError(
        "Failed to start learning recipe.",
        error instanceof Error ? error.message : undefined,
      );
    } finally {
      setLoadingTemplateId(null);
    }
  }

  /** Open an existing recipe, seeding the cache with the already-loaded row. */
  function openRecipe(recipe: (typeof recipes)[number]): void {
    primeRecipeCache(recipe);
    navigate({
      to: "/data-recipes/$recipeId",
      params: { recipeId: recipe.id },
    }).catch(() => undefined);
  }

  /** Delete a recipe by id, surfacing any failure to the user. */
  async function handleDeleteRecipe(recipeId: string): Promise<void> {
    try {
      await deleteRecipe(recipeId);
    } catch (error) {
      toastError(
        "Failed to delete recipe.",
        error instanceof Error ? error.message : undefined,
      );
    }
  }

  const isBusy = creatingRecipe || Boolean(loadingTemplateId);

  return (
    <div className="min-h-screen bg-background">
      <main className="mx-auto w-full max-w-7xl px-6 py-8">
        <div className="flex items-center justify-between gap-4">
          <div>
            <h1 className="text-2xl font-semibold tracking-tight">
              Data Recipes
            </h1>
            <p className="mt-1 text-sm text-muted-foreground">
              Create and manage local recipe workflows.
            </p>
          </div>
          <DropdownMenu>
            <DropdownMenuTrigger asChild={true}>
              <Button type="button" disabled={isBusy}>
                <HugeiconsIcon icon={PlusSignIcon} className="size-4" />
                New Recipe
                <HugeiconsIcon icon={ArrowDown01Icon} className="size-4" />
              </Button>
            </DropdownMenuTrigger>
            <DropdownMenuContent align="end">
              <DropdownMenuItem
                onSelect={() => {
                  openNewRecipe().catch(() => undefined);
                }}
              >
                <HugeiconsIcon icon={PlusSignIcon} className="size-4" />
                Start Empty
              </DropdownMenuItem>
              <DropdownMenuItem
                onSelect={() => {
                  setLearningDialogOpen(true);
                }}
              >
                <HugeiconsIcon icon={CookBookIcon} className="size-4" />
                Start from Learning Recipe
              </DropdownMenuItem>
            </DropdownMenuContent>
          </DropdownMenu>
        </div>

        {!ready ? (
          <div className="mt-8 rounded-2xl border border-border/70 bg-card px-6 py-10 text-center">
            <p className="text-sm font-medium text-foreground">
              Loading recipes
            </p>
            <p className="mt-1 text-xs text-muted-foreground">
              Fetching your saved recipes and learning templates.
            </p>
          </div>
        ) : recipes.length === 0 ? (
          <Empty className="mt-8 border border-dashed border-border/70">
            <EmptyHeader>
              <EmptyMedia variant="icon">
                <HugeiconsIcon icon={CookBookIcon} className="size-5" />
              </EmptyMedia>
              <EmptyTitle>No recipes yet</EmptyTitle>
              <EmptyDescription>
                Browse Learning Recipes below to understand how recipe workflows
                work.
              </EmptyDescription>
            </EmptyHeader>
            <EmptyContent className="max-w-6xl items-stretch">
              <LearningRecipeCards
                onSelect={(template) => {
                  openLearningRecipe(template).catch(() => undefined);
                }}
                loadingTemplateId={loadingTemplateId}
              />
            </EmptyContent>
          </Empty>
        ) : (
          <div className="mt-8 space-y-2">
            {recipes.map((recipe) => (
              <div
                key={recipe.id}
                className="flex items-center gap-3 rounded-xl border bg-card px-4 py-3"
              >
                <button
                  type="button"
                  className="flex min-w-0 flex-1 items-center gap-3 text-left"
                  onClick={() => openRecipe(recipe)}
                >
                  <div className="flex size-9 shrink-0 items-center justify-center rounded-lg border border-border/70 bg-muted/20">
                    <HugeiconsIcon
                      icon={CookBookIcon}
                      className="size-4 text-muted-foreground"
                    />
                  </div>
                  <div className="min-w-0">
                    <div className="flex items-center gap-2">
                      <p className="truncate text-sm font-medium">
                        {recipe.name}
                      </p>
                      {recipe.learningRecipeId ? (
                        <Badge variant="outline">Learning Recipe</Badge>
                      ) : null}
                    </div>
                    <p className="text-xs text-muted-foreground">
                      Last updated {formatRelativeTime(recipe.updatedAt)} |
                      Created {formatRelativeTime(recipe.createdAt)}
                    </p>
                  </div>
                </button>
                <Button
                  type="button"
                  variant="ghost"
                  size="icon"
                  className="size-8"
                  onClick={() => {
                    handleDeleteRecipe(recipe.id).catch(() => undefined);
                  }}
                  aria-label={`Delete ${recipe.name}`}
                >
                  <HugeiconsIcon icon={Delete02Icon} className="size-4" />
                </Button>
              </div>
            ))}
          </div>
        )}
      </main>

      <Dialog open={learningDialogOpen} onOpenChange={setLearningDialogOpen}>
        <DialogContent
          className="sm:max-w-5xl"
          overlayClassName="bg-background/45 supports-backdrop-filter:backdrop-blur-[1px]"
        >
          <DialogHeader>
            <DialogTitle>Learning Recipes</DialogTitle>
            <DialogDescription>
              Start from a prebuilt recipe to learn patterns, then edit and run.
            </DialogDescription>
          </DialogHeader>
          <LearningRecipeCards
            onSelect={(template) => {
              openLearningRecipe(template).catch(() => undefined);
            }}
            loadingTemplateId={loadingTemplateId}
          />
        </DialogContent>
      </Dialog>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/data-recipes/pages/edit-recipe-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { RecipeStudioPage, type RecipePayload } from "@/features/recipe-studio";
import { useNavigate } from "@tanstack/react-router";
import type { ReactElement } from "react";
import { useCallback, useEffect, useState } from "react";
import { getCachedRecipe, getRecipe, primeRecipeCache, saveRecipe } from "../data/recipes-db";
import type { RecipeRecord } from "../types";

/** Props for EditRecipePage. */
type EditRecipePageProps = {
  // Id of the recipe to load; also the fallback id when persisting.
  recipeId: string;
};

/**
 * Load lifecycle of the edited recipe:
 * - "loading": no cached copy was available and the fetch has not resolved;
 * - "missing": the fetch resolved without a record;
 * - "ready": a record (cached or freshly fetched) is available to edit.
 */
type LoadState =
  | { status: "loading" }
  | { status: "missing" }
  | { status: "ready"; record: RecipeRecord };

/**
 * Centered status card shown in place of the editor while a recipe is loading
 * or when it cannot be found.
 *
 * @param props.title Heading of the card.
 * @param props.description Supporting text under the heading.
 * @param props.onBack Invoked when "Back to Recipes" is clicked.
 */
function RecipeLoadState(props: {
  title: string;
  description: string;
  onBack: () => void;
}): ReactElement {
  const { title, description, onBack } = props;

  const backButton = (
    <Button type="button" variant="outline" className="mt-5" onClick={onBack}>
      Back to Recipes
    </Button>
  );

  return (
    <div className="min-h-screen bg-background">
      <main className="mx-auto flex min-h-[70vh] w-full max-w-4xl items-center justify-center px-6 py-8">
        <div className="w-full rounded-2xl border bg-card p-8 text-center">
          <h1 className="text-lg font-semibold">{title}</h1>
          <p className="mt-2 text-sm text-muted-foreground">{description}</p>
          {backButton}
        </div>
      </main>
    </div>
  );
}

/**
 * Editor route for a single recipe.
 *
 * Renders immediately from the in-memory cache when possible, then refreshes
 * from storage via `getRecipe`. Shows a status card while loading, when the
 * recipe does not exist, or when loading fails with nothing cached to show.
 *
 * @param recipeId Id of the recipe to edit.
 */
export function EditRecipePage({ recipeId }: EditRecipePageProps): ReactElement {
  const navigate = useNavigate();
  const [loadState, setLoadState] = useState<LoadState>(() => {
    const cachedRecipe = getCachedRecipe(recipeId);
    if (cachedRecipe) {
      return { status: "ready", record: cachedRecipe };
    }
    return { status: "loading" };
  });
  // Message of a failed load; only rendered while no record is available.
  const [loadError, setLoadError] = useState<string | null>(null);

  useEffect(() => {
    // Guards against applying a stale result after recipeId changes/unmount.
    let active = true;
    const cachedRecipe = getCachedRecipe(recipeId);
    setLoadError(null);
    if (cachedRecipe) {
      setLoadState({ status: "ready", record: cachedRecipe });
    } else {
      setLoadState({ status: "loading" });
    }

    getRecipe(recipeId)
      .then((record) => {
        if (!active) {
          return;
        }
        if (!record) {
          setLoadState({ status: "missing" });
          return;
        }
        primeRecipeCache(record);
        setLoadState({ status: "ready", record });
      })
      .catch((error: unknown) => {
        // Without this handler a rejected load is an unhandled rejection and
        // the page would sit on "Loading recipe..." forever. When a cached
        // copy is already on screen, keep it usable and stay quiet.
        if (!active || cachedRecipe) {
          return;
        }
        setLoadError(
          error instanceof Error && error.message
            ? error.message
            : "An unexpected error occurred while loading this recipe.",
        );
      });
    return () => {
      active = false;
    };
  }, [recipeId]);

  const handlePersist = useCallback(
    async (input: { id: string | null; name: string; payload: RecipePayload }) => {
      const record = await saveRecipe({
        // Fall back to the route's id when the studio has not assigned one.
        id: input.id ?? recipeId,
        name: input.name,
        payload: input.payload,
      });
      primeRecipeCache(record);
      return { id: record.id, updatedAt: record.updatedAt };
    },
    [recipeId],
  );

  const goBack = (): void => {
    void navigate({ to: "/data-recipes" });
  };

  if (loadState.status === "loading") {
    if (loadError !== null) {
      return (
        <RecipeLoadState
          title="Could not load recipe"
          description={loadError}
          onBack={goBack}
        />
      );
    }
    return (
      <RecipeLoadState
        title="Loading recipe..."
        description="Please wait while we load your recipe."
        onBack={goBack}
      />
    );
  }

  if (loadState.status === "missing") {
    return (
      <RecipeLoadState
        title="Recipe not found"
        description="This recipe may have been deleted."
        onBack={goBack}
      />
    );
  }

  return (
    <RecipeStudioPage
      key={loadState.record.id}
      recipeId={loadState.record.id}
      initialRecipeName={loadState.record.name}
      initialPayload={loadState.record.payload}
      initialSavedAt={loadState.record.updatedAt}
      onPersistRecipe={handlePersist}
    />
  );
}


================================================
FILE: studio/frontend/src/features/data-recipes/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipePayload } from "@/features/recipe-studio";

/** A stored recipe as returned by the recipes data layer. */
export type RecipeRecord = {
  // Unique id; used as the `$recipeId` route parameter.
  id: string;
  // User-visible recipe name.
  name: string;
  // Recipe Studio document handed to the editor as its initial payload.
  payload: RecipePayload;
  // Creation time; the list page diffs it against Date.now(), so it is
  // expected to be epoch milliseconds.
  createdAt: number;
  // Last-save time, same unit as createdAt.
  updatedAt: number;
  // Set when the recipe came from a learning recipe; the list page shows a
  // "Learning Recipe" badge whenever this is truthy.
  learningRecipeId?: string;
  // Presumably the title of that learning recipe at creation time — confirm
  // against the data layer.
  learningRecipeTitle?: string;
};

/** Input accepted when saving a recipe. */
export type SaveRecipeInput = {
  // Id of the recipe to save. NOTE(review): how an omitted or null id is
  // handled (e.g. create vs. update) is decided by the data layer, which is
  // not visible here — confirm.
  id?: string | null;
  // Recipe name to store.
  name: string;
  // Recipe Studio document to store.
  payload: RecipePayload;
  // Optional provenance fields mirroring RecipeRecord.
  learningRecipeId?: string;
  learningRecipeTitle?: string;
};


================================================
FILE: studio/frontend/src/features/export/anim.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Shared expand/collapse animation preset: content grows from zero height and
 * zero opacity to its natural height, and reverses on exit.
 * Presumably spread onto `motion` elements by the export UI — confirm at the
 * call sites.
 */
export const collapseAnim = {
  initial: { height: 0, opacity: 0 },
  // `as const` keeps "auto" as a literal type rather than widening to string.
  animate: { height: "auto" as const, opacity: 1 },
  exit: { height: 0, opacity: 0 },
  // 0.3 duration with a four-number easing tuple; `as const` preserves the
  // tuple type. NOTE(review): the values look like cubic-bezier control
  // points matching CSS `ease` — confirm against the Motion docs.
  transition: { duration: 0.3, ease: [0.25, 0.1, 0.25, 1] as const },
};


================================================
FILE: studio/frontend/src/features/export/api/export-api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";

/**
 * Extract a human-readable error message from a failed response.
 *
 * Preference order: a non-empty string `detail`, then the `msg` fields (or
 * plain strings) of an array-valued `detail` joined with "; ", then a
 * non-empty string `message`, and finally a generic message with the HTTP
 * status. Array-valued `detail` is what FastAPI-style validation errors
 * return; treating it as a string would surface "[object Object]".
 *
 * @param response The failed response; its body is consumed.
 * @returns A message suitable for an Error or a toast. Never rejects.
 */
async function readError(response: Response): Promise<string> {
  const fallback = `Request failed (${response.status})`;

  let payload: unknown;
  try {
    payload = await response.json();
  } catch {
    // Body is empty or not JSON.
    return fallback;
  }
  if (typeof payload !== "object" || payload === null) {
    return fallback;
  }

  const { detail, message } = payload as { detail?: unknown; message?: unknown };

  if (typeof detail === "string" && detail.length > 0) {
    return detail;
  }
  if (Array.isArray(detail)) {
    const parts = detail
      .map((entry: unknown): string => {
        if (typeof entry === "string") {
          return entry;
        }
        if (typeof entry === "object" && entry !== null) {
          const msg = (entry as { msg?: unknown }).msg;
          if (typeof msg === "string") {
            return msg;
          }
        }
        return "";
      })
      .filter((text) => text.length > 0);
    if (parts.length > 0) {
      return parts.join("; ");
    }
  }
  if (typeof message === "string" && message.length > 0) {
    return message;
  }
  return fallback;
}

/**
 * Decode a response body as JSON, or throw when the response is not OK.
 *
 * @param response Response to decode.
 * @returns The parsed body, cast (not validated) to `T`.
 * @throws Error carrying the message from `readError` on a non-2xx status.
 */
async function parseJson<T>(response: Response): Promise<T> {
  if (response.ok) {
    const body: unknown = await response.json();
    return body as T;
  }
  const reason = await readError(response);
  throw new Error(reason);
}

/** One checkpoint entry inside a ModelCheckpoints listing. */
export interface CheckpointInfo {
  // Label for the checkpoint; presumably what the UI shows — confirm.
  display_name: string;
  // Checkpoint location as reported by the backend.
  // NOTE(review): likely the value to send as `checkpoint_path` — confirm.
  path: string;
  // Optional loss value; may be absent or null.
  loss?: number | null;
}

/** A named model together with its checkpoints, as listed by the backend. */
export interface ModelCheckpoints {
  name: string;
  checkpoints: CheckpointInfo[];
  // Optional metadata; each may be absent or null.
  // NOTE(review): looks like adapter metadata (base model, PEFT type, LoRA
  // rank) — confirm against the backend response schema.
  base_model?: string | null;
  peft_type?: string | null;
  lora_rank?: number | null;
}

/** Response body of `GET /api/models/checkpoints` (see fetchCheckpoints). */
export interface CheckpointListResponse {
  // Presumably the directory the backend scanned for outputs — confirm.
  outputs_dir: string;
  models: ModelCheckpoints[];
}

/** Response body shared by the load-checkpoint, export and cleanup calls. */
export interface ExportOperationResponse {
  success: boolean;
  message: string;
  // Optional free-form extra data; shape is not defined on the client.
  details?: Record<string, unknown> | null;
}

/**
 * Fetch the checkpoint listing from `/api/models/checkpoints`.
 *
 * @throws Error when the response status is not OK.
 */
export async function fetchCheckpoints(): Promise<CheckpointListResponse> {
  return parseJson<CheckpointListResponse>(
    await authFetch("/api/models/checkpoints"),
  );
}

/**
 * POST the given parameters as JSON to `/api/export/load-checkpoint`.
 *
 * @param params Request body, serialized as-is.
 * @throws Error when the response status is not OK.
 */
export async function loadCheckpoint(params: {
  checkpoint_path: string;
  max_seq_length?: number;
  load_in_4bit?: boolean;
  /** Allow loading models with custom code. Only enable for checkpoints you trust. */
  trust_remote_code?: boolean;
}): Promise<ExportOperationResponse> {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  return parseJson<ExportOperationResponse>(
    await authFetch("/api/export/load-checkpoint", requestOptions),
  );
}

/**
 * POST the given parameters as JSON to `/api/export/export/merged`.
 *
 * @param params Request body, serialized as-is.
 * @throws Error when the response status is not OK.
 */
export async function exportMerged(params: {
  save_directory: string;
  format_type?: string;
  push_to_hub?: boolean;
  repo_id?: string | null;
  hf_token?: string | null;
  private?: boolean;
}): Promise<ExportOperationResponse> {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  return parseJson<ExportOperationResponse>(
    await authFetch("/api/export/export/merged", requestOptions),
  );
}

/**
 * POST the given parameters as JSON to `/api/export/export/base`.
 *
 * @param params Request body, serialized as-is.
 * @throws Error when the response status is not OK.
 */
export async function exportBase(params: {
  save_directory: string;
  push_to_hub?: boolean;
  repo_id?: string | null;
  hf_token?: string | null;
  private?: boolean;
  base_model_id?: string | null;
}): Promise<ExportOperationResponse> {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  return parseJson<ExportOperationResponse>(
    await authFetch("/api/export/export/base", requestOptions),
  );
}

/**
 * POST the given parameters as JSON to `/api/export/export/gguf`.
 *
 * @param params Request body, serialized as-is.
 * @throws Error when the response status is not OK.
 */
export async function exportGGUF(params: {
  save_directory: string;
  quantization_method: string;
  push_to_hub?: boolean;
  repo_id?: string | null;
  hf_token?: string | null;
}): Promise<ExportOperationResponse> {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  return parseJson<ExportOperationResponse>(
    await authFetch("/api/export/export/gguf", requestOptions),
  );
}

/**
 * POST the given parameters as JSON to `/api/export/export/lora`.
 *
 * @param params Request body, serialized as-is.
 * @throws Error when the response status is not OK.
 */
export async function exportLoRA(params: {
  save_directory: string;
  push_to_hub?: boolean;
  repo_id?: string | null;
  hf_token?: string | null;
  private?: boolean;
}): Promise<ExportOperationResponse> {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  return parseJson<ExportOperationResponse>(
    await authFetch("/api/export/export/lora", requestOptions),
  );
}

/**
 * POST to `/api/export/cleanup` with no request body.
 *
 * @throws Error when the response status is not OK.
 */
export async function cleanupExport(): Promise<ExportOperationResponse> {
  return parseJson<ExportOperationResponse>(
    await authFetch("/api/export/cleanup", { method: "POST" }),
  );
}


================================================
FILE: studio/frontend/src/features/export/components/export-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import {
  InputGroup,
  InputGroupAddon,
  InputGroupInput,
} from "@/components/ui/input-group";
import { Spinner } from "@/components/ui/spinner";
import { Switch } from "@/components/ui/switch";
import { AlertCircleIcon, ArrowRight01Icon, CheckmarkCircle02Icon, Key01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { AnimatePresence, motion } from "motion/react";
import { collapseAnim } from "../anim";
import { EXPORT_METHODS, type ExportMethod } from "../constants";

// Where an export is written: the local machine or the Hugging Face Hub
// (the success view reports "pushed to Hugging Face Hub" for "hub").
type Destination = "local" | "hub";

/**
 * Props for ExportDialog. The dialog is fully controlled: every form value
 * arrives as a prop paired with an `on…Change` callback.
 */
interface ExportDialogProps {
  // Controlled open state of the dialog.
  open: boolean;
  // Open-state change requests; not forwarded while `exporting` is true.
  onOpenChange: (open: boolean) => void;
  // Selection context for the export.
  // NOTE(review): how these five are displayed is outside the visible part
  // of the component — confirm before relying on it.
  checkpoint: string | null;
  exportMethod: ExportMethod | null;
  quantLevels: string[];
  // Received by the component as `_estimatedSize`; appears unused — confirm.
  estimatedSize: string;
  baseModelName: string;
  isAdapter: boolean;
  // Selected destination and its setter.
  destination: Destination;
  onDestinationChange: (v: Destination) => void;
  // Hub form fields (username, model name, token, repo visibility).
  hfUsername: string;
  onHfUsernameChange: (v: string) => void;
  modelName: string;
  onModelNameChange: (v: string) => void;
  hfToken: string;
  onHfTokenChange: (v: string) => void;
  privateRepo: boolean;
  onPrivateRepoChange: (v: boolean) => void;
  // Starts the export.
  onExport: () => void;
  // True while an export runs; blocks closing and outside-click dismissal.
  exporting: boolean;
  // Error message from the last attempt, or null.
  exportError: string | null;
  // When true the dialog shows the "Export Complete" view instead of the form.
  exportSuccess: boolean;
}

/**
 * Controlled modal for choosing an export destination and starting the export.
 *
 * Renders one of two views:
 * - the form (destination toggle, optional Hugging Face fields, error banner,
 *   summary, Cancel / Start Export), or
 * - an "Export Complete" confirmation when `exportSuccess` is true.
 *
 * While `exporting` is true the dialog cannot be dismissed (open-change
 * requests and outside interactions are ignored) and every control is disabled.
 *
 * All state lives in the parent; see `ExportDialogProps` for the individual
 * props. `estimatedSize` is accepted for interface compatibility but is not
 * rendered until the size comes from the backend.
 */
export function ExportDialog({
  open,
  onOpenChange,
  checkpoint,
  exportMethod,
  quantLevels,
  estimatedSize: _estimatedSize,
  baseModelName,
  isAdapter,
  destination,
  onDestinationChange,
  hfUsername,
  onHfUsernameChange,
  modelName,
  onModelNameChange,
  hfToken,
  onHfTokenChange,
  privateRepo,
  onPrivateRepoChange,
  onExport,
  exporting,
  exportError,
  exportSuccess,
}: ExportDialogProps) {
  return (
    <Dialog
      open={open}
      onOpenChange={(v) => {
        // Block close requests (Escape, close button) while an export runs.
        if (exporting) return;
        onOpenChange(v);
      }}
    >
      <DialogContent className="sm:max-w-lg" onInteractOutside={(e) => { if (exporting) e.preventDefault(); }}>
        {exportSuccess ? (
          <>
            <div className="flex flex-col items-center gap-3 py-6">
              <div className="flex size-12 items-center justify-center rounded-full bg-emerald-500/10">
                <HugeiconsIcon icon={CheckmarkCircle02Icon} className="size-6 text-emerald-500" />
              </div>
              <div className="text-center">
                <h3 className="text-lg font-semibold">Export Complete</h3>
                <p className="mt-1 text-sm text-muted-foreground">
                  {destination === "hub"
                    ? "Model successfully pushed to Hugging Face Hub."
                    : "Model saved locally."}
                </p>
              </div>
            </div>
            <DialogFooter>
              <Button onClick={() => onOpenChange(false)}>Done</Button>
            </DialogFooter>
          </>
        ) : (
          <>
            <DialogHeader>
              <DialogTitle>Export Model</DialogTitle>
              <DialogDescription>
                Choose where to save your exported model.
              </DialogDescription>
            </DialogHeader>

            <div className="flex gap-2">
              <Button
                variant={destination === "local" ? "dark" : "outline"}
                onClick={() => onDestinationChange("local")}
                disabled={exporting}
                className="flex-1"
              >
                Save Locally
              </Button>
              <Button
                variant={destination === "hub" ? "dark" : "outline"}
                onClick={() => onDestinationChange("hub")}
                disabled={exporting}
                className="flex-1"
              >
                Push to Hub
              </Button>
            </div>

            <AnimatePresence>
              {destination === "hub" && (
                <motion.div {...collapseAnim} className="overflow-hidden">
                  <div className="flex flex-col gap-4 px-0.5">
                    <div className="grid grid-cols-2 gap-3">
                      <div className="flex flex-col gap-1.5">
                        {/* htmlFor/id pairs tie each label to its input, as the
                            "Private Repository" switch below already does. */}
                        <label
                          htmlFor="export-hf-username"
                          className="text-xs font-medium text-muted-foreground"
                        >
                          Username / Org
                        </label>
                        <Input
                          id="export-hf-username"
                          placeholder="your-username"
                          value={hfUsername}
                          onChange={(e) => onHfUsernameChange(e.target.value)}
                          disabled={exporting}
                        />
                      </div>
                      <div className="flex flex-col gap-1.5">
                        <label
                          htmlFor="export-model-name"
                          className="text-xs font-medium text-muted-foreground"
                        >
                          Model Name
                        </label>
                        <Input
                          id="export-model-name"
                          placeholder="my-model-gguf"
                          value={modelName}
                          onChange={(e) => onModelNameChange(e.target.value)}
                          disabled={exporting}
                        />
                      </div>
                    </div>

                    <div className="flex flex-col gap-1.5">
                      <div className="flex items-center justify-between">
                        <label
                          htmlFor="export-hf-token"
                          className="text-xs font-medium text-muted-foreground"
                        >
                          HF Write Token
                        </label>
                        <a
                          href="https://huggingface.co/settings/tokens"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="flex items-center gap-1 text-[11px] text-emerald-600 hover:text-emerald-700 transition-colors"
                        >
                          Get token
                          <HugeiconsIcon
                            icon={ArrowRight01Icon}
                            className="size-3"
                          />
                        </a>
                      </div>
                      <InputGroup>
                        <InputGroupAddon>
                          <HugeiconsIcon icon={Key01Icon} className="size-4" />
                        </InputGroupAddon>
                        <InputGroupInput
                          id="export-hf-token"
                          type="password"
                          autoComplete="new-password"
                          name="hf-token"
                          placeholder="hf_..."
                          value={hfToken}
                          onChange={(e) => onHfTokenChange(e.target.value)}
                          disabled={exporting}
                        />
                      </InputGroup>
                      <p className="text-[11px] text-muted-foreground/70">
                        Leave empty if already logged in via CLI.
                      </p>
                    </div>

                    <div className="flex items-center gap-3">
                      <Switch
                        id="private-repo"
                        size="sm"
                        checked={privateRepo}
                        onCheckedChange={onPrivateRepoChange}
                        disabled={exporting}
                      />
                      <label
                        htmlFor="private-repo"
                        className="text-xs font-medium cursor-pointer"
                      >
                        Private Repository
                      </label>
                    </div>
                  </div>
                </motion.div>
              )}
            </AnimatePresence>

            {/* Error banner; role="alert" so the failure is announced when it appears */}
            {exportError && (
              <div
                role="alert"
                className="flex items-start gap-2 rounded-lg bg-destructive/10 p-3 text-sm text-destructive"
              >
                <HugeiconsIcon icon={AlertCircleIcon} className="size-4 mt-0.5 shrink-0" />
                <span>{exportError}</span>
              </div>
            )}

            {/* Summary */}
            <div className="rounded-xl bg-muted/50 p-3 text-xs text-muted-foreground flex flex-col gap-1">
              <div className="flex justify-between">
                <span>Base Model</span>
                <span className="font-medium text-foreground">{baseModelName}</span>
              </div>
              <div className="flex justify-between">
                <span>{isAdapter ? "Checkpoint" : "Model"}</span>
                <span className="font-medium text-foreground">{checkpoint}</span>
              </div>
              <div className="flex justify-between">
                <span>Export Method</span>
                <span className="font-medium text-foreground">
                  {EXPORT_METHODS.find((m) => m.value === exportMethod)?.title}
                </span>
              </div>
              {exportMethod === "gguf" && quantLevels.length > 0 && (
                <div className="flex justify-between">
                  <span>Quantizations</span>
                  <span className="font-medium text-foreground">
                    {quantLevels.join(", ")}
                  </span>
                </div>
              )}
              {/* TODO: unhide once estimated size comes from the backend API */}
              {/* <div className="flex justify-between">
            <span>Est. size</span>
            <span className="font-medium text-foreground">{estimatedSize}</span>
          </div> */}
            </div>

            <DialogFooter>
              <Button
                variant="outline"
                onClick={() => onOpenChange(false)}
                disabled={exporting}
              >
                Cancel
              </Button>
              <Button onClick={onExport} disabled={exporting}>
                {exporting ? (
                  <span className="flex items-center gap-2">
                    <Spinner className="size-4" />
                    Exporting…
                  </span>
                ) : (
                  "Start Export"
                )}
              </Button>
            </DialogFooter>
          </>
        )}
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/export/components/method-picker.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
import {
  CheckmarkCircle01Icon,
  InformationCircleIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { EXPORT_METHODS, type ExportMethod } from "../constants";

/** Props for {@link MethodPicker}; the component is controlled by its parent. */
interface MethodPickerProps {
  /** Currently selected export method, or `null` when nothing is selected. */
  value: ExportMethod | null;
  /** Called with the method whose card was clicked. */
  onChange: (v: ExportMethod) => void;
}

/**
 * Grid of selectable cards, one per entry in `EXPORT_METHODS`.
 *
 * Clicking a card (or pressing Enter / Space while it is focused) calls
 * `onChange` with that method's value. The card matching `value` is drawn in
 * the selected style. Each card also has an info tooltip with a docs link.
 *
 * @param value    Currently selected method, or `null`.
 * @param onChange Receives the method of the activated card.
 */
export function MethodPicker({ value, onChange }: MethodPickerProps) {
  return (
    <div data-tour="export-method" className="flex flex-col gap-3">
      <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
        Export Method
        <Tooltip>
          <TooltipTrigger asChild={true}>
            <button
              type="button"
              className="text-foreground/70 hover:text-foreground"
              aria-label="Export method info"
            >
              <HugeiconsIcon icon={InformationCircleIcon} className="size-3" />
            </button>
          </TooltipTrigger>
          <TooltipContent>
            How your model is packaged for deployment.{" "}
            <a
              href="https://unsloth.ai/docs/basics/inference-and-deployment"
              target="_blank"
              rel="noopener noreferrer"
              className="text-primary underline"
            >
              Read more
            </a>
          </TooltipContent>
        </Tooltip>
      </span>
      <div className="grid grid-cols-3 gap-3">
        {EXPORT_METHODS.map((m) => {
          const selected = value === m.value;
          // The card is a div with button semantics rather than a <button>:
          // it contains the info-tooltip <button>, and a <button> may not
          // contain interactive descendants.
          return (
            <div
              key={m.value}
              role="button"
              tabIndex={0}
              aria-pressed={selected}
              onClick={() => onChange(m.value)}
              onKeyDown={(e) => {
                // Ignore key presses that bubble up from the nested info button.
                if (e.target !== e.currentTarget) return;
                if (e.key === "Enter" || e.key === " ") {
                  // Space would otherwise scroll the page.
                  e.preventDefault();
                  onChange(m.value);
                }
              }}
              className={cn(
                "flex items-start gap-3 rounded-xl p-4 text-left ring-1 transition-all",
                selected
                  ? "ring-2 ring-primary bg-primary/5"
                  : "ring-border hover:-translate-y-0.5 hover:shadow-sm",
              )}
            >
              <div
                className={cn(
                  "mt-0.5 flex size-5 shrink-0 items-center justify-center rounded-full border-2 transition-colors",
                  selected
                    ? "border-primary bg-primary"
                    : "border-muted-foreground/30",
                )}
              >
                {selected && (
                  <HugeiconsIcon
                    icon={CheckmarkCircle01Icon}
                    className="size-3 text-primary-foreground"
                  />
                )}
              </div>
              <div className="flex flex-col gap-1">
                <div className="flex items-center gap-2">
                  <span className="text-sm font-medium">{m.title}</span>
                  <Tooltip>
                    <TooltipTrigger asChild={true}>
                      <button
                        type="button"
                        className="shrink-0 text-foreground/50 hover:text-foreground cursor-help"
                        // Keep a click on the info icon from selecting the card.
                        onClick={(e) => e.stopPropagation()}
                        aria-label={`${m.title} info`}
                      >
                        <HugeiconsIcon
                          icon={InformationCircleIcon}
                          className="size-3"
                        />
                      </button>
                    </TooltipTrigger>
                    <TooltipContent className="max-w-xs">
                      {m.tooltip}{" "}
                      <a
                        href={
                          m.value === "gguf"
                            ? "https://unsloth.ai/docs/basics/inference-and-deployment/saving-to-gguf"
                            : "https://unsloth.ai/docs/basics/inference-and-deployment"
                        }
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-primary underline"
                      >
                        Read more
                      </a>
                    </TooltipContent>
                  </Tooltip>
                  {m.badge && (
                    <Badge
                      variant="secondary"
                      className="text-[10px] px-1.5 py-0"
                    >
                      {m.badge}
                    </Badge>
                  )}
                </div>
                <span className="text-xs text-muted-foreground">
                  {m.description}
                </span>
              </div>
            </div>
          );
        })}
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/export/components/quant-picker.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { cn } from "@/lib/utils";
import {
  CheckmarkCircle01Icon,
  InformationCircleIcon,
  LayersIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { QUANT_OPTIONS } from "../constants";

/** Props for {@link QuantPicker}; the component is controlled by its parent. */
interface QuantPickerProps {
  /** Currently selected quantization values (matched against `QUANT_OPTIONS[].value`). */
  value: string[];
  /** Called with the next selection after a chip is toggled or "Clear all" is clicked. */
  onChange: (v: string[]) => void;
}

/**
 * Multi-select chip list of the quantization levels in `QUANT_OPTIONS`.
 *
 * Clicking a chip adds its value to, or removes it from, the selection and
 * reports the new array through `onChange`. When at least one level is
 * selected, a count and a "Clear all" button are shown below the chips.
 *
 * @param value    Currently selected quantization values.
 * @param onChange Receives the next selection array.
 */
export function QuantPicker({ value, onChange }: QuantPickerProps) {
  // Remove the value if it is already selected, otherwise append it.
  const toggle = (qv: string) => {
    onChange(
      value.includes(qv) ? value.filter((q) => q !== qv) : [...value, qv],
    );
  };

  return (
    <div className="flex flex-col gap-3">
      <div className="flex items-center gap-2">
        <HugeiconsIcon
          icon={LayersIcon}
          className="size-4 text-muted-foreground"
        />
        <span className="text-xs font-medium text-muted-foreground">
          Quantization Levels
        </span>
        <Tooltip>
          <TooltipTrigger asChild={true}>
            {/* Icon-only button: aria-label gives it an accessible name. */}
            <button
              type="button"
              className="text-foreground/70 hover:text-foreground"
              aria-label="Quantization levels info"
            >
              <HugeiconsIcon icon={InformationCircleIcon} className="size-3" />
            </button>
          </TooltipTrigger>
          <TooltipContent className="max-w-xs">
            Lower quantization (Q2, Q3) = smaller files but reduced quality.
            Q4–Q5 is a good balance.{" "}
            <a
              href="https://unsloth.ai/docs/basics/inference-and-deployment/saving-to-gguf"
              target="_blank"
              rel="noopener noreferrer"
              className="text-primary underline"
            >
              Read more
            </a>
          </TooltipContent>
        </Tooltip>
        <span className="text-[11px] text-muted-foreground/70">
          — select one or more
        </span>
      </div>
      <div className="flex flex-wrap gap-2 py-1 pl-1">
        {QUANT_OPTIONS.map((q) => {
          const active = value.includes(q.value);
          return (
            <button
              key={q.value}
              type="button"
              // Expose the toggle state, which is otherwise only visual.
              aria-pressed={active}
              onClick={() => toggle(q.value)}
              className={cn(
                "inline-flex items-center gap-1.5 rounded-full px-3 py-1.5 text-xs font-medium ring-1 transition-all",
                active
                  ? "ring-primary bg-primary/10 text-foreground"
                  : "ring-border text-muted-foreground hover:text-foreground hover:ring-foreground/20",
              )}
            >
              {active && (
                <HugeiconsIcon
                  icon={CheckmarkCircle01Icon}
                  className="size-3 text-primary"
                />
              )}
              {q.label}
              <span className="text-[10px] opacity-60">{q.size}</span>
              {/* The "rec" badge is hidden once the recommended chip is selected. */}
              {q.recommended && !active && (
                <span className="rounded-full bg-emerald-100 px-1.5 py-0 text-[9px] font-semibold text-emerald-700 dark:bg-emerald-900 dark:text-emerald-300">
                  rec
                </span>
              )}
            </button>
          );
        })}
      </div>
      {value.length > 0 && (
        <div className="flex items-center gap-3">
          <span className="text-[11px] text-muted-foreground">
            {value.length} selected
          </span>
          <button
            type="button"
            onClick={() => onChange([])}
            className="text-[11px] text-muted-foreground/70 hover:text-foreground transition-colors"
          >
            Clear all
          </button>
        </div>
      )}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/export/constants.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TrainingMethod } from "@/types/training";

/** Identifier of an export format offered by the export UI. */
export type ExportMethod = "merged" | "lora" | "gguf";

/**
 * Display metadata for each export method, in the order the cards are shown.
 *
 * - `value`: identifier compared against the selected method.
 * - `title`: card heading; also shown as "Export Method" in the dialog summary.
 * - `description`: short text under the card heading.
 * - `tooltip`: longer text shown in the card's info tooltip.
 * - `badge`: optional label rendered next to the title when present.
 */
export const EXPORT_METHODS: {
  value: ExportMethod;
  title: string;
  description: string;
  tooltip: string;
  badge?: string;
}[] = [
  {
    value: "merged",
    title: "Merged Model",
    description: "Full 16-bit model ready for inference.",
    tooltip:
      "Merges adapter weights into the base model. Best for direct deployment with vLLM or TGI.",
  },
  {
    value: "lora",
    title: "LoRA Only",
    description: "Lightweight adapter files (~100 MB). Needs base model.",
    tooltip:
      "Exports only the trained adapter. Pair with the base model at inference time to save storage.",
  },
  {
    value: "gguf",
    title: "GGUF / Llama.cpp",
    description: "Quantized formats for local AI runners.",
    tooltip:
      "Converts to GGUF for llama.cpp, Ollama, and other local runners. Pick a quantization level below.",
  },
];

/**
 * Quantization levels offered for GGUF export.
 *
 * - `value`: identifier stored in the selection and sent as the quantization method.
 * - `label`: text shown on the chip.
 * - `size`: hard-coded approximate size string; `getEstimatedSize` parses its
 *   numeric part, so keep the "~<number> GB" shape.
 * - `recommended`: when true, the chip shows a "rec" badge while unselected.
 *
 * NOTE(review): the sizes are static and do not depend on the selected model;
 * they look like figures for a ~7B-parameter model — TODO confirm.
 */
export const QUANT_OPTIONS = [
  { value: "q3_k_m", label: "Q3_K_M", size: "~3.5 GB" },
  { value: "q4_0", label: "Q4_0", size: "~4.1 GB" },
  { value: "q4_k_m", label: "Q4_K_M", size: "~4.8 GB", recommended: true },
  { value: "q5_0", label: "Q5_0", size: "~5.0 GB" },
  { value: "q5_k_m", label: "Q5_K_M", size: "~5.6 GB" },
  { value: "q8_0", label: "Q8_0", size: "~8.2 GB" },
  { value: "f16", label: "F16", size: "~14.2 GB" },
  { value: "f32", label: "F32", size: "~28.4 GB" },
];

/**
 * Build a human-readable size estimate for the chosen export configuration.
 *
 * The figures are static approximations taken from `QUANT_OPTIONS` (for GGUF)
 * or hard-coded here (for merged / LoRA); they are not computed from the model.
 *
 * @param method      Selected export method, or `null` when none is chosen.
 * @param quantLevels Selected quantization values; only used for "gguf".
 * @returns For "gguf" with one level, that level's size string. With several
 *          levels, the summed size plus a file count. "~14.2 GB" for "merged",
 *          "~100 MB" for "lora", and "—" when nothing can be estimated.
 */
export function getEstimatedSize(
  method: ExportMethod | null,
  quantLevels: string[],
) {
  const sizeOf = (v: string) =>
    QUANT_OPTIONS.find((q) => q.value === v)?.size ?? "—";
  if (method === "gguf" && quantLevels.length > 0) {
    if (quantLevels.length === 1) {
      return sizeOf(quantLevels[0]);
    }
    // Unknown levels map to "—", which parses to NaN. Drop those so a single
    // unrecognised value cannot turn the whole total into "~NaN GB".
    const knownSizes = quantLevels
      .map((q) => Number.parseFloat(sizeOf(q).replace(/[^0-9.]/g, "")))
      .filter((gb) => Number.isFinite(gb));
    if (knownSizes.length === 0) {
      return "—";
    }
    const total = knownSizes.reduce((a, b) => a + b, 0);
    return `~${total.toFixed(1)} GB (${quantLevels.length} files)`;
  }
  if (method === "merged") {
    return "~14.2 GB";
  }
  if (method === "lora") {
    return "~100 MB";
  }
  return "—";
}

/** Display label for each `TrainingMethod` value. */
export const METHOD_LABELS: Record<TrainingMethod, string> = {
  qlora: "QLoRA",
  lora: "LoRA",
  full: "Full Fine-tune",
};

/** Ordered, user-facing steps describing the export workflow (imported by the export page). */
export const GUIDE_STEPS = [
  "Select a training checkpoint to export from",
  "Choose an export method based on your use case",
  "Pick quantization levels if using GGUF",
  "Click Export and choose your destination",
  "Test your model and compare outputs in Chat",
];


================================================
FILE: studio/frontend/src/features/export/export-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { SectionCard } from "@/components/section-card";
import { Button } from "@/components/ui/button";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Separator } from "@/components/ui/separator";
import { Spinner } from "@/components/ui/spinner";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { useTrainingConfigStore } from "@/features/training";
import { AlertCircleIcon, InformationCircleIcon, PackageIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { AnimatePresence, motion } from "motion/react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useShallow } from "zustand/react/shallow";
import { collapseAnim } from "./anim";
import type { ModelCheckpoints } from "./api/export-api";
import {
  cleanupExport,
  exportBase,
  exportGGUF,
  exportLoRA,
  exportMerged,
  fetchCheckpoints,
  loadCheckpoint,
} from "./api/export-api";
import { ExportDialog } from "./components/export-dialog";
import { MethodPicker } from "./components/method-picker";
import { QuantPicker } from "./components/quant-picker";
import {
  type ExportMethod,
  GUIDE_STEPS,
  getEstimatedSize,
} from "./constants";
import { GuidedTour, useGuidedTourController } from "@/features/tour";
import { exportTourSteps } from "./tour";

export function ExportPage() {
  const { hfToken, setHfToken } = useTrainingConfigStore(
    useShallow((s) => ({
      hfToken: s.hfToken,
      setHfToken: s.setHfToken,
    })),
  );

  // ---- API-driven checkpoint state ----
  const [models, setModels] = useState<ModelCheckpoints[]>([]);
  const [loadingCheckpoints, setLoadingCheckpoints] = useState(true);
  const [checkpointError, setCheckpointError] = useState<string | null>(null);

  const [selectedModelIdx, setSelectedModelIdx] = useState<string | null>(null);
  const [checkpoint, setCheckpoint] = useState<string | null>(null);

  const [exportMethod, setExportMethod] = useState<ExportMethod | null>(null);
  const [quantLevels, setQuantLevels] = useState<string[]>([]);
  const [dialogOpen, setDialogOpen] = useState(false);

  const [destination, setDestination] = useState<"local" | "hub">("local");
  const [hfUsername, setHfUsername] = useState("");
  const [modelName, setModelName] = useState("");
  const [privateRepo, setPrivateRepo] = useState(false);

  const [exporting, setExporting] = useState(false);
  const [exportError, setExportError] = useState<string | null>(null);
  const [exportSuccess, setExportSuccess] = useState(false);

  const tour = useGuidedTourController({
    id: "export",
    steps: exportTourSteps,
  });

  // ---- Fetch checkpoints on mount ----
  useEffect(() => {
    let cancelled = false;
    setLoadingCheckpoints(true);
    setCheckpointError(null);
    fetchCheckpoints()
      .then((data) => {
        if (!cancelled) {
          setModels(data.models);
        }
      })
      .catch((err) => {
        if (!cancelled) {
          setCheckpointError(
            err instanceof Error ? err.message : "Failed to load checkpoints",
          );
        }
      })
      .finally(() => {
        if (!cancelled) setLoadingCheckpoints(false);
      });
    return () => {
      cancelled = true;
    };
  }, []);

  // ---- Derived state ----
  const selectedModelData = useMemo(
    () =>
      selectedModelIdx != null
        ? models.find((m) => m.name === selectedModelIdx) ?? null
        : null,
    [models, selectedModelIdx],
  );

  const checkpointsForModel = useMemo(
    () => selectedModelData?.checkpoints ?? [],
    [selectedModelData],
  );

  // Derive training info from selected model's API metadata
  const baseModelName = selectedModelData?.base_model ?? "—";
  const isAdapter = !!selectedModelData?.peft_type;
  const loraRank = selectedModelData?.lora_rank ?? null;
  const trainingMethodLabel = selectedModelData?.peft_type
    ? "LoRA / QLoRA"
    : "Full Fine-tune";

  // Reset checkpoint when the selected model changes
  useEffect(() => {
    setCheckpoint(null);
  }, [selectedModelIdx]);

  const handleMethodChange = (method: ExportMethod) => {
    setExportMethod(method);
    if (method !== "gguf") {
      setQuantLevels([]);
    }
  };

  const estimatedSize = getEstimatedSize(exportMethod, quantLevels);
  const canExport =
    checkpoint &&
    exportMethod &&
    (exportMethod !== "gguf" || quantLevels.length > 0);

  // ---- Export handler ----
  const handleExport = useCallback(async () => {
    if (!checkpoint) return;

    const selectedCp = checkpointsForModel.find(
      (cp) => cp.display_name === checkpoint,
    );
    if (!selectedCp) return;

    setExporting(true);
    setExportError(null);
    setExportSuccess(false);

    // For GGUF, use a flat folder like "exports/gemma-3-4b-it-finetune-gguf"
    // For other formats, nest under training-run/checkpoint
    const saveDir =
      exportMethod === "gguf"
        ? `${baseModelName.split("/").pop() ?? selectedModelIdx ?? "model"}-finetune-gguf`
        : `${selectedModelIdx ?? "model"}/${checkpoint}`;
    const pushToHub = destination === "hub";
    const repoId = pushToHub && hfUsername && modelName
      ? `${hfUsername}/${modelName}`
      : undefined;
    const token = pushToHub && hfToken ? hfToken : undefined;

    try {
      // 1. Load checkpoint
      await loadCheckpoint({ checkpoint_path: selectedCp.path });

      // 2. Run export based on method
      if (exportMethod === "merged") {
        if (isAdapter) {
          await exportMerged({
            save_directory: saveDir,
            push_to_hub: pushToHub,
            repo_id: repoId,
            hf_token: token,
            private: privateRepo,
          });
        } else {
          await exportBase({
            save_directory: saveDir,
            push_to_hub: pushToHub,
            repo_id: repoId,
            hf_token: token,
            private: privateRepo,
            base_model_id: selectedModelData?.base_model,
          });
        }
      } else if (exportMethod === "gguf") {
        for (const quant of quantLevels) {
          await exportGGUF({
            save_directory: saveDir,
            quantization_method: quant,
            push_to_hub: pushToHub,
            repo_id: repoId,
            hf_token: token,
          });
        }
      } else if (exportMethod === "lora") {
        await exportLoRA({
          save_directory: saveDir,
          push_to_hub: pushToHub,
          repo_id: repoId,
          hf_token: token,
          private: privateRepo,
        });
      }

      setExportSuccess(true);
    } catch (err) {
      setExportError(
        err instanceof Error ? err.message : "Export failed",
      );
    } finally {
      try {
        await cleanupExport();
      } catch {
        // cleanup is best-effort
      }
      setExporting(false);
    }
  }, [
    checkpoint,
    checkpointsForModel,
    selectedModelIdx,
    selectedModelData,
    exportMethod,
    isAdapter,
    quantLevels,
    destination,
    hfUsername,
    modelName,
    hfToken,
    privateRepo,
  ]);

  // ---- Render ----
  return (
    <div className="min-h-screen bg-background">
      <main className="mx-auto max-w-7xl px-4 py-4 sm:px-6">
        <GuidedTour {...tour.tourProps} />

        <div className="mb-8 flex flex-col gap-0.5">
          <h1 className="text-2xl font-semibold tracking-tight">
            Export Model
          </h1>
          <p className="text-sm text-muted-foreground">
            Export your fine-tuned model for deployment
          </p>
        </div>

        <SectionCard
          icon={<HugeiconsIcon icon={PackageIcon} className="size-5" />}
          title="Export Configuration"
          description="Select checkpoint, method, and quantization"
          accent="emerald"
          featured={true}
          className="shadow-border ring-1 ring-border"
        >
          {/* Loading / error states */}
          {loadingCheckpoints && (
            <div className="flex items-center gap-2 py-6 justify-center text-sm text-muted-foreground">
              <Spinner className="size-4" />
              Loading checkpoints…
            </div>
          )}

          {checkpointError && (
            <div className="flex items-center gap-2 py-6 justify-center text-sm text-destructive">
              <HugeiconsIcon icon={AlertCircleIcon} className="size-4" />
              {checkpointError}
            </div>
          )}

          {!loadingCheckpoints && !checkpointError && (
            <>
              {/* Top row: Dropdowns + metadata | Guide */}
              <div className="grid grid-cols-1 gap-6 md:grid-cols-2 md:gap-8">
                <div className="flex flex-col gap-4">
                  {/* Training run dropdown */}
                  <div data-tour="export-training-run" className="flex flex-col gap-2">
                    <label className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
                      Training Run
                      <Tooltip>
                        <TooltipTrigger asChild={true}>
                          <button
                            type="button"
                            className="text-foreground/70 hover:text-foreground"
                          >
                            <HugeiconsIcon
                              icon={InformationCircleIcon}
                              className="size-3"
                            />
                          </button>
                        </TooltipTrigger>
                        <TooltipContent>
                          Select the training run that produced the checkpoints
                          you want to export.
                        </TooltipContent>
                      </Tooltip>
                    </label>
                    <Select
                      value={selectedModelIdx ?? ""}
                      onValueChange={setSelectedModelIdx}
                    >
                      <SelectTrigger className="w-full">
                        <SelectValue
                          placeholder={
                            models.length === 0
                              ? "No training runs found"
                              : "Select a training run…"
                          }
                        />
                      </SelectTrigger>
                      <SelectContent>
                        {models.map((m) => {
                          const tsMatch = m.name.match(/_(\d{10,})$/);
                          const displayName = tsMatch ? m.name.slice(0, tsMatch.index) : m.name;
                          const timeStr = tsMatch
                            ? new Date(Number(tsMatch[1]) * 1000).toLocaleString(undefined, {
                                dateStyle: "medium",
                                timeStyle: "short",
                              })
                            : null;
                          return (
                            <SelectItem key={m.name} value={m.name}>
                              <span className="flex items-center gap-2">
                                {displayName}
                                <span className="text-muted-foreground text-xs">
                                  {m.checkpoints.length} checkpoint
                                  {m.checkpoints.length !== 1 ? "s" : ""}
                                </span>
                                {timeStr && (
                                  <span className="text-muted-foreground text-xs">
                                    · {timeStr}
                                  </span>
                                )}
                              </span>
                            </SelectItem>
                          );
                        })}
                      </SelectContent>
                    </Select>
                  </div>

                  {/* Checkpoint dropdown */}
                  <div data-tour="export-checkpoint" className="flex flex-col gap-2">
                    <label className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
                      Checkpoint
                      <Tooltip>
                        <TooltipTrigger asChild={true}>
                          <button
                            type="button"
                            className="text-foreground/70 hover:text-foreground"
                          >
                            <HugeiconsIcon
                              icon={InformationCircleIcon}
                              className="size-3"
                            />
                          </button>
                        </TooltipTrigger>
                        <TooltipContent>
                          Choose a saved checkpoint to export. Lower loss
                          generally means better quality.{" "}
                          <a
                            href="https://unsloth.ai/docs/basics/inference-and-deployment"
                            target="_blank"
                            rel="noopener noreferrer"
                            className="text-primary underline"
                          >
                            Read more
                          </a>
                        </TooltipContent>
                      </Tooltip>
                    </label>
                    <Select
                      value={checkpoint ?? ""}
                      onValueChange={setCheckpoint}
                      disabled={!selectedModelIdx}
                    >
                      <SelectTrigger className="w-full">
                        <SelectValue
                          placeholder={
                            !selectedModelIdx
                              ? "Select a training run first"
                              : checkpointsForModel.length === 0
                                ? "No checkpoints found"
                                : "Select a checkpoint…"
                          }
                        />
                      </SelectTrigger>
                      <SelectContent>
                        {checkpointsForModel.map((cp) => (
                          <SelectItem key={cp.path} value={cp.display_name}>
                            <span className="flex items-center gap-2">
                              {cp.display_name}
                              {cp.loss != null && (
                                <span className="text-muted-foreground text-xs">
                                  loss: {cp.loss.toFixed(4)}
                                </span>
                              )}
                            </span>
                          </SelectItem>
                        ))}
                      </SelectContent>
                    </Select>
                  </div>

                  <div className="rounded-xl bg-muted/50 p-3 flex flex-col gap-2">
                    <span className="text-[11px] font-medium text-muted-foreground uppercase tracking-wider">
                      Training Info
                    </span>
                    <div className="grid grid-cols-1 gap-x-6 gap-y-1.5 text-xs sm:grid-cols-2">
                      <div className="flex justify-between">
                        <span className="text-muted-foreground">Base Model</span>
                        <span className="font-medium">{baseModelName}</span>
                      </div>
                      <div className="flex justify-between">
                        <span className="text-muted-foreground">Method</span>
                        <span className="font-medium">
                          {trainingMethodLabel}
                        </span>
                      </div>
                      <div className="flex justify-between">
                        <span className="text-muted-foreground">Checkpoints</span>
                        <span className="font-medium">
                          {checkpointsForModel.length}
                        </span>
                      </div>
                      {isAdapter && (
                        <div className="flex justify-between">
                          <span className="text-muted-foreground">LoRA Rank</span>
                          <span className="font-medium">{loraRank}</span>
                        </div>
                      )}
                    </div>
                  </div>
                </div>

                <div className="flex flex-col gap-2.5">
                  <span className="text-xs font-medium text-muted-foreground">
                    Quick Guide
                  </span>
                  <ol className="flex flex-col gap-3">
                    {GUIDE_STEPS.map((step, i) => (
                      <li
                        key={step}
                        className="flex items-start gap-2 text-xs text-muted-foreground"
                      >
                        <span className="flex size-5 shrink-0 items-center justify-center rounded-full bg-muted text-[10px] font-semibold">
                          {i + 1}
                        </span>
                        {step}
                      </li>
                    ))}
                  </ol>
                </div>
              </div>

              <MethodPicker value={exportMethod} onChange={handleMethodChange} />

              <AnimatePresence>
                {exportMethod === "gguf" && (
                  <motion.div {...collapseAnim} className="overflow-hidden">
                    <QuantPicker value={quantLevels} onChange={setQuantLevels} />
                  </motion.div>
                )}
              </AnimatePresence>

              <Separator />
              <div className="flex items-center justify-end">
                {/* TODO: unhide once estimated size comes from the backend API */}
                {/* <div className="flex items-center gap-1.5 text-xs text-muted-foreground">
                  <HugeiconsIcon
                    icon={InformationCircleIcon}
                    className="size-3.5"
                  />
                  <span>Est. size: {estimatedSize} · Free disk space: 120 GB</span>
                </div> */}
                <Button
                  data-tour="export-cta"
                  disabled={!canExport}
                  onClick={() => { setExportSuccess(false); setExportError(null); setDialogOpen(true); }}
                >
                  Export Model
                </Button>
              </div>
            </>
          )}
        </SectionCard>
      </main>

      <ExportDialog
        open={dialogOpen}
        onOpenChange={setDialogOpen}
        checkpoint={checkpoint}
        exportMethod={exportMethod}
        quantLevels={quantLevels}
        estimatedSize={estimatedSize}
        baseModelName={baseModelName}
        isAdapter={isAdapter}
        destination={destination}
        onDestinationChange={setDestination}
        hfUsername={hfUsername}
        onHfUsernameChange={setHfUsername}
        modelName={modelName}
        onModelNameChange={setModelName}
        hfToken={hfToken}
        onHfTokenChange={setHfToken}
        privateRepo={privateRepo}
        onPrivateRepoChange={setPrivateRepo}
        onExport={handleExport}
        exporting={exporting}
        exportError={exportError}
        exportSuccess={exportSuccess}
      />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/export/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { ExportPage } from "./export-page";


================================================
FILE: studio/frontend/src/features/export/tour/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { exportTourSteps } from "./steps";


================================================
FILE: studio/frontend/src/features/export/tour/steps.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TourStep } from "@/features/tour";

/**
 * Ordered steps of the guided tour for the export page.
 *
 * Each step's `target` is the value of a `data-tour` attribute on the export
 * page (e.g. `export-checkpoint`, `export-cta`), and `body` is the JSX shown
 * for that step. How `TourStep` entries are consumed is defined in
 * `@/features/tour` — presumably `target` is used to locate the element to
 * highlight; confirm there before renaming any target.
 */
export const exportTourSteps: TourStep[] = [
  // 1. Training-run selector.
  {
    id: "training-run",
    target: "export-training-run",
    title: "Pick training run",
    body: (
      <>
        Start by selecting the training run. Each run groups the checkpoints
        produced by that specific fine-tuning job.
      </>
    ),
  },
  // 2. Checkpoint selector.
  {
    id: "checkpoint",
    target: "export-checkpoint",
    title: "Pick checkpoint",
    body: (
      <>
        Pick which checkpoint to export. If you trained multiple checkpoints,
        it’s worth exporting 1-2 candidates and testing in Chat.
      </>
    ),
  },
  // 3. Export method (GGUF vs. safetensors) picker.
  {
    id: "method",
    target: "export-method",
    title: "Export method",
    body: (
      <>
        Choose the packaging. GGUF is for llama.cpp-style runtimes (pick a
        quant). Safetensors is for HF/Transformers-style usage. If you’re unsure,
        start with safetensors.
      </>
    ),
  },
  // 4. The "Export Model" call-to-action button.
  {
    id: "cta",
    target: "export-cta",
    title: "Export",
    body: (
      <>
        Export to local or push to HF Hub. After export, test in Chat and compare
        against base to confirm behavior is what you expect.
      </>
    ),
  },
];


================================================
FILE: studio/frontend/src/features/onboarding/components/splash-screen.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Card } from "@/components/ui/card";
import { motion } from "motion/react";

/** Props accepted by the onboarding splash screen. */
interface SplashScreenProps {
  /** Click handler wired to the "Start Onboarding" button. */
  onStartOnboarding: () => void;
  /** Click handler wired to the "Skip Onboarding" button. */
  onGoToStudio: () => void;
}

/**
 * Full-viewport splash card: animated mascot, product title and tagline, and
 * two buttons that either start onboarding or skip it.
 */
export function SplashScreen(props: SplashScreenProps) {
  const { onStartOnboarding, onGoToStudio } = props;

  // The brand text and the button stack share the same fade-and-rise
  // entrance; only the start delay differs between them.
  const fadeInUp = (delay: number) => ({
    initial: { opacity: 0, y: 10 },
    animate: { opacity: 1, y: 0 },
    transition: {
      duration: 0.4,
      ease: [0.165, 0.84, 0.44, 1] as [number, number, number, number],
      delay,
    },
  });

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center bg-gradient-to-b from-background via-background to-primary/5 p-6">
      <Card className="w-full max-w-md px-8 py-8 shadow-border ring-1 ring-border">
        {/* Mascot image, springs in first */}
        <div className="flex justify-center">
          <motion.img
            src="/Sloth emojis/Sloth loca pc.png"
            alt="Sloth mascot"
            className="size-30"
            initial={{ opacity: 0, y: 40, scale: 0.95 }}
            animate={{ opacity: 1, y: 0, scale: 1 }}
            transition={{
              type: "spring",
              duration: 0.7,
              bounce: 0.3,
              delay: 0.1,
            }}
          />
        </div>

        {/* Title and tagline */}
        <motion.div
          {...fadeInUp(0.4)}
          className="mt-4 flex flex-col items-center gap-1"
        >
          <h1 className="text-2xl font-semibold tracking-tight">
            Unsloth Studio
          </h1>
          <p className="text-sm text-muted-foreground">Train and run LLMs locally</p>
        </motion.div>

        {/* Primary / secondary actions, revealed last */}
        <motion.div {...fadeInUp(0.8)} className="mt-8 flex flex-col gap-3">
          <Button size="lg" onClick={onStartOnboarding}>
            Start Onboarding
          </Button>
          <Button size="lg" variant="outline" onClick={onGoToStudio}>
            Skip Onboarding
          </Button>
        </motion.div>
      </Card>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/steps/dataset-step.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  Field,
  FieldDescription,
  FieldGroup,
  FieldLabel,
} from "@/components/ui/field";
import {
  InputGroup,
  InputGroupAddon,
  InputGroupInput,
} from "@/components/ui/input-group";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Spinner } from "@/components/ui/spinner";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import {
  useDebouncedValue,
  useHfDatasetSearch,
  useHfTokenValidation,
  useInfiniteScroll,
} from "@/hooks";
import { cn } from "@/lib/utils";
import {
  HfDatasetSubsetSplitSelectors,
  useTrainingConfigStore,
} from "@/features/training";
import type { DatasetFormat } from "@/types/training";
import {
  InformationCircleIcon,
  Key01Icon,
  Search01Icon,
  SparklesIcon,
  Upload04Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useMemo, useRef, useState } from "react";
import { useShallow } from "zustand/react/shallow";

// Choices offered in the dataset "Format" select, in display order.
// Kept as a compact [value, label] table and expanded into option objects.
const FORMAT_OPTIONS: { value: DatasetFormat; label: string }[] = (
  [
    ["auto", "Auto Detect"],
    ["alpaca", "Alpaca"],
    ["chatml", "ChatML"],
    ["sharegpt", "ShareGPT"],
  ] as [DatasetFormat, string][]
).map(([value, label]) => ({ value, label }));

/**
 * Onboarding step for choosing the training dataset.
 *
 * Renders, top to bottom: a source toggle (Hugging Face / Upload); for the
 * Hugging Face source an optional token field, a searchable dataset combobox
 * and the subset/split selectors; for the upload source a click-to-upload
 * area; and finally the dataset format select. All persisted values are read
 * from and written to `useTrainingConfigStore`; only the search text is local
 * component state.
 */
export function DatasetStep() {
  // Dataset-related fields and their setters from the training config store.
  const {
    hfToken,
    setHfToken,
    datasetSource,
    selectHfDataset,
    selectLocalDataset,
    datasetFormat,
    setDatasetFormat,
    dataset,
    setDataset,
    datasetSubset,
    setDatasetSubset,
    datasetSplit,
    setDatasetSplit,
    datasetEvalSplit,
    setDatasetEvalSplit,
    uploadedFile,
    setUploadedFile,
    modelType,
  } = useTrainingConfigStore(
    useShallow((s) => ({
      hfToken: s.hfToken,
      setHfToken: s.setHfToken,
      datasetSource: s.datasetSource,
      selectHfDataset: s.selectHfDataset,
      selectLocalDataset: s.selectLocalDataset,
      datasetFormat: s.datasetFormat,
      setDatasetFormat: s.setDatasetFormat,
      dataset: s.dataset,
      setDataset: s.setDataset,
      datasetSubset: s.datasetSubset,
      setDatasetSubset: s.setDatasetSubset,
      datasetSplit: s.datasetSplit,
      setDatasetSplit: s.setDatasetSplit,
      datasetEvalSplit: s.datasetEvalSplit,
      setDatasetEvalSplit: s.setDatasetEvalSplit,
      uploadedFile: s.uploadedFile,
      setUploadedFile: s.setUploadedFile,
      modelType: s.modelType,
    })),
  );

  // Text typed into the dataset search box; its debounced value is the query
  // passed to the Hugging Face dataset search hook.
  const [inputValue, setInputValue] = useState("");
  // Flag raised when an item is picked so that the next input-value change is
  // ignored instead of being treated as a new search query (presumably the
  // combobox echoes the selected value into its input — confirm).
  const selectingRef = useRef(false);
  const debouncedQuery = useDebouncedValue(inputValue);
  // An empty token string is passed as `undefined` rather than "".
  const {
    results: hfResults,
    isLoading,
    isLoadingMore,
    fetchMore,
    error: hfSearchError,
  } = useHfDatasetSearch(debouncedQuery, {
    modelType,
    accessToken: hfToken || undefined,
  });

  const { error: tokenValidationError, isChecking: isCheckingToken } =
    useHfTokenValidation(hfToken);

  // The combobox is fed plain dataset id strings; memoized on the result list.
  const resultIds = useMemo(() => hfResults.map((r) => r.id), [hfResults]);

  // Wrapper element handed to ComboboxContent as its `anchor`.
  const comboboxAnchorRef = useRef<HTMLDivElement>(null);
  // `scrollRef` is attached to the scrollable list container and
  // `sentinelRef` to a 1px element rendered after the list; the hook
  // presumably calls `fetchMore` when the sentinel scrolls into view.
  const { scrollRef, sentinelRef } = useInfiniteScroll(
    fetchMore,
    hfResults.length,
  );

  // NOTE(review): placeholder implementation — no file picker is opened and
  // nothing is uploaded; clicking the upload area only records a hard-coded
  // filename in the store. Replace with a real upload flow.
  const handleFileUpload = () => {
    setUploadedFile("my_dataset.jsonl");
  };

  return (
    <FieldGroup>
      <Field>
        <FieldLabel>Source</FieldLabel>
        <div className="flex gap-2">
          <Button
            variant={datasetSource === "huggingface" ? "dark" : "outline"}
            onClick={() =>
              selectHfDataset(datasetSource === "huggingface" ? dataset : null)
            }
            className="flex-1"
          >
            <img
              src="/huggingface.svg"
              alt=""
              className="size-4 invert"
              data-icon="inline-start"
            />
            Hugging Face
          </Button>
          <Button
            variant={datasetSource === "upload" ? "dark" : "outline"}
            onClick={() =>
              selectLocalDataset(
                datasetSource === "upload" ? uploadedFile : null,
              )
            }
            className="flex-1"
          >
            <HugeiconsIcon icon={Upload04Icon} data-icon="inline-start" />
            Upload
          </Button>
        </div>
      </Field>

      {datasetSource === "huggingface" ? (
        <>
          <Field>
            <FieldLabel>
              Hugging Face Token{" "}
              <span className="text-muted-foreground font-normal">
                (Optional)
              </span>
            </FieldLabel>
            <FieldDescription>
              Required for gated or private datasets.{" "}
              <a
                href="https://huggingface.co/settings/tokens"
                target="_blank"
                rel="noopener noreferrer"
                className="text-primary hover:underline"
              >
                Get token
              </a>
            </FieldDescription>
            <InputGroup>
              <InputGroupAddon>
                <HugeiconsIcon icon={Key01Icon} className="size-4" />
              </InputGroupAddon>
              <InputGroupInput
                type="password"
                autoComplete="new-password"
                name="hf-token"
                placeholder="hf_..."
                value={hfToken}
                onChange={(e) => setHfToken(e.target.value)}
              />
            </InputGroup>
            {(tokenValidationError ?? hfSearchError) && (
              <p className="text-xs text-destructive">
                {tokenValidationError ?? hfSearchError}
                {" — "}
                <a
                  href="https://huggingface.co/settings/tokens"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="underline"
                >
                  Get or update token
                </a>
              </p>
            )}
            {isCheckingToken && (
              <p className="text-xs text-muted-foreground">Checking token…</p>
            )}
          </Field>

          <Field>
            <FieldLabel>Search datasets</FieldLabel>
            <div ref={comboboxAnchorRef}>
              <Combobox
                items={resultIds}
                filteredItems={resultIds}
                filter={null}
                value={dataset}
                onValueChange={(id) => {
                  // Mark that a selection is in progress before storing it,
                  // so the input-change handler below skips its next call.
                  selectingRef.current = true;
                  setDataset(id);
                }}
                onInputValueChange={(val) => {
                  // Swallow exactly one input change after a selection, then
                  // resume treating input changes as search text.
                  if (selectingRef.current) {
                    selectingRef.current = false;
                    return;
                  }
                  setInputValue(val);
                }}
                itemToStringValue={(id) => id}
                autoHighlight={true}
              >
                <ComboboxInput
                  placeholder="Search datasets..."
                  className="w-full"
                >
                  <InputGroupAddon>
                    <HugeiconsIcon icon={Search01Icon} className="size-4" />
                  </InputGroupAddon>
                </ComboboxInput>
                <ComboboxContent anchor={comboboxAnchorRef}>
                  {isLoading ? (
                    <div className="flex items-center justify-center py-4 gap-2 text-xs text-muted-foreground">
                      <Spinner className="size-4" /> Searching...
                    </div>
                  ) : (
                    <ComboboxEmpty>No datasets found</ComboboxEmpty>
                  )}
                  <div
                    ref={scrollRef}
                    className="max-h-64 overflow-y-auto overscroll-contain [scrollbar-width:thin]"
                  >
                    <ComboboxList className="p-1 !max-h-none !overflow-visible">
                      {(id: string) => {
                        // Each row shows the (possibly truncated) id, with
                        // the full id available in a tooltip.
                        return (
                          <ComboboxItem key={id} value={id} className="gap-2">
                            <Tooltip>
                              <TooltipTrigger asChild={true}>
                                <span className="block min-w-0 flex-1 truncate">
                                  {id}
                                </span>
                              </TooltipTrigger>
                              <TooltipContent
                                side="left"
                                className="max-w-xs break-all"
                              >
                                {id}
                              </TooltipContent>
                            </Tooltip>
                          </ComboboxItem>
                        );
                      }}
                    </ComboboxList>
                    <div ref={sentinelRef} className="h-px" />
                    {isLoadingMore && (
                      <div className="flex items-center justify-center py-2">
                        <Spinner className="size-3.5 text-muted-foreground" />
                      </div>
                    )}
                  </div>
                </ComboboxContent>
              </Combobox>
            </div>
          </Field>

          <HfDatasetSubsetSplitSelectors
            variant="wizard"
            enabled={datasetSource === "huggingface"}
            datasetName={dataset}
            accessToken={hfToken || undefined}
            datasetSubset={datasetSubset}
            setDatasetSubset={setDatasetSubset}
            datasetSplit={datasetSplit}
            setDatasetSplit={setDatasetSplit}
            datasetEvalSplit={datasetEvalSplit}
            setDatasetEvalSplit={setDatasetEvalSplit}
          />
        </>
      ) : (
        <>
          <Field>
            <FieldLabel>Upload Dataset</FieldLabel>
            <FieldDescription>
              Supports JSONL, JSON, CSV formats
            </FieldDescription>
            <button
              type="button"
              className={cn(
                "border-2 border-dashed rounded-xl p-8 text-center transition-colors cursor-pointer hover:border-primary/50 hover:bg-muted/50",
                uploadedFile && "border-primary/50 bg-primary/5",
              )}
              onClick={handleFileUpload}
            >
              {uploadedFile ? (
                <div className="flex flex-col items-center gap-2">
                  <Badge variant="secondary" className="text-sm">
                    {uploadedFile}
                  </Badge>
                  <span className="text-xs text-muted-foreground">
                    Click to replace
                  </span>
                </div>
              ) : (
                <div className="flex flex-col items-center gap-2">
                  <HugeiconsIcon
                    icon={Upload04Icon}
                    className="size-8 text-muted-foreground"
                  />
                  <span className="text-sm text-muted-foreground">
                    Click to upload or drag and drop
                  </span>
                </div>
              )}
            </button>
          </Field>
        </>
      )}

      <Field>
        <div className="flex items-center justify-between">
          <FieldLabel className="flex items-center gap-1.5">
            Format
            <Tooltip>
              <TooltipTrigger asChild={true}>
                <button
                  type="button"
                  className="text-muted-foreground/50 hover:text-muted-foreground"
                >
                  <HugeiconsIcon
                    icon={InformationCircleIcon}
                    className="size-3.5"
                  />
                </button>
              </TooltipTrigger>
              <TooltipContent className="max-w-xs">
                Auto will try to identify and convert your dataset to a
                supported format.{" "}
                <a
                  href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/datasets-guide"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-primary underline"
                >
                  Read more
                </a>
              </TooltipContent>
            </Tooltip>
          </FieldLabel>
          <Select
            value={datasetFormat}
            onValueChange={(v) => setDatasetFormat(v as DatasetFormat)}
          >
            <SelectTrigger className="w-40">
              <SelectValue />
            </SelectTrigger>
            <SelectContent>
              {FORMAT_OPTIONS.map((opt) => (
                <SelectItem key={opt.value} value={opt.value}>
                  {opt.value === "auto" && (
                    <HugeiconsIcon
                      icon={SparklesIcon}
                      className="mr-1.5 inline size-3.5 align-text-bottom"
                    />
                  )}
                  {opt.label}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </div>
      </Field>
    </FieldGroup>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/steps/hyperparameters-step.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  FieldGroup,
  FieldLabel,
  FieldLegend,
  FieldSet,
} from "@/components/ui/field";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Separator } from "@/components/ui/separator";
import { Slider } from "@/components/ui/slider";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { CONTEXT_LENGTHS } from "@/config/training";
import { useMaxStepsEpochsToggle, useTrainingConfigStore } from "@/features/training";
import { InformationCircleIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useMemo } from "react";
import { useShallow } from "zustand/react/shallow";

/**
 * Format a learning rate in compact scientific notation, e.g. `2e-4`, `5e-3`
 * or `2.5e-4`.
 *
 * The mantissa is rounded to one decimal place. A mantissa that rounds up to
 * 10 is carried into the exponent, so 9.96e-4 is shown as `1e-3` rather than
 * `10e-4`.
 *
 * @param value - The number to format.
 * @returns The formatted string, or `"0"` when `value` is not a finite
 *   positive number (zero, negative, NaN or ±Infinity).
 */
function formatLR(value: number): string {
  // NaN / Infinity are not caught by `value <= 0` and would otherwise be
  // rendered as garbage such as "NaNeNaN".
  if (!Number.isFinite(value) || value <= 0) return "0";
  const exp = Math.floor(Math.log10(value));
  const mantissa = value / 10 ** exp;
  // Keep one decimal place of the mantissa.
  const rounded = Math.round(mantissa * 10) / 10;
  // Rounding may push the mantissa to 10 (also covers log10 landing a hair
  // below an exact power of ten); normalise by bumping the exponent.
  if (rounded === 10) return `1e${exp + 1}`;
  // Whole mantissas print without a trailing ".0".
  if (Number.isInteger(rounded)) return `${rounded}e${exp}`;
  return `${rounded}e${exp}`;
}

/**
 * Step a learning rate up or down along a scientific-notation-friendly
 * sequence: 1e-4 -> 2e-4 -> 3e-4 -> ... -> 9e-4 -> 1e-3 -> 2e-3 -> ...
 *
 * The current value is first snapped to an integer mantissa (by rounding),
 * then the mantissa is moved by one; stepping past 9 rolls over to 1 of the
 * next decade and stepping below 1 rolls back to 9 of the previous decade.
 *
 * @param value - Current learning rate.
 * @param direction - `1` to step up, `-1` to step down.
 * @returns The next value in the sequence, or `1e-5` when `value` is not a
 *   finite positive number.
 */
function stepLR(value: number, direction: 1 | -1): number {
  // Non-positive and non-finite inputs have no decade to step within; restart
  // from the smallest default instead of propagating NaN into state.
  if (!Number.isFinite(value) || value <= 0) return 1e-5;
  // The small epsilon guards against log10 landing just below an exact
  // integer for values that are exact powers of ten.
  const exp = Math.floor(Math.log10(value) + 1e-9);
  const mantissa = Math.round(value / 10 ** exp);
  let newMantissa = mantissa + direction;
  let newExp = exp;
  if (newMantissa > 9) {
    newMantissa = 1;
    newExp = exp + 1;
  } else if (newMantissa < 1) {
    newMantissa = 9;
    newExp = exp - 1;
  }
  // Parse the decimal literal instead of multiplying: `3 * 10 ** -4` yields
  // 0.00030000000000000003, whereas Number("3e-4") is exactly the double 3e-4.
  return Number(`${newMantissa}e${newExp}`);
}

/** "Read more" target shared by every hyperparameter tooltip in this step. */
const HYPERPARAMETER_GUIDE_URL =
  "https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide";

/** Info-icon button whose tooltip shows `text` followed by a "Read more" link. */
function HyperparameterHint({ text }: { text: string }) {
  return (
    <Tooltip>
      <TooltipTrigger asChild={true}>
        <button
          type="button"
          className="text-muted-foreground/50 hover:text-muted-foreground"
        >
          <HugeiconsIcon icon={InformationCircleIcon} className="size-3.5" />
        </button>
      </TooltipTrigger>
      <TooltipContent>
        {text}{" "}
        <a
          href={HYPERPARAMETER_GUIDE_URL}
          target="_blank"
          rel="noopener noreferrer"
          className="text-primary underline"
        >
          Read more
        </a>
      </TooltipContent>
    </Tooltip>
  );
}

/**
 * Parse the raw text of a number input.
 * Returns null for empty, non-numeric, or below-`min` text so the caller can
 * ignore the keystroke instead of writing a bogus value (e.g. 0) to the store.
 */
function parseNumericInput(raw: string, min: number): number | null {
  if (raw === "") return null;
  const value = Number(raw);
  if (!Number.isFinite(value) || value < min) return null;
  return value;
}

/**
 * Onboarding step for the core training hyperparameters.
 *
 * Renders max steps / epochs (toggleable), context length and learning rate,
 * plus the LoRA rank / alpha / dropout controls when the training method is
 * "lora" or "qlora". All values are read from and written to the training
 * config store.
 */
export function HyperparametersStep() {
  const {
    trainingMethod,
    maxSteps,
    setMaxSteps,
    epochs,
    setEpochs,
    saveSteps,
    setSaveSteps,
    contextLength,
    setContextLength,
    learningRate,
    setLearningRate,
    loraRank,
    setLoraRank,
    loraAlpha,
    setLoraAlpha,
    loraDropout,
    setLoraDropout,
    maxPositionEmbeddings,
  } = useTrainingConfigStore(
    useShallow((s) => ({
      trainingMethod: s.trainingMethod,
      maxSteps: s.maxSteps,
      setMaxSteps: s.setMaxSteps,
      epochs: s.epochs,
      setEpochs: s.setEpochs,
      saveSteps: s.saveSteps,
      setSaveSteps: s.setSaveSteps,
      contextLength: s.contextLength,
      setContextLength: s.setContextLength,
      learningRate: s.learningRate,
      setLearningRate: s.setLearningRate,
      loraRank: s.loraRank,
      setLoraRank: s.setLoraRank,
      loraAlpha: s.loraAlpha,
      setLoraAlpha: s.setLoraAlpha,
      loraDropout: s.loraDropout,
      setLoraDropout: s.setLoraDropout,
      maxPositionEmbeddings: s.maxPositionEmbeddings,
    })),
  );

  const showLoraParams =
    trainingMethod === "lora" || trainingMethod === "qlora";
  const { useEpochs, toggleUseEpochs } = useMaxStepsEpochsToggle({
    maxSteps,
    epochs,
    saveSteps,
    setMaxSteps,
    setEpochs,
    setSaveSteps,
  });

  // Grow the slider range when the stored value already exceeds the default
  // maximum, so the thumb never sits past the end of the track.
  const maxStepsSliderMax = Math.max(500, maxSteps);
  const epochsSliderMax = Math.max(10, epochs);

  // Use model's max_position_embeddings to cap context length options.
  // Fall back to 65536 (64K) if not available.
  const maxCtx = maxPositionEmbeddings ?? 65536;
  const contextLengthOptions = useMemo(
    () => CONTEXT_LENGTHS.filter((len) => len <= maxCtx),
    [maxCtx],
  );

  return (
    <FieldGroup>
      <FieldSet>
        <FieldLegend variant="label">Choose your training parameters</FieldLegend>
        <div className="flex flex-col gap-4">
          {/* Keyed on the mode so the row re-mounts (and re-animates) on toggle. */}
          <div
            key={useEpochs ? "epochs" : "steps"}
            className="flex flex-col gap-2 animate-in fade-in-1 slide-in-from-bottom-1 duration-200"
          >
            <div className="flex items-center justify-between">
              <FieldLabel className="flex items-center gap-1.5 !text-sm text-muted-foreground">
                {useEpochs ? "Epochs" : "Max Steps"}
                <HyperparameterHint
                  text={
                    useEpochs
                      ? "Number of full passes over the dataset."
                      : "Override total optimizer steps."
                  }
                />
              </FieldLabel>
              <div className="flex items-center gap-3">
                <button
                  type="button"
                  onClick={toggleUseEpochs}
                  className="text-xs text-primary underline cursor-pointer"
                >
                  {useEpochs ? "Use Max Steps" : "Use Epochs"}
                </button>
                <Slider
                  value={[
                    useEpochs
                      ? Math.min(epochsSliderMax, Math.max(1, epochs))
                      : Math.min(maxStepsSliderMax, Math.max(1, maxSteps)),
                  ]}
                  onValueChange={([v]) =>
                    useEpochs ? setEpochs(v) : setMaxSteps(v)
                  }
                  min={1}
                  max={useEpochs ? epochsSliderMax : maxStepsSliderMax}
                  step={1}
                  className="w-40"
                />
                <input
                  type="number"
                  value={useEpochs ? epochs : maxSteps}
                  onChange={(e) => {
                    const value = parseNumericInput(e.target.value, 1);
                    if (value === null) return;

                    if (useEpochs) {
                      setEpochs(value);
                    } else {
                      setMaxSteps(value);
                    }
                  }}
                  min={1}
                  max={useEpochs ? epochsSliderMax : maxStepsSliderMax}
                  step={1}
                  className="w-16 text-right font-mono text-xs font-medium bg-muted/50 border border-border rounded-lg px-1.5 py-0.5 focus:outline-none focus:ring-1 focus:ring-primary/30 [&::-webkit-inner-spin-button]:appearance-none"
                />
              </div>
            </div>
          </div>

          <div className="flex items-center justify-between">
            <FieldLabel className="flex items-center gap-1.5 !text-sm text-muted-foreground">
              Context Length
              <HyperparameterHint text="Maximum number of tokens per training sample." />
            </FieldLabel>
            <Select
              value={String(contextLength)}
              onValueChange={(v) => setContextLength(Number(v))}
            >
              <SelectTrigger className="w-32 font-mono">
                <SelectValue />
              </SelectTrigger>
              <SelectContent>
                {contextLengthOptions.map((len) => (
                  <SelectItem key={len} value={String(len)}>
                    {len.toLocaleString()}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
          </div>

          <div className="flex items-center justify-between">
            <FieldLabel className="flex items-center gap-1.5 !text-sm text-muted-foreground">
              Learning Rate
              <HyperparameterHint text="Step size for weight updates. Lower = slower but more stable." />
            </FieldLabel>
            <div className="flex items-center gap-1">
              <button
                type="button"
                className="flex size-7 items-center justify-center rounded-md border border-border text-muted-foreground hover:bg-muted cursor-pointer"
                onClick={() => setLearningRate(stepLR(learningRate, -1))}
              >
                -
              </button>
              <span className="w-16 text-center font-mono text-sm">
                {formatLR(learningRate)}
              </span>
              <button
                type="button"
                className="flex size-7 items-center justify-center rounded-md border border-border text-muted-foreground hover:bg-muted cursor-pointer"
                onClick={() => setLearningRate(stepLR(learningRate, 1))}
              >
                +
              </button>
            </div>
          </div>

        </div>
      </FieldSet>

      {showLoraParams && (
        <>
          <Separator />
          <FieldSet>
            <FieldLegend variant="label">LoRA Parameters</FieldLegend>
            <div className="flex flex-col gap-4">
              <div className="flex items-center justify-between">
                <FieldLabel className="flex items-center gap-1.5 !text-sm text-muted-foreground">
                  Rank
                  <HyperparameterHint text="Dimension of the low-rank matrices. Higher = more capacity." />
                </FieldLabel>
                <div className="flex items-center gap-3">
                  <Slider
                    value={[loraRank]}
                    onValueChange={([v]) => setLoraRank(v)}
                    min={4}
                    max={128}
                    step={4}
                    className="w-40"
                  />
                  <input
                    type="number"
                    value={loraRank}
                    onChange={(e) => {
                      // Ignore cleared / invalid text rather than storing rank 0.
                      const value = parseNumericInput(e.target.value, 1);
                      if (value !== null) setLoraRank(value);
                    }}
                    min={4}
                    max={128}
                    step={4}
                    className="w-12 text-right font-mono text-xs font-medium bg-muted/50 border border-border rounded-lg px-1.5 py-0.5 focus:outline-none focus:ring-1 focus:ring-primary/30 [&::-webkit-inner-spin-button]:appearance-none"
                  />
                </div>
              </div>

              <div className="flex items-center justify-between">
                <FieldLabel className="flex items-center gap-1.5 !text-sm text-muted-foreground">
                  Alpha
                  <HyperparameterHint text="Scaling factor. Typically set to 2x the rank value." />
                </FieldLabel>
                <div className="flex items-center gap-3">
                  <Slider
                    value={[loraAlpha]}
                    onValueChange={([v]) => setLoraAlpha(v)}
                    min={8}
                    max={256}
                    step={8}
                    className="w-40"
                  />
                  <input
                    type="number"
                    value={loraAlpha}
                    onChange={(e) => {
                      // Ignore cleared / invalid text rather than storing alpha 0.
                      const value = parseNumericInput(e.target.value, 1);
                      if (value !== null) setLoraAlpha(value);
                    }}
                    min={8}
                    max={256}
                    step={8}
                    className="w-12 text-right font-mono text-xs font-medium bg-muted/50 border border-border rounded-lg px-1.5 py-0.5 focus:outline-none focus:ring-1 focus:ring-primary/30 [&::-webkit-inner-spin-button]:appearance-none"
                  />
                </div>
              </div>

              <div className="flex items-center justify-between">
                <FieldLabel className="flex items-center gap-1.5 !text-sm text-muted-foreground">
                  Dropout
                  <HyperparameterHint text="Probability of dropping neurons during training for regularization." />
                </FieldLabel>
                <div className="flex items-center gap-3">
                  <Slider
                    value={[loraDropout]}
                    onValueChange={([v]) => setLoraDropout(v)}
                    min={0}
                    max={0.5}
                    step={0.01}
                    className="w-40"
                  />
                  <input
                    type="number"
                    value={loraDropout}
                    onChange={(e) => {
                      // 0 is a valid dropout, so only empty / negative / invalid text is ignored.
                      const value = parseNumericInput(e.target.value, 0);
                      if (value !== null) setLoraDropout(value);
                    }}
                    min={0}
                    max={0.5}
                    step={0.01}
                    className="w-12 text-right font-mono text-xs font-medium bg-muted/50 border border-border rounded-lg px-1.5 py-0.5 focus:outline-none focus:ring-1 focus:ring-primary/30 [&::-webkit-inner-spin-button]:appearance-none"
                  />
                </div>
              </div>
            </div>
          </FieldSet>
        </>
      )}
    </FieldGroup>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/steps/model-selection-step.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  Field,
  FieldDescription,
  FieldGroup,
  FieldLabel,
} from "@/components/ui/field";
import {
  InputGroup,
  InputGroupAddon,
  InputGroupInput,
} from "@/components/ui/input-group";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Spinner } from "@/components/ui/spinner";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { MODEL_TYPE_TO_HF_TASK, PRIORITY_TRAINING_MODELS, applyPriorityOrdering } from "@/config/training";
import {
  useDebouncedValue,
  useGpuInfo,
  useHfModelSearch,
  useHfTokenValidation,
  useInfiniteScroll,
} from "@/hooks";
import { formatCompact } from "@/lib/utils";
import {
  type TrainingMethod as VramTrainingMethod,
  type VramFitStatus,
  buildModelVramMap,
} from "@/lib/vram";
import { useTrainingConfigStore } from "@/features/training";
import type { TrainingMethod } from "@/types/training";
import {
  InformationCircleIcon,
  Key01Icon,
  Search01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useEffect, useMemo, useRef, useState } from "react";
import { useShallow } from "zustand/react/shallow";

/**
 * Onboarding step for picking the model to fine-tune.
 *
 * Shows an optional Hugging Face token field, a searchable model combobox
 * with infinite scroll and VRAM-fit badges, and (once a model is selected)
 * the training-method selector.
 */
export function ModelSelectionStep() {
  type VramEntry = { status: VramFitStatus | null; detail: string | null };

  const gpu = useGpuInfo();
  const {
    modelType,
    selectedModel,
    setSelectedModel,
    ensureModelDefaultsLoaded,
    trainingMethod,
    setTrainingMethod,
    hfToken,
    setHfToken,
  } = useTrainingConfigStore(
    useShallow((state) => ({
      modelType: state.modelType,
      selectedModel: state.selectedModel,
      setSelectedModel: state.setSelectedModel,
      ensureModelDefaultsLoaded: state.ensureModelDefaultsLoaded,
      trainingMethod: state.trainingMethod,
      setTrainingMethod: state.setTrainingMethod,
      hfToken: state.hfToken,
      setHfToken: state.setHfToken,
    })),
  );

  const [inputValue, setInputValue] = useState("");
  // Set when an item is picked so the input-change event that follows is
  // not treated as a new search query.
  const selectingRef = useRef(false);
  const debouncedQuery = useDebouncedValue(inputValue);
  const hfTask = modelType ? MODEL_TYPE_TO_HF_TASK[modelType] : undefined;
  const {
    results: hfResults,
    isLoading,
    isLoadingMore,
    fetchMore,
    error: hfSearchError,
  } = useHfModelSearch(debouncedQuery, {
    task: hfTask,
    accessToken: hfToken || undefined,
    excludeGguf: true,
    priorityIds: PRIORITY_TRAINING_MODELS,
  });

  const tokenCheck = useHfTokenValidation(hfToken);
  const tokenValidationError = tokenCheck.error;
  const isCheckingToken = tokenCheck.isChecking;

  const resultIds = useMemo(
    () => applyPriorityOrdering(hfResults.map((model) => model.id)),
    [hfResults],
  );

  // Match Studio behavior: only show exception signals (OOM/TIGHT) in training flows.
  const vramMap = useMemo(() => {
    const fitById = buildModelVramMap(
      hfResults,
      trainingMethod as VramTrainingMethod,
      gpu,
    );
    return new Map<string, VramEntry>(
      hfResults.map((model): [string, VramEntry] => [
        model.id,
        {
          status: fitById.get(model.id)?.status ?? null,
          detail: model.totalParams ? formatCompact(model.totalParams) : null,
        },
      ]),
    );
  }, [hfResults, gpu, trainingMethod]);

  const comboboxAnchorRef = useRef<HTMLDivElement>(null);
  const { scrollRef, sentinelRef } = useInfiniteScroll(
    fetchMore,
    hfResults.length,
  );

  useEffect(() => {
    ensureModelDefaultsLoaded();
  }, [selectedModel, ensureModelDefaultsLoaded]);

  // Token validation problems take precedence over search errors.
  const tokenError = tokenValidationError ?? hfSearchError;

  // Renders one combobox row: model id (with full-id tooltip), fit badge, size.
  const renderModelItem = (id: string) => {
    const entry = vramMap.get(id);
    const sizeLabel = entry?.detail ?? null;
    const fitStatus = entry?.status ?? null;
    const exceeds = fitStatus === "exceeds";
    const isTight = fitStatus === "tight";

    return (
      <ComboboxItem
        key={id}
        value={id}
        className={`justify-between ${exceeds ? "opacity-50" : ""}`}
      >
        <Tooltip>
          <TooltipTrigger asChild={true}>
            <span
              className={`min-w-0 flex-1 truncate ${exceeds ? "line-through decoration-muted-foreground/50" : ""}`}
            >
              {id}
            </span>
          </TooltipTrigger>
          <TooltipContent side="left" className="max-w-xs break-all">
            {id}
          </TooltipContent>
        </Tooltip>
        <span className="flex items-center gap-1.5 shrink-0">
          {exceeds && (
            <span className="text-[9px] font-medium text-red-400">OOM</span>
          )}
          {isTight && (
            <span className="text-[9px] font-medium text-amber-400">
              TIGHT
            </span>
          )}
          {sizeLabel ? (
            <span className="text-xs text-muted-foreground">{sizeLabel}</span>
          ) : null}
        </span>
      </ComboboxItem>
    );
  };

  return (
    <FieldGroup>
      <Field>
        <FieldLabel>
          Hugging Face Token{" "}
          <span className="text-muted-foreground font-normal">(Optional)</span>
        </FieldLabel>
        <FieldDescription>
          Required for gated or private models.{" "}
          <a
            href="https://huggingface.co/settings/tokens"
            target="_blank"
            rel="noopener noreferrer"
            className="text-primary hover:underline"
          >
            Get token
          </a>
        </FieldDescription>
        <InputGroup>
          <InputGroupAddon>
            <HugeiconsIcon icon={Key01Icon} className="size-4" />
          </InputGroupAddon>
          <InputGroupInput
            type="password"
            autoComplete="new-password"
            name="hf-token"
            placeholder="hf_..."
            value={hfToken}
            onChange={(event) => setHfToken(event.target.value)}
          />
        </InputGroup>
        {tokenError && (
          <p className="text-xs text-destructive">
            {tokenError}
            {" — "}
            <a
              href="https://huggingface.co/settings/tokens"
              target="_blank"
              rel="noopener noreferrer"
              className="underline"
            >
              Get or update token
            </a>
          </p>
        )}
        {isCheckingToken && (
          <p className="text-xs text-muted-foreground">Checking token…</p>
        )}
      </Field>

      <Field>
        <FieldLabel className="flex items-center gap-1.5">
          Search models
          <Tooltip>
            <TooltipTrigger asChild={true}>
              <button
                type="button"
                className="text-muted-foreground/50 hover:text-muted-foreground"
              >
                <HugeiconsIcon
                  icon={InformationCircleIcon}
                  className="size-3.5"
                />
              </button>
            </TooltipTrigger>
            <TooltipContent>
              Search Hugging Face models or pick from our recommended list.{" "}
              <a
                href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/what-model-should-i-use"
                target="_blank"
                rel="noopener noreferrer"
                className="text-primary underline"
              >
                Read more
              </a>
            </TooltipContent>
          </Tooltip>
        </FieldLabel>
        <div ref={comboboxAnchorRef}>
          <Combobox
            items={resultIds}
            filteredItems={resultIds}
            filter={null}
            value={selectedModel}
            onValueChange={(id) => {
              selectingRef.current = true;
              setSelectedModel(id);
            }}
            onInputValueChange={(val) => {
              if (!selectingRef.current) {
                setInputValue(val);
                return;
              }
              // Swallow the input change caused by the selection itself.
              selectingRef.current = false;
            }}
            itemToStringValue={(id) => id}
            autoHighlight={true}
          >
            <ComboboxInput placeholder="Search models..." className="w-full">
              <InputGroupAddon>
                <HugeiconsIcon icon={Search01Icon} className="size-4" />
              </InputGroupAddon>
            </ComboboxInput>
            <ComboboxContent anchor={comboboxAnchorRef}>
              {isLoading ? (
                <div className="flex items-center justify-center py-4 gap-2 text-xs text-muted-foreground">
                  <Spinner className="size-4" /> Searching…
                </div>
              ) : (
                <ComboboxEmpty>No models found</ComboboxEmpty>
              )}
              <div
                ref={scrollRef}
                className="max-h-64 overflow-y-auto overscroll-contain [scrollbar-width:thin]"
              >
                <ComboboxList className="p-1 !max-h-none !overflow-visible">
                  {renderModelItem}
                </ComboboxList>
                <div ref={sentinelRef} className="h-px" />
                {isLoadingMore && (
                  <div className="flex items-center justify-center py-2">
                    <Spinner className="size-3.5 text-muted-foreground" />
                  </div>
                )}
              </div>
            </ComboboxContent>
          </Combobox>
        </div>
      </Field>

      {selectedModel && (
        <Field>
          <div className="flex items-center justify-between">
            <div>
              <FieldLabel className="flex items-center gap-1.5">
                Training method
                <Tooltip>
                  <TooltipTrigger asChild={true}>
                    <button
                      type="button"
                      className="text-muted-foreground/50 hover:text-muted-foreground"
                    >
                      <HugeiconsIcon
                        icon={InformationCircleIcon}
                        className="size-3.5"
                      />
                    </button>
                  </TooltipTrigger>
                  <TooltipContent className="max-w-xs">
                    QLoRA uses 4-bit quantization for lowest VRAM. LoRA uses
                    16-bit for better quality. Full fine-tune updates all
                    weights.{" "}
                    <a
                      href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                      target="_blank"
                      rel="noopener noreferrer"
                      className="text-primary underline"
                    >
                      Read more
                    </a>
                  </TooltipContent>
                </Tooltip>
              </FieldLabel>
              <FieldDescription>
                Choose how to fine-tune {selectedModel}
              </FieldDescription>
            </div>
            <Select
              value={trainingMethod}
              onValueChange={(method) =>
                setTrainingMethod(method as TrainingMethod)
              }
            >
              <SelectTrigger className="w-40">
                <SelectValue />
              </SelectTrigger>
              <SelectContent>
                <SelectItem value="qlora">QLoRA (4-bit)</SelectItem>
                <SelectItem value="lora">LoRA (16-bit)</SelectItem>
                <SelectItem value="full">Full Fine-tune</SelectItem>
              </SelectContent>
            </Select>
          </div>
        </Field>
      )}
    </FieldGroup>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/steps/model-type-step.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Card, CardContent } from "@/components/ui/card";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { MODEL_TYPES } from "@/config/training";
import { cn } from "@/lib/utils";
import { useTrainingConfigStore } from "@/features/training";
import type { ModelType } from "@/types/training";
import {
  BubbleChatIcon,
  Database02Icon,
  ImageIcon,
  InformationCircleIcon,
  TextIcon,
  VoiceIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, useState } from "react";
import { useShallow } from "zustand/react/shallow";

// Icon shown on each model-type card, keyed by model type.
const TYPE_ICONS: Record<ModelType, typeof ImageIcon> = {
  vision: ImageIcon,
  audio: VoiceIcon,
  embeddings: Database02Icon,
  text: TextIcon,
};

// One-line description of each model type.
// NOTE(review): presumably rendered as the card's info tooltip — confirm at the usage site.
const TYPE_TOOLTIPS: Record<ModelType, string> = {
  vision: "Fine-tune models that understand images and text together",
  audio: "Fine-tune text-to-speech and audio models",
  embeddings: "Fine-tune models for semantic search and similarity",
  text: "Fine-tune large language models for text generation",
};

// Model types whose cards are rendered disabled and cannot be selected.
// Currently empty, so every type is selectable.
const COMING_SOON: ModelType[] = [];

/**
 * First onboarding step: pick which kind of model to fine-tune, or choose the
 * "Chat" path instead.
 *
 * The model-type cards are items of a radio group bound to `modelType` in the
 * training config store. The extra "Chat" card is not a radio item: selecting
 * it sets `modelType` to "text" and raises the `unsloth_chat_only` flag in
 * sessionStorage; choosing any model-type card clears that flag again.
 */
export function ModelTypeStep(): ReactElement {
  const chatOnlyStorageKey = "unsloth_chat_only";
  const { modelType, setModelType } = useTrainingConfigStore(
    useShallow((s) => ({
      modelType: s.modelType,
      setModelType: s.setModelType,
    })),
  );
  // Seed from sessionStorage: the flag outlives this component (e.g. going back
  // to the splash screen unmounts the step), and starting from `false` would
  // highlight the "Text" card while the stored flag still says "chat only".
  const [chatOnlySelected, setChatOnlySelected] = useState(
    () => sessionStorage.getItem(chatOnlyStorageKey) === "1",
  );

  // Shared by the mouse and keyboard activation paths of the Chat card.
  const selectChatOnly = () => {
    setChatOnlySelected(true);
    setModelType("text" as ModelType);
    sessionStorage.setItem(chatOnlyStorageKey, "1");
  };

  return (
    <div className="flex flex-col gap-6">
      <div>
        <h2 className="text-lg font-semibold">Welcome to Unsloth Studio</h2>
        <p className="text-sm text-muted-foreground">
          Choose a path - fine-tune LLMs, vision, embedding, audio models or just chat.{" "}
          <a
            href="https://unsloth.ai/docs/new/studio/start"
            target="_blank"
            rel="noreferrer"
            className="text-primary underline"
          >
            Get started with our guide
          </a>
        </p>
      </div>
      <RadioGroup
        value={chatOnlySelected ? "" : (modelType ?? "")}
        onValueChange={(v) => {
          if (!COMING_SOON.includes(v as ModelType)) {
            setChatOnlySelected(false);
            sessionStorage.removeItem(chatOnlyStorageKey);
            setModelType(v as ModelType);
          }
        }}
        className="grid grid-cols-2 gap-4"
      >
        {MODEL_TYPES.map((type) => {
          const Icon = TYPE_ICONS[type.value];
          const isSelected = !chatOnlySelected && modelType === type.value;
          const isDisabled = COMING_SOON.includes(type.value);
          const inputId = `model-type-${type.value}`;

          return (
            <label
              key={type.value}
              htmlFor={inputId}
              className={cn(
                isDisabled ? "cursor-not-allowed" : "cursor-pointer",
              )}
            >
              <Card
                size="sm"
                className={cn(
                  "relative shadow-primary/30 transition-all duration-150 ease-out",
                  isDisabled && "opacity-50 bg-muted/50",
                  !isDisabled &&
                    "hover:ring-primary/40 hover:-translate-y-0.5 hover:shadow-sm",
                  isSelected &&
                    !isDisabled &&
                    "ring-2 ring-primary -translate-y-0.5 shadow-sm",
                )}
              >
                {isDisabled && (
                  <Badge
                    variant="secondary"
                    className="absolute top-2 right-2 text-[10px]"
                  >
                    Coming Soon
                  </Badge>
                )}
                <CardContent className="flex items-center gap-4 py-4">
                  <RadioGroupItem
                    id={inputId}
                    value={type.value}
                    className="sr-only"
                    disabled={isDisabled}
                  />
                  <div
                    className={cn(
                      "size-10 rounded-xl corner-squircle flex items-center justify-center shrink-0",
                      "transition-all duration-100 ease-out",
                      isDisabled && "bg-muted/50 text-muted-foreground/50",
                      !isDisabled &&
                        isSelected &&
                        "bg-primary/10 text-primary scale-105",
                      !(isDisabled || isSelected) &&
                        "bg-muted text-muted-foreground",
                    )}
                  >
                    <HugeiconsIcon
                      icon={Icon}
                      className={cn(
                        "size-5 transition-transform duration-100 ease-out",
                        isSelected && !isDisabled && "scale-110",
                      )}
                      strokeWidth={isSelected && !isDisabled ? 2.5 : 2}
                    />
                  </div>
                  <div className="flex flex-col gap-0.5 flex-1">
                    <div className="flex items-center gap-1.5">
                      <span
                        className={cn(
                          "font-medium",
                          isDisabled && "text-muted-foreground",
                        )}
                      >
                        {type.label}
                      </span>
                      <Tooltip>
                        <TooltipTrigger asChild={true}>
                          <button
                            type="button"
                            className="text-muted-foreground/50 hover:text-muted-foreground"
                          >
                            <HugeiconsIcon
                              icon={InformationCircleIcon}
                              className="size-3.5"
                            />
                          </button>
                        </TooltipTrigger>
                        <TooltipContent>
                          {TYPE_TOOLTIPS[type.value]}
                        </TooltipContent>
                      </Tooltip>
                    </div>
                    <span className="text-xs text-muted-foreground">
                      {type.description}
                    </span>
                  </div>
                </CardContent>
              </Card>
            </label>
          );
        })}
        <div
          role="button"
          tabIndex={0}
          aria-pressed={chatOnlySelected}
          className="cursor-pointer"
          onClick={selectChatOnly}
          onKeyDown={(e) => {
            // Ignore key presses bubbling up from the nested tooltip button.
            if (e.target !== e.currentTarget) {
              return;
            }
            if (e.key === "Enter" || e.key === " ") {
              // Space would otherwise scroll the page.
              e.preventDefault();
              selectChatOnly();
            }
          }}
        >
          <Card
            size="sm"
            className={cn(
              "relative shadow-primary/30 transition-all duration-150 ease-out",
              "hover:ring-primary/40 hover:-translate-y-0.5 hover:shadow-sm",
              chatOnlySelected && "ring-2 ring-primary -translate-y-0.5 shadow-sm",
            )}
          >
            <CardContent className="flex items-center gap-4 py-4">
              {/* Invisible spacer matching RadioGroupItem (size-4 flex) in other cards */}
              <div className="size-4 shrink-0" aria-hidden="true" />
              <div
                className={cn(
                  "size-10 rounded-xl corner-squircle flex items-center justify-center shrink-0",
                  "transition-all duration-100 ease-out",
                  chatOnlySelected
                    ? "bg-primary/10 text-primary scale-105"
                    : "bg-muted text-muted-foreground",
                )}
              >
                <HugeiconsIcon
                  icon={BubbleChatIcon}
                  className={cn(
                    "size-5 transition-transform duration-100 ease-out",
                    chatOnlySelected && "scale-110",
                  )}
                  strokeWidth={chatOnlySelected ? 2.5 : 2}
                />
              </div>
              <div className="flex flex-col gap-0.5 flex-1">
                <div className="flex items-center gap-1.5">
                  <span className="font-medium">Chat</span>
                  <Tooltip>
                    <TooltipTrigger asChild={true}>
                      <button
                        type="button"
                        className="text-muted-foreground/50 hover:text-muted-foreground"
                        onClick={(e) => e.stopPropagation()}
                      >
                        <HugeiconsIcon
                          icon={InformationCircleIcon}
                          className="size-3.5"
                        />
                      </button>
                    </TooltipTrigger>
                    <TooltipContent>
                      Chat with any model. Has tool calling, web search and more.
                    </TooltipContent>
                  </Tooltip>
                </div>
                <span className="text-xs text-muted-foreground">
                  Chat with LLMs & vision models + audio generation.
                </span>
              </div>
            </CardContent>
          </Card>
        </div>
      </RadioGroup>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/steps/summary-step.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Separator } from "@/components/ui/separator";
import { useTrainingConfigStore } from "@/features/training";
import { useHardwareInfo } from "@/hooks";
import { isAdapterMethod } from "@/types/training";
import { ChipIcon, Database02Icon, GpuIcon, Settings04Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useShallow } from "zustand/react/shallow";

/**
 * A single label/value line inside a summary card.
 *
 * At most one styling flag takes effect; when several are set, `mono` wins
 * over `capitalize`, which wins over `uppercase`.
 */
function Row({
  label,
  value,
  mono,
  capitalize,
  uppercase,
}: {
  label: string;
  value: React.ReactNode;
  mono?: boolean;
  capitalize?: boolean;
  uppercase?: boolean;
}) {
  // Stays undefined when no flag is set, so the value span gets no class.
  let valueClassName: string | undefined;
  if (mono) {
    valueClassName = "font-mono text-xs";
  } else if (capitalize) {
    valueClassName = "capitalize";
  } else if (uppercase) {
    valueClassName = "uppercase";
  }

  return (
    <div className="flex items-center justify-between">
      <span className="text-muted-foreground">{label}</span>
      <span className={valueClassName}>{value}</span>
    </div>
  );
}

/**
 * Final onboarding step: a read-only, four-card recap of the detected
 * hardware, the chosen model, the dataset and the training hyperparameters.
 */
export function SummaryStep() {
  const hw = useHardwareInfo();
  const cfg = useTrainingConfigStore(
    useShallow((s) => ({
      modelType: s.modelType,
      selectedModel: s.selectedModel,
      trainingMethod: s.trainingMethod,
      datasetSource: s.datasetSource,
      datasetFormat: s.datasetFormat,
      dataset: s.dataset,
      datasetSubset: s.datasetSubset,
      datasetSplit: s.datasetSplit,
      uploadedFile: s.uploadedFile,
      epochs: s.epochs,
      contextLength: s.contextLength,
      learningRate: s.learningRate,
      loraRank: s.loraRank,
      loraAlpha: s.loraAlpha,
      loraDropout: s.loraDropout,
    })),
  );

  // LoRA rank/alpha/dropout are only shown for adapter-based methods.
  const showLoraParams = isAdapterMethod(cfg.trainingMethod);
  // Uploaded datasets are identified by file name, hub datasets by their id.
  const datasetName =
    cfg.datasetSource === "upload" ? cfg.uploadedFile : cfg.dataset;

  // Display name of the training method; shown in two of the cards.
  let methodLabel = "Full";
  if (cfg.trainingMethod === "qlora") {
    methodLabel = "QLoRA";
  } else if (cfg.trainingMethod === "lora") {
    methodLabel = "LoRA";
  }

  return (
    <div className="grid grid-cols-2 gap-3">
      <Card size="sm" className="flex flex-col rounded-2xl">
        <CardHeader className="pb-2">
          <CardTitle className="text-sm text-muted-foreground">
            System
          </CardTitle>
        </CardHeader>
        <CardContent className="flex flex-1 flex-col">
          <div className="flex items-start gap-3">
            <div className="mt-0.5 flex size-8 shrink-0 items-center justify-center rounded-lg bg-emerald-500/10">
              <HugeiconsIcon icon={GpuIcon} className="size-4 text-emerald-600" />
            </div>
            <div className="flex flex-1 flex-col">
              <span className="text-xs text-muted-foreground">GPU</span>
              <div className="flex items-center gap-2">
                <span className="text-sm font-medium">{hw.gpuName ?? "---"}</span>
                <Badge variant="secondary">{hw.vramTotalGb != null ? `${hw.vramTotalGb} GB` : "---"}</Badge>
              </div>
            </div>
          </div>
          <Separator className="my-2" />
          <div className="space-y-1 text-sm">
            <Row label="unsloth" value={hw.unsloth ?? "---"} mono />
            <Row label="torch" value={hw.torch ?? "---"} mono />
            <Row label="transformers" value={hw.transformers ?? "---"} mono />
          </div>
        </CardContent>
      </Card>

      <Card size="sm" className="flex flex-col rounded-2xl">
        <CardHeader className="pb-2">
          <CardTitle className="text-sm text-muted-foreground">Model</CardTitle>
        </CardHeader>
        <CardContent className="flex flex-1 flex-col">
          <div className="flex items-start gap-3">
            <div className="mt-0.5 flex size-8 shrink-0 items-center justify-center rounded-lg bg-emerald-500/10">
              <HugeiconsIcon icon={ChipIcon} className="size-4 text-emerald-600" />
            </div>
            <div className="flex flex-1 flex-col overflow-hidden">
              <span className="text-xs text-muted-foreground">Model</span>
              <span className="truncate text-sm font-medium">{cfg.selectedModel ?? "---"}</span>
            </div>
          </div>
          <Separator className="my-2" />
          <div className="space-y-1 text-sm">
            <Row label="Type" value={cfg.modelType} capitalize />
            <Row label="Method" value={methodLabel} />
          </div>
        </CardContent>
      </Card>

      <Card size="sm" className="flex flex-col rounded-2xl">
        <CardHeader className="pb-2">
          <CardTitle className="text-sm text-muted-foreground">
            Dataset
          </CardTitle>
        </CardHeader>
        <CardContent className="flex flex-1 flex-col">
          <div className="flex items-start gap-3">
            <div className="mt-0.5 flex size-8 shrink-0 items-center justify-center rounded-lg bg-indigo-500/10">
              <HugeiconsIcon icon={Database02Icon} className="size-4 text-indigo-600" />
            </div>
            <div className="flex flex-1 flex-col overflow-hidden">
              <span className="text-xs text-muted-foreground">Dataset</span>
              <span className="truncate text-sm font-medium">{datasetName ?? "---"}</span>
            </div>
          </div>
          <Separator className="my-2" />
          <div className="space-y-1 text-sm">
            <Row label="Source" value={cfg.datasetSource} capitalize />
            {cfg.datasetSubset && (
              <Row label="Subset" value={cfg.datasetSubset} mono />
            )}
            {cfg.datasetSplit && (
              <Row label="Split" value={cfg.datasetSplit} mono />
            )}
            <Row label="Format" value={cfg.datasetFormat} capitalize />
          </div>
        </CardContent>
      </Card>

      <Card size="sm" className="flex flex-col rounded-2xl">
        <CardHeader className="pb-2">
          <CardTitle className="text-sm text-muted-foreground">
            Hyperparameters
          </CardTitle>
        </CardHeader>
        <CardContent className="flex flex-1 flex-col">
          <div className="flex items-start gap-3">
            <div className="mt-0.5 flex size-8 shrink-0 items-center justify-center rounded-lg bg-orange-500/10">
              <HugeiconsIcon icon={Settings04Icon} className="size-4 text-orange-600" />
            </div>
            <div className="flex flex-1 flex-col">
              <span className="text-xs text-muted-foreground">Training</span>
              <span className="text-sm font-medium">
                {methodLabel}
              </span>
            </div>
          </div>
          <Separator className="my-2" />
          <div className="grid grid-cols-2 gap-x-6 gap-y-1 text-sm">
            <Row label="Epochs" value={cfg.epochs} mono />
            <Row label="Context" value={cfg.contextLength.toLocaleString()} mono />
            <Row label="LR" value={cfg.learningRate.toExponential()} mono />
            {showLoraParams && (
              <>
                <Row label="Rank" value={cfg.loraRank} mono />
                <Row label="Alpha" value={cfg.loraAlpha} mono />
                <Row label="Dropout" value={cfg.loraDropout} mono />
              </>
            )}
          </div>
        </CardContent>
      </Card>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/wizard-content.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { STEPS } from "@/config/training";
import { useTrainingConfigStore } from "@/features/training";
import type { StepNumber } from "@/types/training";
import { DatasetStep } from "./steps/dataset-step";
import { HyperparametersStep } from "./steps/hyperparameters-step";
import { ModelSelectionStep } from "./steps/model-selection-step";
import { ModelTypeStep } from "./steps/model-type-step";
import { SummaryStep } from "./steps/summary-step";

// Component rendered in the wizard body for each step number.
const STEP_COMPONENTS = {
  1: ModelTypeStep,
  2: ModelSelectionStep,
  3: DatasetStep,
  4: HyperparametersStep,
  5: SummaryStep,
} as const;

// Image path of the mascot shown in the wizard header for each step number.
const STEP_MASCOTS: Record<StepNumber, string> = {
  1: "/Sloth emojis/large sloth wave.png",
  2: "/Sloth emojis/sloth magnify final.png",
  3: "/Sloth emojis/sloth huglove large.png",
  4: "/Sloth emojis/large sloth glasses.png",
  5: "/Sloth emojis/large sloth yay.png",
};

/**
 * Main pane of the onboarding wizard: a header with the current step's mascot,
 * title and description, followed by the component for that step.
 */
export function WizardContent() {
  const step = useTrainingConfigStore((state) => state.currentStep);
  // STEPS is a zero-based array while step numbers start at 1.
  const { title, description } = STEPS[step - 1];
  const ActiveStep = STEP_COMPONENTS[step];

  return (
    <main className="flex-1 flex flex-col overflow-y-auto">
      <header className="flex flex-wrap items-start gap-3 p-4 pb-3 sm:p-6 sm:pb-4">
        <img
          src={STEP_MASCOTS[step]}
          alt="Unsloth mascot"
          className="size-12 sm:size-14"
        />
        <div className="flex flex-col min-w-0">
          <h1 className="text-lg font-semibold sm:text-xl">{title}</h1>
          <p className="text-sm text-muted-foreground">
            {description}
          </p>
        </div>
        <p className="ml-auto hidden shrink-0 text-xs text-muted-foreground uppercase tracking-wider md:block">
          Step {step} of {STEPS.length}
        </p>
      </header>
      <div className="flex-1 p-4 pt-1.5 sm:p-6 sm:pt-2">
        <ActiveStep />
      </div>
    </main>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/wizard-footer.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { STEPS } from "@/config/training";
import { markOnboardingDone } from "@/features/auth";
import { useTrainingConfigStore } from "@/features/training";
import { ArrowLeft02Icon, ArrowRight02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useNavigate } from "@tanstack/react-router";
import { useShallow } from "zustand/react/shallow";

/**
 * Navigation bar at the bottom of the onboarding wizard.
 *
 * - Back: returns to the splash screen on the first step (via
 *   `onBackToSplash`), otherwise goes to the previous step.
 * - Skip (every step but the last): marks onboarding done and opens /studio.
 * - Continue / Go to Studio: advances the wizard, or on the last step marks
 *   onboarding done and opens /studio. Both are disabled while the store's
 *   `canProceed()` is false.
 *
 * On step 1, Continue honours the `unsloth_chat_only` sessionStorage flag:
 * when it is "1" the flag is cleared, onboarding is marked done and the
 * browser is sent to /chat instead of step 2.
 */
export function WizardFooter({ onBackToSplash }: { onBackToSplash: () => void }) {
  const { currentStep, prevStep, nextStep, canProceed } = useTrainingConfigStore(
    useShallow((s) => ({
      currentStep: s.currentStep,
      prevStep: s.prevStep,
      nextStep: s.nextStep,
      canProceed: s.canProceed(),
    })),
  );
  const navigate = useNavigate();
  const isFirst = currentStep === 1;
  const isLast = currentStep === STEPS.length;

  // Shared by "Skip" and "Go to Studio", which do exactly the same thing.
  const finishAndOpenStudio = () => {
    markOnboardingDone();
    navigate({ to: "/studio" });
  };

  const handleContinue = () => {
    const wantsChatOnly =
      currentStep === 1 && sessionStorage.getItem("unsloth_chat_only") === "1";
    if (!wantsChatOnly) {
      nextStep();
      return;
    }
    sessionStorage.removeItem("unsloth_chat_only");
    markOnboardingDone();
    window.location.href = "/chat";
  };

  return (
    <footer>
      <div className="flex items-center justify-between p-6">
        <Button
          variant="outline"
          className="px-4 !pl-4"
          onClick={isFirst ? onBackToSplash : prevStep}
        >
          <HugeiconsIcon icon={ArrowLeft02Icon} data-icon="inline-start" />
          Back
        </Button>
        <div className="flex items-center gap-2">
          {!isLast && (
            <Button
              variant="outline"
              className="px-4"
              onClick={finishAndOpenStudio}
            >
              Skip
            </Button>
          )}
          {isLast ? (
            <Button
              onClick={finishAndOpenStudio}
              disabled={!canProceed}
              className="px-4 !pr-4"
            >
              Go to Studio
              <HugeiconsIcon icon={ArrowRight02Icon} data-icon="inline-end" />
            </Button>
          ) : (
            <Button
              onClick={handleContinue}
              // The padding override belongs on the icon side: this button has
              // a trailing icon, like "Go to Studio", so it is !pr-4 not !pl-4.
              className="px-4 !pr-4"
              disabled={!canProceed}
            >
              Continue
              <HugeiconsIcon icon={ArrowRight02Icon} data-icon="inline-end" />
            </Button>
          )}
        </div>
      </div>
    </footer>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/wizard-layout.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Card } from "@/components/ui/card";
import { useNavigate } from "@tanstack/react-router";
import { motion } from "motion/react";
import { Suspense, lazy, useEffect, useRef, useState } from "react";

import type { ConfettiRef } from "@/components/ui/confetti";
import { STEPS } from "@/config/training";
import { isOnboardingDone, markOnboardingDone } from "@/features/auth";
import { useTrainingConfigStore } from "@/features/training";
import { SplashScreen } from "./splash-screen";
import { WizardContent } from "./wizard-content";
import { WizardFooter } from "./wizard-footer";
import { WizardSidebar } from "./wizard-sidebar";

// Lazily loaded confetti canvas; the module's named `Confetti` export is
// re-wrapped as a default export because `lazy` expects one.
const Confetti = lazy(async () => {
  const confettiModule = await import("@/components/ui/confetti");
  return { default: confettiModule.Confetti };
});

/**
 * Top-level onboarding screen. Shows the splash screen first, then the wizard
 * card (sidebar, step content, footer), and fires confetti from both sides of
 * the screen each time the final step is reached.
 */
export function WizardLayout() {
  const navigate = useNavigate();
  const [showSplash, setShowSplash] = useState(true);
  const currentStep = useTrainingConfigStore((s) => s.currentStep);
  const confettiRef = useRef<ConfettiRef>(null);
  const hasFiredRef = useRef(false);
  const isFinalStep = currentStep === STEPS.length;

  // Only redirect on initial mount — not on re-renders after markOnboardingDone()
  // which would override explicit /chat navigation from skip buttons.
  const checkedRef = useRef(false);
  useEffect(() => {
    if (checkedRef.current) {
      return;
    }
    checkedRef.current = true;
    if (isOnboardingDone()) {
      navigate({ to: "/studio" });
    }
  }, [navigate]);

  useEffect(() => {
    if (!isFinalStep) {
      // Re-arm so the confetti fires again if the user comes back to the last step.
      hasFiredRef.current = false;
      return;
    }
    if (hasFiredRef.current) {
      return;
    }
    hasFiredRef.current = true;

    // One burst from a screen edge: `originX` picks the edge, `angle` aims it inward.
    const fireBurst = (angle: number, originX: number) => {
      confettiRef.current?.fire({
        particleCount: 80,
        angle,
        spread: 55,
        origin: { x: originX, y: 0.6 },
        colors: ["#34b482", "#26ccff", "#a25afd", "#88ff5a"],
      });
    };
    fireBurst(60, 0);
    fireBurst(120, 1);
  }, [isFinalStep]);

  const openWizard = () => setShowSplash(false);
  const backToSplash = () => setShowSplash(true);
  const skipToStudio = () => {
    markOnboardingDone();
    window.location.href = "/studio";
  };

  return (
    <div className="relative min-h-screen flex items-center justify-center overflow-hidden bg-gradient-to-br from-primary/5 via-background to-primary/3 p-4 sm:p-6 md:p-8">
      {showSplash && (
        <SplashScreen
          onStartOnboarding={openWizard}
          onGoToStudio={skipToStudio}
        />
      )}
      <Suspense fallback={null}>
        <Confetti
          ref={confettiRef}
          manualstart={true}
          className="pointer-events-none fixed inset-0 z-50 size-full"
        />
      </Suspense>
      {!showSplash && (
        <motion.div
          className="w-full max-w-5xl"
          initial={{ opacity: 0, scale: 0.98, y: 10 }}
          animate={{ opacity: 1, scale: 1, y: 0 }}
          transition={{
            duration: 0.4,
            ease: [0.165, 0.84, 0.44, 1],
          }}
        >
          <Card className="relative z-10 w-full !gap-0 !m-0 !p-0 flex min-h-[560px] flex-col overflow-hidden shadow-border ring-1 ring-border md:min-h-[620px] md:flex-row lg:h-[660px]">
            <WizardSidebar />
            <div className="flex-1 flex flex-col">
              <WizardContent />
              <WizardFooter onBackToSplash={backToSplash} />
            </div>
          </Card>
        </motion.div>
      )}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/wizard-sidebar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Progress } from "@/components/ui/progress";
import { STEPS } from "@/config/training";
import { markOnboardingDone } from "@/features/auth";
import { useTrainingConfigStore } from "@/features/training";
import { ArrowRight02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { WizardStepItem } from "./wizard-step-item";

/**
 * Left-hand panel of the onboarding wizard: branding, a progress bar, the
 * list of steps (medium screens and up) and a "Skip to Chat" shortcut.
 * The shortcut is rendered twice, once per breakpoint, because its position
 * relative to the step list differs between the two layouts.
 */
export function WizardSidebar() {
  const currentStep = useTrainingConfigStore((s) => s.currentStep);
  // 0% on the first step, 100% on the last one.
  const stepsDone = currentStep - 1;
  const progress = (stepsDone / (STEPS.length - 1)) * 100;

  // Used by both the narrow-screen and the wide-screen "Skip to Chat" buttons.
  const skipToChat = () => {
    markOnboardingDone();
    window.location.href = "/chat";
  };

  return (
    <aside className="w-full shrink-0 bg-muted/70 p-4 md:w-64 md:p-6">
      <div className="flex items-center gap-3 py-1 md:py-2">
        <img
          src="https://unsloth.ai/cgi/image/unsloth_sticker_no_shadow_ldN4V4iydw00qSIIWDCUv.png?width=96&quality=80&format=auto"
          alt="Unsloth"
          className="size-12"
        />
        <div className="flex flex-col">
          <span className="font-semibold text-lg leading-tight">Unsloth</span>
          <span className="text-xs text-muted-foreground">Studio</span>
        </div>
      </div>
      <div className="mt-3 md:mt-0">
        <Progress value={progress} className="h-1.5" />
      </div>
      <p className="mt-2 text-xs text-muted-foreground md:hidden">
        Step {currentStep} of {STEPS.length}
      </p>
      <Button
        size="sm"
        className="mt-2 w-full md:hidden"
        onClick={skipToChat}
      >
        Skip to Chat
        <HugeiconsIcon icon={ArrowRight02Icon} data-icon="inline-end" />
      </Button>
      <nav className="mt-3 hidden flex-col gap-1 md:flex">
        {STEPS.map((entry) => (
          <WizardStepItem key={entry.number} step={entry} />
        ))}
      </nav>
      <Button
        size="sm"
        className="mt-3 hidden w-full md:flex"
        onClick={skipToChat}
      >
        Skip to Chat
        <HugeiconsIcon icon={ArrowRight02Icon} data-icon="inline-end" />
      </Button>
    </aside>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/components/wizard-step-item.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import { useTrainingConfigStore } from "@/features/training";
import type { StepConfig, StepNumber } from "@/types/training";
import { useShallow } from "zustand/react/shallow";

/** Props for `WizardStepItem`. */
interface WizardStepItemProps {
  // Step to render; its `number`, `title` and `subtitle` are read.
  step: StepConfig;
}

/**
 * One entry of the sidebar step list. A step is "active" when it is the
 * current one and "completed" when the wizard has moved past it. Only
 * completed steps are clickable, which jumps the wizard back to them.
 */
export function WizardStepItem({ step }: WizardStepItemProps) {
  const { currentStep, setStep } = useTrainingConfigStore(
    useShallow((state) => ({
      currentStep: state.currentStep,
      setStep: state.setStep,
    })),
  );
  // Active and completed are mutually exclusive by construction.
  const isActive = step.number === currentStep;
  const isCompleted = step.number < currentStep;

  const jumpToStep = () => {
    if (isCompleted) {
      setStep(step.number as StepNumber);
    }
  };

  return (
    <button
      type="button"
      onClick={jumpToStep}
      disabled={!isCompleted}
      className={cn(
        "flex items-start gap-3 text-left w-full py-2 transition-colors",
        // Upcoming steps are dimmed; the active step is neither dimmed nor clickable.
        isCompleted
          ? "cursor-pointer hover:opacity-80"
          : !isActive && "opacity-50",
      )}
    >
      <div
        className={cn(
          "size-5 rounded-full flex items-center justify-center text-xs font-medium shrink-0 mt-0.5 transition-colors",
          isActive
            ? "bg-primary text-primary-foreground"
            : isCompleted
              ? "bg-primary/20 text-primary"
              : "bg-muted text-muted-foreground",
        )}
      >
        {isCompleted ? "✓" : step.number}
      </div>
      <div className="flex flex-col gap-1">
        <span
          className={cn(
            "text-sm font-medium",
            isActive ? "text-foreground" : "text-muted-foreground",
          )}
        >
          {step.title}
        </span>
        <span className="text-xs text-muted-foreground">{step.subtitle}</span>
      </div>
    </button>
  );
}


================================================
FILE: studio/frontend/src/features/onboarding/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { WizardLayout } from "./components/wizard-layout";


================================================
FILE: studio/frontend/src/features/recipe-studio/api/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";

// Fallback API base path, used when VITE_DATA_DESIGNER_API is not set.
const DEFAULT_BASE = "/api/data-recipe";

// Base URL/path for this API: the VITE_DATA_DESIGNER_API env value when it is
// defined (not null/undefined), otherwise DEFAULT_BASE.
export const DATA_DESIGNER_API_BASE =
  import.meta.env.VITE_DATA_DESIGNER_API ?? DEFAULT_BASE;

/**
 * Payload carrying a job identifier.
 * NOTE(review): presumably the response of the job-creation endpoint — confirm
 * against the caller.
 */
export type JobCreateResponse = {
  // biome-ignore lint/style/useNamingConvention: api schema
  job_id: string;
};

/**
 * Request fields for publishing a recipe job. Only `repo_id` and
 * `description` are required; `hf_token` and `artifact_path` may also be
 * sent as explicit nulls.
 * NOTE(review): `repo_id`/`hf_token` suggest a Hugging Face repo target —
 * confirm against the backend.
 */
export type PublishRecipeJobRequest = {
  repo_id: string;
  description: string;
  hf_token?: string | null;
  private?: boolean;
  artifact_path?: string | null;
};

/**
 * Result of a publish request: a success flag, a URL and a message.
 * NOTE(review): `url` presumably points at the published repo — confirm.
 */
export type PublishRecipeJobResponse = {
  success: boolean;
  url: string;
  message: string;
};

/**
 * Status snapshot of a job. Only `job_id` and `status` are required; every
 * other field is optional and most may also be null.
 * `progress` and `column_progress` share the same shape (done/total/percent,
 * eta_sec, rate, ok/failed counters).
 * NOTE(review): units are not visible here — `eta_sec` looks like seconds and
 * `started_at`/`finished_at` like timestamps; confirm against the backend.
 */
export type JobStatusResponse = {
  // biome-ignore lint/style/useNamingConvention: api schema
  job_id: string;
  status: string;
  stage?: string | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  current_column?: string | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  completed_columns?: string[] | null;
  batch?: {
    idx?: number | null;
    total?: number | null;
  };
  progress?: {
    done?: number | null;
    total?: number | null;
    percent?: number | null;
    // biome-ignore lint/style/useNamingConvention: api schema
    eta_sec?: number | null;
    rate?: number | null;
    ok?: number | null;
    failed?: number | null;
  };
  // biome-ignore lint/style/useNamingConvention: api schema
  column_progress?: {
    done?: number | null;
    total?: number | null;
    percent?: number | null;
    // biome-ignore lint/style/useNamingConvention: api schema
    eta_sec?: number | null;
    rate?: number | null;
    ok?: number | null;
    failed?: number | null;
  };
  // biome-ignore lint/style/useNamingConvention: api schema
  model_usage?: Record<string, unknown>;
  rows?: number | null;
  cols?: number | null;
  error?: string | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  has_analysis?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  dataset_rows?: number | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  artifact_path?: string | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  started_at?: number | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  finished_at?: number | null;
};

/**
 * Response shape resolved by `getRecipeJobDataset`
 * (`/jobs/{jobId}/dataset?limit=…&offset=…`). All fields are optional.
 * `limit` / `offset` presumably echo the paging parameters of the request —
 * confirm against the backend.
 */
export type JobDatasetResponse = {
  dataset?: unknown[];
  total?: number;
  limit?: number;
  offset?: number;
};

/**
 * One parsed event from the job event stream, as produced by `parseJobEvent`
 * and delivered to `streamRecipeJobEvents`' `onEvent` callback.
 *
 * - `event`: the `event:` field, defaulting to "message".
 * - `id`: the numeric `id:` field, or `null` when none was parsed.
 * - `payload`: the JSON value decoded from the event's `data:` lines.
 */
export type JobEvent = {
  event: string;
  id: number | null;
  payload: Record<string, unknown>;
};

/**
 * JSON body posted by `inspectSeedDataset` to `/seed/inspect`.
 * Only `dataset_name` is required.
 */
export type SeedInspectRequest = {
  // biome-ignore lint/style/useNamingConvention: api schema
  dataset_name: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  hf_token?: string;
  subset?: string;
  split?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  preview_size?: number;
};

/**
 * JSON body posted by `inspectSeedUpload` to `/seed/inspect-upload`.
 * The file is sent inline as base64 rather than as multipart form data.
 * The `unstructured_*` fields presumably only apply when
 * `seed_source_type` is "unstructured" — confirm against the backend.
 */
export type SeedInspectUploadRequest = {
  filename: string;
  // base64 payload without data URL prefix
  content_base64: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  preview_size?: number;
  // biome-ignore lint/style/useNamingConvention: api schema
  seed_source_type?: "local" | "unstructured";
  // biome-ignore lint/style/useNamingConvention: api schema
  unstructured_chunk_size?: number;
  // biome-ignore lint/style/useNamingConvention: api schema
  unstructured_chunk_overlap?: number;
};

/**
 * Response shape resolved by both `inspectSeedDataset` and
 * `inspectSeedUpload`: the column names plus a list of preview rows, each row
 * being a plain key/value record.
 */
export type SeedInspectResponse = {
  // biome-ignore lint/style/useNamingConvention: api schema
  dataset_name: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  resolved_path: string;
  columns: string[];
  // biome-ignore lint/style/useNamingConvention: api schema
  preview_rows: Record<string, unknown>[];
  split?: string | null;
  subset?: string | null;
};

/** A single entry of `ValidateResponse.errors`; only `message` is guaranteed. */
export type ValidateError = {
  message: string;
  path?: string | null;
  code?: string | null;
};

/** Response shape resolved by `validateRecipe` (`/validate`). */
export type ValidateResponse = {
  valid: boolean;
  errors: ValidateError[];
  // biome-ignore lint/style/useNamingConvention: api schema
  raw_detail?: string | null;
};

/**
 * JSON body posted by `listMcpTools` to `/mcp/tools`.
 * Provider entries are passed through as opaque records.
 */
export type McpToolsListRequest = {
  // biome-ignore lint/style/useNamingConvention: api schema
  mcp_providers: Record<string, unknown>[];
  // biome-ignore lint/style/useNamingConvention: api schema
  timeout_sec?: number;
};

/** Per-provider entry of `McpToolsListResponse.providers`: tool names plus an optional error. */
export type McpToolsProviderResult = {
  name: string;
  tools: string[];
  error?: string | null;
};

/**
 * Response shape resolved by `listMcpTools`.
 * `duplicate_tools` maps a string key to a list of strings — presumably a tool
 * name to the providers that expose it; confirm against the backend.
 */
export type McpToolsListResponse = {
  providers: McpToolsProviderResult[];
  // biome-ignore lint/style/useNamingConvention: api schema
  duplicate_tools: Record<string, string[]>;
};

/**
 * Extract a human-readable error message from a failed HTTP response.
 *
 * The body is read as text. If it is a JSON object, the first usable value
 * among `detail`, `message` and `raw_detail` is returned; otherwise the raw
 * (trimmed) body text is returned. An empty body yields "Request failed.".
 *
 * `detail` is not guaranteed to be a string: validation failures are commonly
 * reported as a list of `{ msg: string, ... }` objects (presumably what the
 * backend emits for request-validation errors — confirm). Such lists are
 * flattened into "msg; msg" instead of being returned as-is, which would
 * surface to the user as "[object Object]". Empty or non-textual candidates
 * are skipped so a later field (or the raw text) can be used instead.
 *
 * @param response - The non-OK response whose body should be inspected.
 * @returns A non-empty message suitable for `new Error(...)`.
 */
async function parseErrorResponse(response: Response): Promise<string> {
  const text = (await response.text()).trim();
  if (!text) {
    return "Request failed.";
  }

  // Convert one candidate field into a non-empty string, or null if unusable.
  const toMessage = (value: unknown): string | null => {
    if (typeof value === "string") {
      return value.trim() ? value : null;
    }
    if (Array.isArray(value)) {
      const parts: string[] = [];
      for (const item of value) {
        if (typeof item === "string") {
          if (item.trim()) {
            parts.push(item);
          }
          continue;
        }
        if (item && typeof item === "object") {
          const msg = (item as { msg?: unknown }).msg;
          if (typeof msg === "string" && msg.trim()) {
            parts.push(msg);
          }
        }
      }
      return parts.length > 0 ? parts.join("; ") : null;
    }
    return null;
  };

  try {
    const parsed: unknown = JSON.parse(text);
    if (parsed && typeof parsed === "object") {
      const body = parsed as Record<string, unknown>;
      // Same precedence as before: detail, then message, then raw_detail.
      for (const key of ["detail", "message", "raw_detail"]) {
        const message = toMessage(body[key]);
        if (message !== null) {
          return message;
        }
      }
    }
    return text;
  } catch {
    // Body was not JSON; the raw text is the best message available.
    return text;
  }
}

/**
 * POST `payload` as JSON to `path` (relative to the data-designer API base)
 * and resolve with the decoded JSON response.
 *
 * @throws Error carrying the message from `parseErrorResponse` when the
 *   response status is not OK.
 */
async function postJson<T>(path: string, payload: unknown): Promise<T> {
  const url = `${DATA_DESIGNER_API_BASE}${path}`;
  const body = JSON.stringify(payload);
  const response = await authFetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
  });

  if (response.ok) {
    return response.json();
  }
  const message = await parseErrorResponse(response);
  throw new Error(message);
}

/**
 * GET `path` (relative to the data-designer API base) and resolve with the
 * decoded JSON response.
 *
 * @throws Error carrying the message from `parseErrorResponse` when the
 *   response status is not OK.
 */
async function getJson<T>(path: string): Promise<T> {
  const url = `${DATA_DESIGNER_API_BASE}${path}`;
  const response = await authFetch(url);
  if (response.ok) {
    return response.json();
  }
  const message = await parseErrorResponse(response);
  throw new Error(message);
}

/**
 * Parse one raw server-sent-event block (the text between two blank lines)
 * into a `JobEvent`.
 *
 * Recognised fields are `event:`, `id:` and `data:`; any other line (including
 * comments and `retry:`) is ignored. Multiple `data:` lines are joined with
 * "\n" before being decoded as JSON.
 *
 * @param rawEvent - The raw event text, lines separated by LF or CRLF.
 * @returns The parsed event, or `null` when the block has no `data:` line,
 *   the data is not valid JSON, or the decoded JSON is not an object
 *   (`null` / primitive) and therefore cannot serve as `payload`.
 */
function parseJobEvent(rawEvent: string): JobEvent | null {
  let eventName = "message";
  let id: number | null = null;
  const dataLines: string[] = [];

  for (const line of rawEvent.split(/\r?\n/)) {
    if (!line) {
      continue;
    }
    if (line.startsWith("event:")) {
      eventName = line.slice(6).trim() || "message";
      continue;
    }
    if (line.startsWith("id:")) {
      const rawId = line.slice(3).trim();
      // Number("") is 0, so an empty `id:` field must be handled explicitly;
      // otherwise it would be reported as event id 0 instead of "no id".
      const value = rawId === "" ? Number.NaN : Number(rawId);
      id = Number.isFinite(value) ? value : null;
      continue;
    }
    if (line.startsWith("data:")) {
      dataLines.push(line.slice(5).trimStart());
    }
  }

  if (dataLines.length === 0) {
    return null;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(dataLines.join("\n"));
  } catch {
    return null;
  }
  // `payload` is typed as a record; reject `null` and primitives so consumers
  // can safely read properties from it.
  if (decoded === null || typeof decoded !== "object") {
    return null;
  }

  return {
    event: eventName,
    id,
    payload: decoded as Record<string, unknown>,
  };
}

/** POST a recipe payload to `/validate` and resolve with the validation result. */
export async function validateRecipe(
  payload: unknown,
): Promise<ValidateResponse> {
  const result = await postJson<ValidateResponse>("/validate", payload);
  return result;
}

/** POST a recipe payload to `/jobs` and resolve with the job-creation response. */
export async function createRecipeJob(payload: unknown): Promise<JobCreateResponse> {
  const created = await postJson<JobCreateResponse>("/jobs", payload);
  return created;
}

/** GET `/jobs/{jobId}/status` and resolve with the job's status snapshot. */
export async function getRecipeJobStatus(jobId: string): Promise<JobStatusResponse> {
  const statusPath = `/jobs/${jobId}/status`;
  return getJson<JobStatusResponse>(statusPath);
}

/** GET `/jobs/{jobId}/analysis` and resolve with the (untyped) analysis record. */
export async function getRecipeJobAnalysis(
  jobId: string,
): Promise<Record<string, unknown>> {
  const analysisPath = `/jobs/${jobId}/analysis`;
  return getJson<Record<string, unknown>>(analysisPath);
}

/**
 * GET one page of a job's dataset from `/jobs/{jobId}/dataset`.
 *
 * @param jobId - Job whose dataset is requested.
 * @param options - Optional paging; `limit` defaults to 20 and `offset` to 0.
 */
export async function getRecipeJobDataset(
  jobId: string,
  options?: {
    limit?: number;
    offset?: number;
  },
): Promise<JobDatasetResponse> {
  const pageSize = options?.limit ?? 20;
  const startAt = options?.offset ?? 0;
  const datasetPath = `/jobs/${jobId}/dataset?limit=${pageSize}&offset=${startAt}`;
  return getJson<JobDatasetResponse>(datasetPath);
}

/** POST an empty body to `/jobs/{jobId}/cancel` and resolve with the resulting job status. */
export async function cancelRecipeJob(jobId: string): Promise<JobStatusResponse> {
  const cancelPath = `/jobs/${jobId}/cancel`;
  return postJson<JobStatusResponse>(cancelPath, {});
}

/** POST the publish request to `/jobs/{jobId}/publish` and resolve with the publish result. */
export async function publishRecipeJob(
  jobId: string,
  payload: PublishRecipeJobRequest,
): Promise<PublishRecipeJobResponse> {
  const publishPath = `/jobs/${jobId}/publish`;
  return postJson<PublishRecipeJobResponse>(publishPath, payload);
}

/** POST the inspection request to `/seed/inspect` and resolve with columns and preview rows. */
export async function inspectSeedDataset(
  payload: SeedInspectRequest,
): Promise<SeedInspectResponse> {
  const inspected = await postJson<SeedInspectResponse>("/seed/inspect", payload);
  return inspected;
}

/** POST an inline (base64) file to `/seed/inspect-upload` and resolve with columns and preview rows. */
export async function inspectSeedUpload(
  payload: SeedInspectUploadRequest,
): Promise<SeedInspectResponse> {
  const inspected = await postJson<SeedInspectResponse>(
    "/seed/inspect-upload",
    payload,
  );
  return inspected;
}

/** POST the provider list to `/mcp/tools` and resolve with the tools reported per provider. */
export async function listMcpTools(
  payload: McpToolsListRequest,
): Promise<McpToolsListResponse> {
  const listing = await postJson<McpToolsListResponse>("/mcp/tools", payload);
  return listing;
}

/**
 * Open the event stream of a job (`/jobs/{jobId}/events`) and deliver each
 * parsed event to `options.onEvent` until the stream ends.
 *
 * @param options.jobId - Job whose events are streamed.
 * @param options.signal - Abort signal forwarded to the request.
 * @param options.lastEventId - When a number, sent both as the `Last-Event-ID`
 *   header and as the `after` query parameter.
 * @param options.onOpen - Called once after the response is accepted and
 *   before the first chunk is read.
 * @param options.onEvent - Called for every event block that
 *   `parseJobEvent` can parse.
 * @returns Resolves when the server closes the stream. A trailing block that
 *   was not terminated by a blank line is discarded.
 * @throws Error when the response is not OK or has no body; read errors
 *   (including aborts) propagate from the stream reader.
 */
export async function streamRecipeJobEvents(options: {
  jobId: string;
  signal: AbortSignal;
  lastEventId?: number | null;
  onOpen?: () => void;
  onEvent: (event: JobEvent) => void;
}): Promise<void> {
  const headers = new Headers();
  let query = "";
  if (typeof options.lastEventId === "number") {
    headers.set("Last-Event-ID", String(options.lastEventId));
    query = `?after=${options.lastEventId}`;
  }

  const response = await authFetch(
    `${DATA_DESIGNER_API_BASE}/jobs/${options.jobId}/events${query}`,
    {
      method: "GET",
      headers,
      signal: options.signal,
    },
  );
  if (!response.ok) {
    throw new Error(await parseErrorResponse(response));
  }
  if (!response.body) {
    throw new Error("Job stream unavailable.");
  }

  options.onOpen?.();

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  // Events are separated by a blank line. The separator can be 2, 3 or 4
  // characters long ("\n\n", "\r\n\n", "\n\r\n", "\r\n\r\n"), so its length is
  // taken from the actual match rather than guessed from the first character;
  // guessing would swallow the first character of the next event for "\r\n\n".
  const separator = /\r?\n\r?\n/;
  let buffer = "";

  while (true) {
    const { value, done } = await reader.read();
    if (done) {
      break;
    }
    // `stream: true` keeps multi-byte characters split across chunks intact.
    buffer += decoder.decode(value, { stream: true });

    let match = separator.exec(buffer);
    while (match) {
      const rawEvent = buffer.slice(0, match.index);
      buffer = buffer.slice(match.index + match[0].length);

      // Blocks that start with a reconnection hint are skipped entirely.
      if (!rawEvent.startsWith("retry:")) {
        const parsed = parseJobEvent(rawEvent);
        if (parsed) {
          options.onEvent(parsed);
        }
      }
      match = separator.exec(buffer);
    }
  }
}

// NOTE: preview endpoints removed from harness.


================================================
FILE: studio/frontend/src/features/recipe-studio/blocks/definitions.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  BalanceScaleIcon,
  Clock01Icon,
  CodeIcon,
  CodeSimpleIcon,
  DiceFaces03Icon,
  DocumentAttachmentIcon,
  DocumentCodeIcon,
  EqualSignIcon,
  FingerPrintIcon,
  FunctionIcon,
  Plug01Icon,
  Parabola02Icon,
  PencilEdit02Icon,
  Plant01Icon,
  Shield02Icon,
  Tag01Icon,
  TagsIcon,
  UserAccountIcon,
} from "@hugeicons/core-free-icons";
import type {
  LlmType,
  NodeConfig,
  SamplerType,
  SeedSourceType,
} from "../types";
import {
  makeExpressionConfig,
  makeLlmConfig,
  makeMarkdownNoteConfig,
  makeModelConfig,
  makeModelProviderConfig,
  makeToolProfileConfig,
  makeSamplerConfig,
  makeSeedConfig,
  makeValidatorConfig,
} from "../utils";

/**
 * Top-level group a block belongs to. Used as the `kind` of both
 * `BlockGroup` and `BlockDefinition`, and as the filter key of
 * `getBlocksForKind`.
 */
export type BlockKind =
  | "sampler"
  | "llm"
  | "validator"
  | "expression"
  | "seed"
  | "note";
/**
 * Identifier of a concrete block variant within a `BlockKind`.
 * Includes every sampler and LLM type plus the literal identifiers below.
 * Not every member has an entry in `BLOCK_DEFINITIONS` (the bare `"seed"`
 * has none in this file).
 */
export type BlockType =
  | SamplerType
  | LlmType
  | "validator_python"
  | "validator_sql"
  | "validator_oxc"
  | "expression"
  | "markdown_note"
  | "seed"
  | "seed_hf"
  | "seed_local"
  | "seed_unstructured"
  | "model_provider"
  | "model_config"
  | "tool_config";

/** The seed block variants; one per seed source type (see the mapping in `getBlockDefinitionForConfig`). */
export type SeedBlockType = "seed_hf" | "seed_local" | "seed_unstructured";

// Type of an icon object from the icon package, derived from one representative icon.
type IconType = typeof CodeIcon;

/** Display metadata for one block group (see `BLOCK_GROUPS`). */
export type BlockGroup = {
  kind: BlockKind;
  title: string;
  description: string;
  icon: IconType;
};

/**
 * Key that selects which editor dialog a block opens; stored on
 * `BlockDefinition.dialogKey`. Several block types can share one key
 * (e.g. all seed blocks use "seed", all validator blocks use "validator").
 */
export type BlockDialogKey =
  | "seed"
  | "markdown_note"
  | "category"
  | "subcategory"
  | "uniform"
  | "gaussian"
  | "bernoulli"
  | "datetime"
  | "timedelta"
  | "uuid"
  | "person"
  | "llm"
  | "validator"
  | "model_provider"
  | "model_config"
  | "tool_config"
  | "expression";

/**
 * Catalogue entry describing one block that can be added to a recipe:
 * its group (`kind`), variant (`type`), display text and icon, the dialog
 * used to edit it, and a factory for its initial config.
 *
 * `createConfig` receives the id for the new node and the configs that
 * already exist, and returns the new node's config.
 */
export type BlockDefinition = {
  kind: BlockKind;
  type: BlockType;
  title: string;
  description: string;
  icon: IconType;
  dialogKey: BlockDialogKey;
  createConfig: (id: string, existing: NodeConfig[]) => NodeConfig;
};

/**
 * Display metadata for the block groups, one entry per `BlockKind`.
 * Array order is significant to consumers that render the list as-is.
 */
export const BLOCK_GROUPS: BlockGroup[] = [
  {
    kind: "sampler",
    title: "Generated fields",
    description: "Create fields from lists, ranges, and reusable patterns.",
    icon: DiceFaces03Icon,
  },
  {
    kind: "seed",
    title: "Source data",
    description: "Start from an existing dataset or file.",
    icon: Plant01Icon,
  },
  {
    kind: "llm",
    title: "AI generation",
    description: "Generate content, connect models, and manage tools.",
    icon: PencilEdit02Icon,
  },
  {
    kind: "validator",
    title: "Checks",
    description: "Lint or filter generated code as it moves through the recipe.",
    icon: Shield02Icon,
  },
  {
    kind: "expression",
    title: "Formulas",
    description: "Build a field from other fields.",
    icon: FunctionIcon,
  },
  {
    kind: "note",
    title: "Notes",
    description: "Add markdown notes to document your flow.",
    icon: PencilEdit02Icon,
  },
];

// Sampler types that have their own dialog; for these the dialog key equals the type.
type SamplerBlockType = Extract<SamplerType, BlockDialogKey>;

// Build a "seed" block entry; all seed blocks share the "seed" dialog.
function defineSeedBlock(
  type: SeedBlockType,
  source: SeedSourceType,
  title: string,
  description: string,
  icon: IconType,
): BlockDefinition {
  return {
    kind: "seed",
    type,
    title,
    description,
    icon,
    dialogKey: "seed",
    createConfig: (id, existing) => makeSeedConfig(id, existing, source),
  };
}

// Build a "sampler" block entry; the dialog key is the sampler type itself.
function defineSamplerBlock(
  type: SamplerBlockType,
  title: string,
  description: string,
  icon: IconType,
): BlockDefinition {
  return {
    kind: "sampler",
    type,
    title,
    description,
    icon,
    dialogKey: type,
    createConfig: (id, existing) => makeSamplerConfig(id, type, existing),
  };
}

// Build an AI-generation block entry; all of them share the "llm" dialog.
function defineLlmBlock(
  type: LlmType,
  title: string,
  description: string,
  icon: IconType,
): BlockDefinition {
  return {
    kind: "llm",
    type,
    title,
    description,
    icon,
    dialogKey: "llm",
    createConfig: (id, existing) => makeLlmConfig(id, type, existing),
  };
}

// Build a "validator" block entry; all of them share one icon and dialog.
function defineValidatorBlock(
  type: "validator_python" | "validator_sql" | "validator_oxc",
  title: string,
  description: string,
  createConfig: BlockDefinition["createConfig"],
): BlockDefinition {
  return {
    kind: "validator",
    type,
    title,
    description,
    icon: Shield02Icon,
    dialogKey: "validator",
    createConfig,
  };
}

/**
 * Catalogue of every block that can be added to a recipe.
 * Order matters: `getBlocksForKind` returns entries in this order.
 */
const BLOCK_DEFINITIONS: BlockDefinition[] = [
  // Source data
  defineSeedBlock(
    "seed_hf",
    "hf",
    "Hugging Face dataset",
    "Use rows from a Hugging Face dataset as source data.",
    Plant01Icon,
  ),
  defineSeedBlock(
    "seed_local",
    "local",
    "CSV or JSON file",
    "Upload CSV, JSON, or JSONL and use its rows as source data.",
    DocumentCodeIcon,
  ),
  defineSeedBlock(
    "seed_unstructured",
    "unstructured",
    "Document file",
    "Upload PDF, DOCX, or TXT and turn it into source rows.",
    DocumentAttachmentIcon,
  ),

  // Generated fields
  defineSamplerBlock(
    "category",
    "Category",
    "Generate values from a list you define, with optional weights or rules.",
    Tag01Icon,
  ),
  defineSamplerBlock(
    "subcategory",
    "Subcategory",
    "Generate values from groups you define for each category.",
    TagsIcon,
  ),
  defineSamplerBlock(
    "uniform",
    "Random number",
    "Generate a number anywhere between a minimum and maximum.",
    EqualSignIcon,
  ),
  defineSamplerBlock(
    "gaussian",
    "Bell-curve number",
    "Generate numbers around an average value.",
    Parabola02Icon,
  ),
  defineSamplerBlock(
    "bernoulli",
    "Yes/no value",
    "Generate a binary result from a probability.",
    EqualSignIcon,
  ),
  defineSamplerBlock(
    "datetime",
    "Date and time",
    "Generate timestamps inside a date range.",
    Clock01Icon,
  ),
  defineSamplerBlock(
    "timedelta",
    "Time offset",
    "Generate a time difference from another date field.",
    Clock01Icon,
  ),
  defineSamplerBlock(
    "uuid",
    "Unique ID",
    "Generate unique identifiers.",
    FingerPrintIcon,
  ),
  defineSamplerBlock(
    "person",
    "Synthetic person",
    "Generate realistic person details.",
    UserAccountIcon,
  ),

  // AI generation
  defineLlmBlock(
    "text",
    "AI text",
    "Generate text from your prompt.",
    PencilEdit02Icon,
  ),
  defineLlmBlock(
    "structured",
    "AI structured data",
    "Generate JSON that follows a response format.",
    CodeIcon,
  ),
  defineLlmBlock(
    "code",
    "AI code",
    "Generate code in the language you choose.",
    CodeSimpleIcon,
  ),
  defineLlmBlock(
    "judge",
    "AI scorer",
    "Score outputs against your criteria.",
    BalanceScaleIcon,
  ),

  // AI setup blocks (listed under the "llm" group, each with its own dialog)
  {
    kind: "llm",
    type: "model_provider",
    title: "Provider connection",
    description: "Choose where model requests go and how to sign in.",
    icon: Shield02Icon,
    dialogKey: "model_provider",
    createConfig: (id, existing) => makeModelProviderConfig(id, existing),
  },
  {
    kind: "llm",
    type: "model_config",
    title: "Model preset",
    description: "Pick a model and save reusable generation settings.",
    icon: Plant01Icon,
    dialogKey: "model_config",
    createConfig: (id, existing) => makeModelConfig(id, existing),
  },
  {
    kind: "llm",
    type: "tool_config",
    title: "Tool access",
    description: "Choose which tools an AI step can use.",
    icon: Plug01Icon,
    dialogKey: "tool_config",
    createConfig: (id, existing) => makeToolProfileConfig(id, existing),
  },

  // Checks
  defineValidatorBlock(
    "validator_python",
    "Python check",
    "Lint generated Python and filter out rows that fail.",
    (id, existing) => makeValidatorConfig(id, "code", "python", existing),
  ),
  defineValidatorBlock(
    "validator_sql",
    "SQL check",
    "Lint generated SQL and filter out rows that fail.",
    (id, existing) => makeValidatorConfig(id, "code", "sql:sqlite", existing),
  ),
  defineValidatorBlock(
    "validator_oxc",
    "JS/TS check",
    "Lint generated JavaScript or TypeScript and filter out rows that fail.",
    (id, existing) => makeValidatorConfig(id, "oxc", "javascript", existing),
  ),

  // Formulas
  {
    kind: "expression",
    type: "expression",
    title: "Formula",
    description: "Build or transform a field using other fields.",
    icon: FunctionIcon,
    dialogKey: "expression",
    createConfig: (id, existing) => makeExpressionConfig(id, existing),
  },

  // Notes
  {
    kind: "note",
    type: "markdown_note",
    title: "Note",
    description: "Add a note to the canvas. Notes do not affect the run.",
    icon: PencilEdit02Icon,
    dialogKey: "markdown_note",
    createConfig: (id, existing) => makeMarkdownNoteConfig(id, existing),
  },
];

/** Return every block definition of the given kind, in catalogue order. */
export function getBlocksForKind(kind: BlockKind): BlockDefinition[] {
  const matches: BlockDefinition[] = [];
  for (const definition of BLOCK_DEFINITIONS) {
    if (definition.kind === kind) {
      matches.push(definition);
    }
  }
  return matches;
}

/**
 * Look up the first block definition matching both `kind` and `type`.
 *
 * @returns The matching definition, or `null` when the catalogue has none.
 */
export function getBlockDefinition(
  kind: BlockKind,
  type: BlockType,
): BlockDefinition | null {
  for (const definition of BLOCK_DEFINITIONS) {
    if (definition.kind === kind && definition.type === type) {
      return definition;
    }
  }
  return null;
}

/**
 * Resolve the catalogue entry that corresponds to a node config.
 *
 * Mapping rules:
 * - seed: chosen by `seed_source_type` (missing value is treated as "hf").
 * - sampler: `person_from_faker` is mapped onto the "person" block.
 * - validator: "oxc" validators map to the JS/TS check; otherwise a
 *   `code_lang` starting with "sql:" maps to the SQL check and anything else
 *   to the Python check.
 * - model_provider / model_config / tool_config: looked up in the "llm" group.
 * - any other kind falls back to the expression block.
 *
 * @returns The definition, or `null` for a missing config or when the
 *   catalogue has no matching entry.
 */
export function getBlockDefinitionForConfig(
  config: NodeConfig | null,
): BlockDefinition | null {
  if (!config) {
    return null;
  }

  switch (config.kind) {
    case "seed": {
      const blockTypeBySource: Record<SeedSourceType, SeedBlockType> = {
        hf: "seed_hf",
        local: "seed_local",
        unstructured: "seed_unstructured",
      };
      const source = config.seed_source_type ?? "hf";
      return getBlockDefinition("seed", blockTypeBySource[source]);
    }
    case "sampler": {
      const blockType =
        config.sampler_type === "person_from_faker"
          ? "person"
          : config.sampler_type;
      return getBlockDefinition("sampler", blockType);
    }
    case "llm":
      return getBlockDefinition("llm", config.llm_type);
    case "validator": {
      if (config.validator_type === "oxc") {
        return getBlockDefinition("validator", "validator_oxc");
      }
      const blockType = config.code_lang.startsWith("sql:")
        ? "validator_sql"
        : "validator_python";
      return getBlockDefinition("validator", blockType);
    }
    case "model_provider":
      return getBlockDefinition("llm", "model_provider");
    case "model_config":
      return getBlockDefinition("llm", "model_config");
    case "tool_config":
      return getBlockDefinition("llm", "tool_config");
    case "markdown_note":
      return getBlockDefinition("note", "markdown_note");
    default:
      return getBlockDefinition("expression", "expression");
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/blocks/registry.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export type {
  BlockDefinition,
  BlockDialogKey,
  BlockGroup,
  BlockKind,
  BlockType,
  SeedBlockType,
} from "./definitions";
export {
  BLOCK_GROUPS,
  getBlockDefinition,
  getBlockDefinitionForConfig,
  getBlocksForKind,
} from "./definitions";
export { renderBlockDialog } from "./render-dialog";


================================================
FILE: studio/frontend/src/features/recipe-studio/blocks/render-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";
import type { NodeConfig, SamplerConfig } from "../types";
import { getBlockDefinitionForConfig } from "./definitions";
import { ExpressionDialog } from "../dialogs/expression/expression-dialog";
import { LlmDialog } from "../dialogs/llm/llm-dialog";
import { ModelConfigDialog } from "../dialogs/models/model-config-dialog";
import { ModelProviderDialog } from "../dialogs/models/model-provider-dialog";
import { SeedDialog } from "../dialogs/seed/seed-dialog";
import { CategoryDialog } from "../dialogs/samplers/category-dialog";
import { DatetimeDialog } from "../dialogs/samplers/datetime-dialog";
import { BernoulliDialog } from "../dialogs/samplers/bernoulli-dialog";
import { GaussianDialog } from "../dialogs/samplers/gaussian-dialog";
import { PersonDialog } from "../dialogs/samplers/person-dialog";
import { SubcategoryDialog } from "../dialogs/samplers/subcategory-dialog";
import { TimedeltaDialog } from "../dialogs/samplers/timedelta-dialog";
import { UniformDialog } from "../dialogs/samplers/uniform-dialog";
import { UuidDialog } from "../dialogs/samplers/uuid-dialog";
import { MarkdownNoteDialog } from "../dialogs/markdown-note/markdown-note-dialog";
import { ToolProfileDialog } from "../dialogs/tool-profile/tool-profile-dialog";
import { ValidatorDialog } from "../dialogs/validators/validator-dialog";

/**
 * Render the editor dialog that matches a node config.
 *
 * The dialog is selected by the `dialogKey` of the config's block definition;
 * each branch additionally checks that the config really has the shape the
 * dialog expects and renders nothing otherwise.
 *
 * @param config - Config of the node being edited, or `null`.
 * @param open - Forwarded to the seed dialog only.
 * @param categoryOptions - Forwarded to the subcategory dialog.
 * @param modelConfigAliases - Forwarded to the LLM dialog.
 * @param modelProviderOptions - Forwarded to the LLM and model-config dialogs.
 * @param toolProfileAliases - Forwarded to the LLM dialog.
 * @param datetimeOptions - Forwarded to the timedelta dialog.
 * @param onUpdate - Called with the node id and a partial config patch.
 * @returns The dialog element, or `null` when there is no config, no matching
 *   definition, or the config does not fit the selected dialog.
 */
export function renderBlockDialog(
  config: NodeConfig | null,
  open: boolean,
  categoryOptions: SamplerConfig[],
  modelConfigAliases: string[],
  modelProviderOptions: string[],
  toolProfileAliases: string[],
  datetimeOptions: string[],
  onUpdate: (id: string, patch: Partial<NodeConfig>) => void,
): ReactElement | null {
  const definition = getBlockDefinitionForConfig(config);
  if (!definition || !config) {
    return null;
  }

  // Bind the node id so dialogs only need to supply the patch.
  const applyPatch = (patch: Partial<NodeConfig>) => onUpdate(config.id, patch);

  switch (definition.dialogKey) {
    case "seed":
      if (config.kind === "seed") {
        return <SeedDialog config={config} onUpdate={applyPatch} open={open} />;
      }
      return null;
    case "category":
      if (config.kind === "sampler" && config.sampler_type === "category") {
        return (
          <CategoryDialog key={config.id} config={config} onUpdate={applyPatch} />
        );
      }
      return null;
    case "subcategory":
      if (config.kind === "sampler" && config.sampler_type === "subcategory") {
        return (
          <SubcategoryDialog
            config={config}
            categoryOptions={categoryOptions}
            onUpdate={applyPatch}
          />
        );
      }
      return null;
    case "uniform":
      if (config.kind === "sampler" && config.sampler_type === "uniform") {
        return <UniformDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "gaussian":
      if (config.kind === "sampler" && config.sampler_type === "gaussian") {
        return <GaussianDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "bernoulli":
      if (config.kind === "sampler" && config.sampler_type === "bernoulli") {
        return <BernoulliDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "datetime":
      if (config.kind === "sampler" && config.sampler_type === "datetime") {
        return <DatetimeDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "timedelta":
      if (config.kind === "sampler" && config.sampler_type === "timedelta") {
        return (
          <TimedeltaDialog
            config={config}
            datetimeOptions={datetimeOptions}
            onUpdate={applyPatch}
          />
        );
      }
      return null;
    case "uuid":
      if (config.kind === "sampler" && config.sampler_type === "uuid") {
        return <UuidDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "person":
      // Both person sampler variants are edited with the same dialog.
      if (
        config.kind === "sampler" &&
        (config.sampler_type === "person" ||
          config.sampler_type === "person_from_faker")
      ) {
        return <PersonDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "llm":
      if (config.kind === "llm") {
        return (
          <LlmDialog
            config={config}
            modelConfigAliases={modelConfigAliases}
            modelProviderOptions={modelProviderOptions}
            toolProfileAliases={toolProfileAliases}
            onUpdate={applyPatch}
          />
        );
      }
      return null;
    case "model_provider":
      if (config.kind === "model_provider") {
        return <ModelProviderDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "model_config":
      if (config.kind === "model_config") {
        return (
          <ModelConfigDialog
            config={config}
            providerOptions={modelProviderOptions}
            onUpdate={applyPatch}
          />
        );
      }
      return null;
    case "tool_config":
      if (config.kind === "tool_config") {
        return <ToolProfileDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "expression":
      if (config.kind === "expression") {
        return <ExpressionDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "validator":
      if (config.kind === "validator") {
        return <ValidatorDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
    case "markdown_note":
      if (config.kind === "markdown_note") {
        return <MarkdownNoteDialog config={config} onUpdate={applyPatch} />;
      }
      return null;
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/block-sheet.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Badge } from "@/components/ui/badge";
import { Input } from "@/components/ui/input";
import {
  Sheet,
  SheetContent,
  SheetHeader,
  SheetTitle,
  SheetTrigger,
} from "@/components/ui/sheet";
import {
  ArrowLeft02Icon,
  ArrowRight01Icon,
  CodeIcon,
  Copy02Icon,
  type Database02Icon,
  DragDropVerticalIcon,
  DocumentAttachmentIcon,
  PlusSignIcon,
  Search01Icon,
  Tick02Icon,
  Upload01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import {
  useCallback,
  type DragEvent as ReactDragEvent,
  type ReactElement,
  useMemo,
  useState,
} from "react";
import { RECIPE_FLOATING_ICON_BUTTON_CLASS } from "./recipe-floating-icon-button-class";
import type { LlmType, SamplerType } from "../types";
import {
  BLOCK_GROUPS,
  getBlocksForKind,
  type BlockType,
  type SeedBlockType,
} from "../blocks/registry";
import {
  RECIPE_STUDIO_ONBOARDING_ICON_TONE,
  RECIPE_STUDIO_ONBOARDING_SURFACE_TONE,
} from "../utils/ui-tones";

// Which page of the block sheet is showing: the root list, one block group,
// or the processor page.
type SheetView =
  | "root"
  | "sampler"
  | "seed"
  | "llm"
  | "validator"
  | "expression"
  | "note"
  | "processor";
// The views that correspond to a block group (see VIEW_KIND); excludes
// "root" and "processor".
type SheetKind =
  | "sampler"
  | "seed"
  | "llm"
  | "validator"
  | "expression"
  | "note";
// Every view that can be reached from the root page.
type RootSheetView = Exclude<SheetView, "root">;
// One entry of the root page list.
type RootGroup = {
  kind: RootSheetView;
  title: string;
  description: string;
  icon: typeof Database02Icon;
};

// Props of `BlockSheet`.
// `open` / `onOpenChange` are optional: when `open` is a boolean the sheet is
// controlled by the parent, otherwise it tracks its own open state.
// The `onAdd*` callbacks are invoked with the selected block variant (or no
// argument for block kinds that have a single variant).
type BlockSheetProps = {
  container: HTMLDivElement | null;
  sheetView: SheetView;
  onViewChange: (sheetView: SheetView) => void;
  open?: boolean;
  onOpenChange?: (open: boolean) => void;
  onAddSampler: (type: SamplerType) => void;
  onAddSeed: (type: SeedBlockType) => void;
  onAddLlm: (type: LlmType) => void;
  onAddModelProvider: () => void;
  onAddModelConfig: () => void;
  onAddToolProfile: () => void;
  onAddExpression: () => void;
  onAddValidator: (
    type: "validator_python" | "validator_sql" | "validator_oxc",
  ) => void;
  onAddMarkdownNote: () => void;
  onOpenProcessors: () => void;
  copied: boolean;
  onCopy: () => void;
  onImport: () => void;
};

/** Custom MIME type identifying recipe-studio block data in a drag-and-drop transfer. */
export const RECIPE_BLOCK_DND_MIME = "application/x-recipe-studio-block";
/**
 * Identifies the dragged block by group and variant.
 * Presumably serialized under `RECIPE_BLOCK_DND_MIME` — confirm at the drag source.
 */
export type RecipeBlockDragPayload = {
  kind: SheetKind;
  type: BlockType;
};

/**
 * Title shown in the sheet header for the given view.
 * Any view without a dedicated title (i.e. "llm") gets "AI generation".
 */
function getSheetTitle(sheetView: SheetView): string {
  switch (sheetView) {
    case "root":
      return "Add a step";
    case "sampler":
      return "Generated fields";
    case "seed":
      return "Source data";
    case "expression":
      return "Formulas";
    case "validator":
      return "Checks";
    case "note":
      return "Notes";
    case "processor":
      return "Processor blocks";
    default:
      return "AI generation";
  }
}

// Maps each sheet view to the block group it lists; "root" and "processor"
// do not list blocks of a single group and therefore map to null.
const VIEW_KIND: Record<SheetView, SheetKind | null> = {
  root: null,
  sampler: "sampler",
  seed: "seed",
  llm: "llm",
  validator: "validator",
  expression: "expression",
  note: "note",
  processor: null,
};

// Root page entries: a copy of the shared block groups.
const ROOT_GROUPS: RootGroup[] = [...BLOCK_GROUPS];
// Same entries with the "seed" group moved to the front; the relative order of
// the remaining groups is preserved.
const ROOT_GROUPS_WITH_SEED_FIRST: RootGroup[] = [
  ...ROOT_GROUPS.filter((group) => group.kind === "seed"),
  ...ROOT_GROUPS.filter((group) => group.kind !== "seed"),
];
// Block groups whose blocks are included when searching from the root page.
const SEARCHABLE_KINDS: SheetKind[] = [
  "sampler",
  "seed",
  "llm",
  "validator",
  "expression",
  "note",
];
// Display text for the processor entry.
const PROCESSOR_TITLE = "Final dataset shape";
const PROCESSOR_DESCRIPTION = "Rename, reorder, or reshape the final dataset.";
// Feature switch for the processor entry; currently off.
const SHOW_PROCESSOR_IN_BLOCK_SHEET = false;
// Block types in the "llm" group that configure AI access rather than
// generate content (provider, model preset, tool access).
const LLM_SETUP_TYPES = new Set<BlockType>([
  "model_provider",
  "model_config",
  "tool_config",
]);

/**
 * One row of the block sheet: icon, title (with optional badge), description
 * and a trailing affordance.
 *
 * @param icon - Icon shown on the left.
 * @param title - Primary label.
 * @param description - Secondary text under the title.
 * @param onClick - Click handler; not attached while `disabled`.
 * @param isActive - Highlights the row with an accent border.
 * @param draggable - Makes the row a drag source (ignored while `disabled`).
 * @param onDragStart - Drag-start handler; not attached while `disabled`.
 * @param trailing - "chevron" (default), "drag" handle, or "none".
 * @param disabled - Disables click and drag and dims the row.
 * @param badge - Optional small label rendered next to the title.
 */
function BlockSheetButton({
  icon,
  title,
  description,
  onClick,
  isActive = false,
  draggable = false,
  onDragStart,
  trailing = "chevron",
  disabled = false,
  badge,
}: {
  icon: typeof Database02Icon;
  title: string;
  description: string;
  onClick: () => void;
  isActive?: boolean;
  draggable?: boolean;
  onDragStart?: (event: ReactDragEvent<HTMLButtonElement>) => void;
  trailing?: "chevron" | "drag" | "none";
  disabled?: boolean;
  badge?: string;
}): ReactElement {
  // A disabled row is never a drag source, so it must not advertise the grab
  // cursor either; otherwise it would conflict with `cursor-not-allowed`.
  const canDrag = draggable && !disabled;

  return (
    <button
      type="button"
      onClick={disabled ? undefined : onClick}
      disabled={disabled}
      draggable={canDrag}
      onDragStart={disabled ? undefined : onDragStart}
      className={`flex w-full items-center gap-3 border-l-2 bg-background px-3 py-3 text-left transition ${
        disabled ? "cursor-not-allowed opacity-60" : "hover:bg-muted/35"
      } ${
        isActive
          ? "border-emerald-500"
          : disabled
            ? "border-transparent"
            : "border-transparent hover:border-border/60"
      } ${canDrag ? "cursor-grab active:cursor-grabbing" : ""}`}
    >
      <div className="flex size-9 items-center justify-center rounded-xl text-foreground/70">
        <HugeiconsIcon icon={icon} className="size-5" />
      </div>
      <div className="min-w-0 flex-1">
        <div className="flex items-center gap-2">
          <p className="break-words text-sm font-semibold text-foreground">
            {title}
          </p>
          {badge ? (
            <Badge variant="outline" className="rounded-full text-[10px]">
              {badge}
            </Badge>
          ) : null}
        </div>
        <p className="break-words text-[11px] text-muted-foreground">
          {description}
        </p>
      </div>
      {trailing === "chevron" ? (
        <HugeiconsIcon
          icon={ArrowRight01Icon}
          className="size-3.5 text-muted-foreground"
        />
      ) : trailing === "drag" ? (
        <HugeiconsIcon
          icon={DragDropVerticalIcon}
          strokeWidth={3.5}
          className="size-5 text-foreground"
        />
      ) : null}
    </button>
  );
}

/**
 * Floating "add a step" controls: a side sheet for picking recipe blocks plus
 * two icon buttons wired to `onImport` and `onCopy`.
 *
 * The sheet has three kinds of view, selected by `sheetView`:
 * - "root": the list of block groups (or a flat list of matching blocks while
 *   a search term is entered);
 * - "processor": a single processor entry (only when
 *   SHOW_PROCESSOR_IN_BLOCK_SHEET is truthy);
 * - any other value: the blocks belonging to that view's kind.
 *
 * Open state is controlled when `open` is a boolean and kept in local state
 * otherwise; `onOpenChange` is notified in both cases.
 */
export function BlockSheet({
  container,
  sheetView,
  onViewChange,
  open,
  onOpenChange,
  onAddSampler,
  onAddSeed,
  onAddLlm,
  onAddModelProvider,
  onAddModelConfig,
  onAddToolProfile,
  onAddExpression,
  onAddValidator,
  onAddMarkdownNote,
  onOpenProcessors,
  copied,
  onCopy,
  onImport,
}: BlockSheetProps): ReactElement {
  const sheetTitle = getSheetTitle(sheetView);
  // Fallback open state, used only when the parent does not pass `open`.
  const [uncontrolledOpen, setUncontrolledOpen] = useState(false);
  const [search, setSearch] = useState("");
  // Block lists used by the root-view shortcuts (single-block groups are added
  // directly instead of opening a sub-view).
  const expressionBlocks = useMemo(() => getBlocksForKind("expression"), []);
  const noteBlocks = useMemo(() => getBlocksForKind("note"), []);
  const seedBlocks = useMemo(() => getBlocksForKind("seed"), []);
  const isControlled = typeof open === "boolean";
  const sheetOpen = isControlled ? (open as boolean) : uncontrolledOpen;
  // Search is matched case-insensitively on the trimmed term.
  const normalizedSearch = search.trim().toLowerCase();
  const hasSearch = normalizedSearch.length > 0;
  const isProcessorView = sheetView === "processor";
  const isRootView = sheetView === "root";
  const isScopedBlockView = !isRootView && !isProcessorView;

  // Updates local state only in uncontrolled mode; the parent is always told.
  const setSheetOpen = (nextOpen: boolean) => {
    if (!isControlled) {
      setUncontrolledOpen(nextOpen);
    }
    onOpenChange?.(nextOpen);
  };
  // Substring match against either the title or the description.
  const matchesSearch = useCallback(
    (title: string, description: string) =>
      title.toLowerCase().includes(normalizedSearch) ||
      description.toLowerCase().includes(normalizedSearch),
    [normalizedSearch],
  );

  // Every block of every searchable kind, flattened into one list.
  const searchableBlocks = useMemo(
    () => SEARCHABLE_KINDS.flatMap((kind) => getBlocksForKind(kind)),
    [],
  );
  // Root-view search results; empty while no search term is entered.
  const rootSearchBlocks = useMemo(() => {
    if (!hasSearch) {
      return [];
    }
    return searchableBlocks.filter((item) =>
      matchesSearch(item.title, item.description),
    );
  }, [hasSearch, matchesSearch, searchableBlocks]);

  // Blocks for the current sub-view, narrowed by the search term if any.
  // Falls back to the "sampler" kind when VIEW_KIND has no entry for the view.
  const scopedBlocks = useMemo(() => {
    if (!isScopedBlockView) {
      return [];
    }
    const blocks = getBlocksForKind(VIEW_KIND[sheetView] ?? "sampler");
    if (!hasSearch) {
      return blocks;
    }
    return blocks.filter((item) => matchesSearch(item.title, item.description));
  }, [hasSearch, isScopedBlockView, matchesSearch, sheetView]);
  // The LLM view is split into a "Create" section and a "Setup" section
  // according to membership in LLM_SETUP_TYPES.
  const llmCreateBlocks =
    sheetView === "llm"
      ? scopedBlocks.filter((item) => !LLM_SETUP_TYPES.has(item.type))
      : [];
  const llmSetupBlocks =
    sheetView === "llm"
      ? scopedBlocks.filter((item) => LLM_SETUP_TYPES.has(item.type))
      : [];
  // In the seed view without a search term, "seed_unstructured" is pulled out
  // as the featured entry and the remaining blocks are listed separately.
  // While searching, nothing is featured and all matches are listed together.
  const featuredSeedBlock =
    sheetView === "seed" && !hasSearch
      ? scopedBlocks.find((item) => item.type === "seed_unstructured") ?? null
      : null;
  const otherSeedBlocks =
    sheetView === "seed" && !hasSearch
      ? scopedBlocks.filter((item) => item.type !== "seed_unstructured")
      : scopedBlocks;

  // NOTE(review): the JSX below renders `rootGroups` only when `!hasSearch`,
  // so the filtered branch here does not reach the screen in this component.
  const rootGroups = useMemo(() => {
    if (!hasSearch) {
      return ROOT_GROUPS_WITH_SEED_FIRST;
    }
    return ROOT_GROUPS.filter((group) => {
      if (matchesSearch(group.title, group.description)) {
        return true;
      }
      if (group.kind === "processor") {
        return matchesSearch(PROCESSOR_TITLE, PROCESSOR_DESCRIPTION);
      }
      return getBlocksForKind(group.kind).some((item) =>
        matchesSearch(item.title, item.description),
      );
    });
  }, [hasSearch, matchesSearch]);
  // Show the empty-state message when the active view has nothing to list.
  const showNoMatches =
    (isRootView && hasSearch && rootSearchBlocks.length === 0) ||
    (isScopedBlockView && scopedBlocks.length === 0) ||
    (isProcessorView &&
      hasSearch &&
      !matchesSearch(PROCESSOR_TITLE, PROCESSOR_DESCRIPTION));

  // Builds a dragstart handler that serializes { kind, type } as JSON under
  // both the recipe-block MIME type and "text/plain".
  const buildDragStart =
    (kind: SheetKind, type: BlockType) =>
    (event: ReactDragEvent<HTMLButtonElement>) => {
      const payload: RecipeBlockDragPayload = { kind, type };
      const serialized = JSON.stringify(payload);
      event.dataTransfer.setData(RECIPE_BLOCK_DND_MIME, serialized);
      event.dataTransfer.setData("text/plain", serialized);
      event.dataTransfer.effectAllowed = "copy";
    };
  // Individual block entries always show the drag affordance.
  const getTrailing = (): "drag" => "drag";
  // Closes the sheet, then routes to the matching add-callback by kind. LLM
  // setup types have dedicated callbacks; any kind not handled explicitly
  // falls through to adding a markdown note.
  const onBlockClick = (kind: SheetKind, type: BlockType) => {
    setSheetOpen(false);
    if (kind === "sampler") {
      onAddSampler(type as SamplerType);
      return;
    }
    if (kind === "seed") {
      onAddSeed(type as SeedBlockType);
      return;
    }
    if (kind === "llm") {
      if (type === "model_provider") {
        onAddModelProvider();
        return;
      }
      if (type === "model_config") {
        onAddModelConfig();
        return;
      }
      if (type === "tool_config") {
        onAddToolProfile();
        return;
      }
      onAddLlm(type as LlmType);
      return;
    }
    if (kind === "validator") {
      onAddValidator(
        type as "validator_python" | "validator_sql" | "validator_oxc",
      );
      return;
    }
    if (kind === "expression") {
      onAddExpression();
      return;
    }
    onAddMarkdownNote();
  };

  // Rendering notes:
  // - Opening the sheet resets it to the root view with an empty search.
  // - The sections below are guarded by `sheetView`, so at most one view's
  //   content is rendered at a time regardless of their order in the markup.
  // - NOTE(review): the processor-view entry calls `onOpenProcessors` without
  //   closing the sheet, whereas the root-view shortcut closes it first —
  //   confirm this difference is intended.
  return (
    <div className="flex flex-col items-end gap-2">
      <Sheet
        open={sheetOpen}
        onOpenChange={(nextOpen) => {
          setSheetOpen(nextOpen);
          if (nextOpen) {
            onViewChange("root");
            setSearch("");
          }
        }}
      >
        <SheetTrigger asChild={true}>
          <Button
            size="icon"
            className={RECIPE_FLOATING_ICON_BUTTON_CLASS}
            variant="ghost"
            aria-label="Add a step"
            title="Add a step"
          >
            <HugeiconsIcon
              icon={PlusSignIcon}
              className="size-5 text-muted-foreground group-hover:text-primary"
            />
          </Button>
        </SheetTrigger>
        <SheetContent
          side="right"
          container={container}
          position="absolute"
          overlayPosition="absolute"
          className="absolute gap-0 p-0 shadow-none"
          overlayClassName="bg-transparent pointer-events-none backdrop-blur-none supports-backdrop-filter:backdrop-blur-none"
        >
          <SheetHeader className="px-6 py-5">
            <div className="flex items-center gap-2">
              {sheetView !== "root" && (
                <Button
                  type="button"
                  variant="ghost"
                  size="icon-sm"
                  onClick={() => onViewChange("root")}
                  aria-label="Back to step groups"
                  title="Back to step groups"
                >
                  <HugeiconsIcon icon={ArrowLeft02Icon} className="size-4" />
                </Button>
              )}
              <SheetTitle>{sheetTitle}</SheetTitle>
            </div>
            <div className="relative mt-3">
              <HugeiconsIcon
                icon={Search01Icon}
                className="pointer-events-none absolute left-2.5 top-1/2 size-4 -translate-y-1/2 text-muted-foreground"
              />
              <Input
                value={search}
                onChange={(event) => setSearch(event.target.value)}
                placeholder="Search steps..."
                className="corner-squircle h-9 pl-8"
                aria-label="Search steps"
              />
            </div>
          </SheetHeader>
          <div className="flex-1 min-h-0 overflow-y-auto py-4">
            <div className="mt-4 flex flex-col gap-2">
              {isRootView && !hasSearch && (
                <div className={`mx-3 mb-2 rounded-2xl border px-4 py-4 ${RECIPE_STUDIO_ONBOARDING_SURFACE_TONE}`}>
                  <div className="flex items-start gap-3">
                    <div className={`mt-0.5 flex size-9 shrink-0 items-center justify-center rounded-xl ${RECIPE_STUDIO_ONBOARDING_ICON_TONE}`}>
                      <HugeiconsIcon
                        icon={DocumentAttachmentIcon}
                        className="size-4"
                      />
                    </div>
                    <div className="min-w-0 flex-1 space-y-2">
                      <div>
                        <p className="text-sm font-semibold text-foreground">
                          Need a place to start?
                        </p>
                        <p className="text-xs text-muted-foreground">
                          Open Source data first, then add generation and checks
                          on top of it.
                        </p>
                      </div>
                      <Button
                        type="button"
                        size="sm"
                        variant="ghost"
                        className="corner-squircle justify-start px-0 text-primary hover:bg-transparent hover:text-primary/80"
                        onClick={() => onViewChange("seed")}
                      >
                        Start with source data
                      </Button>
                    </div>
                  </div>
                </div>
              )}
              {isRootView &&
                hasSearch &&
                rootSearchBlocks.map((item) => (
                  <BlockSheetButton
                    key={`${item.kind}:${item.type}`}
                    icon={item.icon}
                    title={item.title}
                    description={item.description}
                    draggable={true}
                    onDragStart={buildDragStart(item.kind, item.type)}
                    trailing={getTrailing()}
                    onClick={() => onBlockClick(item.kind, item.type)}
                  />
                ))}
              {isRootView &&
                !hasSearch &&
                rootGroups.map((item) => (
                  <BlockSheetButton
                    key={item.kind}
                    icon={item.icon}
                    title={item.title}
                    description={item.description}
                    draggable={item.kind === "expression" || item.kind === "note"}
                    onDragStart={
                      item.kind === "expression" && expressionBlocks[0]
                        ? buildDragStart("expression", expressionBlocks[0].type)
                        : item.kind === "note" && noteBlocks[0]
                          ? buildDragStart("note", noteBlocks[0].type)
                          : undefined
                    }
                    trailing={
                      item.kind === "expression" || item.kind === "note"
                        ? "drag"
                        : "chevron"
                    }
                    onClick={() => {
                      if (item.kind === "seed" && seedBlocks.length === 1) {
                        setSheetOpen(false);
                        onAddSeed(seedBlocks[0].type as SeedBlockType);
                        return;
                      }
                      if (item.kind === "expression" && expressionBlocks.length === 1) {
                        setSheetOpen(false);
                        onAddExpression();
                        return;
                      }
                      if (item.kind === "note" && noteBlocks.length === 1) {
                        setSheetOpen(false);
                        onAddMarkdownNote();
                        return;
                      }
                      onViewChange(item.kind);
                    }}
                  />
                ))}
              {SHOW_PROCESSOR_IN_BLOCK_SHEET && isProcessorView && (
                (!hasSearch ||
                  matchesSearch(PROCESSOR_TITLE, PROCESSOR_DESCRIPTION)) && (
                  <BlockSheetButton
                    icon={CodeIcon}
                    title={PROCESSOR_TITLE}
                    description={PROCESSOR_DESCRIPTION}
                    onClick={onOpenProcessors}
                  />
                )
              )}
              {isScopedBlockView &&
                sheetView === "seed" &&
                featuredSeedBlock && (
                  <div className="pb-2">
                    <div className="px-3 pb-2">
                      <p className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
                        Recommended first step
                      </p>
                      <p className="text-xs text-muted-foreground">
                        Best when you want to turn PDFs, DOCX files, or text
                        files into source rows.
                      </p>
                    </div>
                    <BlockSheetButton
                      icon={featuredSeedBlock.icon}
                      title={featuredSeedBlock.title}
                      description={featuredSeedBlock.description}
                      draggable={true}
                      onDragStart={buildDragStart(
                        featuredSeedBlock.kind,
                        featuredSeedBlock.type,
                      )}
                      trailing={getTrailing()}
                      badge="Start here"
                      onClick={() =>
                        onBlockClick(
                          featuredSeedBlock.kind,
                          featuredSeedBlock.type,
                        )
                      }
                    />
                  </div>
                )}
              {isScopedBlockView &&
                sheetView === "seed" &&
                !hasSearch &&
                otherSeedBlocks.length > 0 && (
                  <div className="px-3 pt-2 pb-2">
                    <p className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
                      Other source options
                    </p>
                    <p className="text-xs text-muted-foreground">
                      Use a dataset or structured file when your source is
                      already tabular.
                    </p>
                  </div>
                )}
              {isScopedBlockView &&
                sheetView === "llm" &&
                llmCreateBlocks.length > 0 && (
                  <div className="px-3 pb-2">
                    <p className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
                      Create
                    </p>
                    <p className="text-xs text-muted-foreground">
                      Start with the kind of output you want to generate.
                    </p>
                  </div>
                )}
              {isScopedBlockView &&
                sheetView === "llm" &&
                llmCreateBlocks.map((item) => (
                  <BlockSheetButton
                    key={item.type}
                    icon={item.icon}
                    title={item.title}
                    description={item.description}
                    draggable={true}
                    onDragStart={buildDragStart(item.kind, item.type)}
                    trailing={getTrailing()}
                    onClick={() => onBlockClick(item.kind, item.type)}
                  />
                ))}
              {isScopedBlockView &&
                sheetView === "llm" &&
                llmSetupBlocks.length > 0 && (
                  <div className="px-3 pt-4 pb-2">
                    <p className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
                      Setup
                    </p>
                    <p className="text-xs text-muted-foreground">
                      Add these only when you need a new model or tool setup.
                    </p>
                  </div>
                )}
              {isScopedBlockView &&
                sheetView === "llm" &&
                llmSetupBlocks.map((item) => (
                  <BlockSheetButton
                    key={item.type}
                    icon={item.icon}
                    title={item.title}
                    description={item.description}
                    draggable={true}
                    onDragStart={buildDragStart(item.kind, item.type)}
                    trailing={getTrailing()}
                    onClick={() => onBlockClick(item.kind, item.type)}
                  />
                ))}
              {isScopedBlockView &&
                sheetView === "seed" &&
                otherSeedBlocks.map((item) => (
                  <BlockSheetButton
                    key={item.type}
                    icon={item.icon}
                    title={item.title}
                    description={item.description}
                    draggable={true}
                    onDragStart={buildDragStart(item.kind, item.type)}
                    trailing={getTrailing()}
                    onClick={() => onBlockClick(item.kind, item.type)}
                  />
                ))}
              {isScopedBlockView &&
                sheetView !== "llm" &&
                sheetView !== "seed" &&
                scopedBlocks.map(
                  (item) => (
                    <BlockSheetButton
                      key={item.type}
                      icon={item.icon}
                      title={item.title}
                      description={item.description}
                      draggable={true}
                      onDragStart={buildDragStart(item.kind, item.type)}
                      trailing={getTrailing()}
                      onClick={() => onBlockClick(item.kind, item.type)}
                    />
                  ),
                )}
              {SHOW_PROCESSOR_IN_BLOCK_SHEET && isRootView && !hasSearch && (
                <div className="px-3 pt-3">
                  <button
                    type="button"
                    onClick={() => {
                      setSheetOpen(false);
                      onOpenProcessors();
                    }}
                    className="flex w-full items-center justify-between gap-3 rounded-xl border border-border/60 px-3 py-3 text-left transition hover:bg-muted/25"
                  >
                    <div className="min-w-0">
                      <p className="text-sm font-medium text-foreground">
                        Edit final dataset shape
                      </p>
                      <p className="break-words text-xs text-muted-foreground">
                        Rename, reorder, or reshape your final output.
                      </p>
                    </div>
                    <HugeiconsIcon
                      icon={CodeIcon}
                      className="size-4 text-muted-foreground"
                    />
                  </button>
                </div>
              )}
              {showNoMatches && (
                <p className="px-3 py-2 text-xs text-muted-foreground">
                  No matching steps.
                </p>
              )}
            </div>
          </div>
        </SheetContent>
      </Sheet>
      <Button
        type="button"
        variant="ghost"
        size="icon"
        className={RECIPE_FLOATING_ICON_BUTTON_CLASS}
        onClick={onImport}
        aria-label="Paste recipe JSON"
        title="Paste recipe JSON"
      >
        <HugeiconsIcon
          icon={Upload01Icon}
          className="size-5 text-muted-foreground group-hover:text-primary"
        />
      </Button>
      <Button
        type="button"
        variant="ghost"
        size="icon"
        className={RECIPE_FLOATING_ICON_BUTTON_CLASS}
        onClick={onCopy}
        aria-label={copied ? "Recipe JSON copied" : "Copy recipe JSON"}
        title={copied ? "Recipe JSON copied" : "Copy recipe JSON"}
      >
        <HugeiconsIcon
          icon={copied ? Tick02Icon : Copy02Icon}
          className="size-5 text-muted-foreground group-hover:text-primary"
        />
      </Button>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/chip-input.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Cancel01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import {
  type KeyboardEvent,
  type ReactElement,
  useEffect,
  useId,
  useMemo,
  useRef,
  useState,
} from "react";

/** Props for {@link ChipInput}. */
type ChipInputProps = {
  // Chip labels, rendered in array order.
  values: string[];
  // Called with the trimmed text when a new chip is committed.
  onAdd: (value: string) => void;
  // Called with the index of the chip to remove.
  onRemove: (index: number) => void;
  // Placeholder text; only shown while `values` is empty.
  placeholder?: string;
  // Optional suggestions exposed through a <datalist>; typing or picking an
  // exact (trimmed) match commits it immediately.
  suggestions?: string[];
};

/**
 * Free-text chip editor: existing `values` are rendered as removable chips
 * followed by a text input for adding more.
 *
 * Commit rules:
 * - Enter commits whatever is typed (after trimming), unless an IME
 *   composition is in progress.
 * - Typing or picking an exact suggestion commits it immediately.
 * - Blur commits the draft only when it matches a suggestion.
 * - Backspace on an empty draft removes the last chip.
 *
 * The component does not own `values`; it reports changes via `onAdd` and
 * `onRemove`.
 */
export function ChipInput({
  values,
  onAdd,
  onRemove,
  placeholder = "Type and press Enter",
  suggestions,
}: ChipInputProps): ReactElement {
  const [draft, setDraft] = useState("");
  // True once the container has grown past a single row; switches the corner
  // style so a tall box is not rendered as a pill.
  const [isWrapped, setIsWrapped] = useState(false);
  const containerRef = useRef<HTMLDivElement | null>(null);
  const listId = useId();
  // Trimmed suggestions for O(1) exact-match checks.
  const suggestionSet = useMemo(
    () => new Set((suggestions ?? []).map((value) => value.trim())),
    [suggestions],
  );

  // Re-measure on chip/draft changes and whenever the container is resized.
  useEffect(() => {
    const element = containerRef.current;
    if (!element) {
      return;
    }
    const syncWrapped = () => {
      setIsWrapped(element.clientHeight > 44);
    };
    syncWrapped();
    const observer = new ResizeObserver(syncWrapped);
    observer.observe(element);
    return () => observer.disconnect();
  }, [values.length, draft]);

  // Commits `rawValue` as a chip. With `allowAny` false, only values present
  // in the suggestion set are accepted. Blank input is always ignored.
  function addValue(rawValue: string, allowAny: boolean): void {
    const trimmed = rawValue.trim();
    if (!trimmed) {
      return;
    }
    if (!allowAny && !suggestionSet.has(trimmed)) {
      return;
    }
    onAdd(trimmed);
    setDraft("");
  }

  const handleKeyDown = (event: KeyboardEvent<HTMLInputElement>) => {
    // While an IME composition is active, Enter/Backspace belong to the input
    // method (e.g. confirming a candidate). Acting on them here would commit a
    // half-composed chip, so leave the keystroke to the IME.
    if (event.nativeEvent.isComposing) {
      return;
    }
    if (event.key === "Enter") {
      event.preventDefault();
      addValue(draft, true);
    }
    if (event.key === "Backspace" && !draft && values.length > 0) {
      onRemove(values.length - 1);
    }
  };

  // Keeps the draft in sync and auto-commits as soon as it equals a suggestion
  // (this is also how a <datalist> selection is picked up).
  function handleChange(nextDraft: string): void {
    setDraft(nextDraft);
    if (suggestionSet.has(nextDraft.trim())) {
      addValue(nextDraft, false);
    }
  }

  return (
    <div
      ref={containerRef}
      className={`bg-input/30 border-input focus-within:border-ring focus-within:ring-ring/50 flex min-h-9 flex-wrap items-center gap-1.5 border bg-clip-padding px-1.5 py-1.5 text-sm transition-colors focus-within:ring-[3px] ${isWrapped ? "corner-squircle rounded-xl" : "rounded-4xl"}`}
    >
      {values.map((value, index) => (
        <span
          key={`${value}-${index}`}
          className="bg-muted-foreground/10 text-foreground flex h-[calc(--spacing(5.5))] w-fit items-center justify-center gap-1 rounded-4xl pr-0 pl-2 text-xs font-medium whitespace-nowrap"
        >
          {value}
          <Button
            type="button"
            variant="ghost"
            size="icon-xs"
            className="-ml-1 opacity-50 hover:opacity-100"
            aria-label={`Remove ${value}`}
            onClick={() => onRemove(index)}
          >
            <HugeiconsIcon
              icon={Cancel01Icon}
              strokeWidth={2}
              className="pointer-events-none"
            />
          </Button>
        </span>
      ))}
      <input
        className="nodrag min-w-16 flex-1 bg-transparent text-sm outline-none placeholder:text-muted-foreground"
        placeholder={values.length === 0 ? placeholder : ""}
        value={draft}
        list={suggestions && suggestions.length > 0 ? listId : undefined}
        onChange={(event) => handleChange(event.target.value)}
        onBlur={() => addValue(draft, false)}
        onKeyDown={handleKeyDown}
      />
      {suggestions && suggestions.length > 0 && (
        <datalist id={listId}>
          {suggestions.map((value) => (
            <option key={value} value={value} />
          ))}
        </datalist>
      )}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/controls/layout-controls.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type ReactElement, useCallback } from "react";
import {
  Panel,
  useReactFlow,
  useUpdateNodeInternals,
} from "@xyflow/react";
import { Button } from "@/components/ui/button";
import { getFitNodeIdsIgnoringNotes } from "../../utils/graph/fit-view";

/** Props for {@link LayoutControls}. */
type LayoutControlsProps = {
  // Current layout direction; shown verbatim as the toggle button's label.
  direction: "LR" | "TB";
  // Invoked when "Auto layout" is clicked, and again one frame after a
  // direction toggle.
  onLayout: () => void;
  // Invoked when the direction button is clicked.
  onToggleDirection: () => void;
};

/**
 * Top-left graph panel with an "Auto layout" button and a direction toggle.
 *
 * Both actions finish by refreshing every node's internals and then fitting
 * the viewport (ignoring note nodes). Each stage is deferred by one animation
 * frame so it runs after the previous stage has been applied.
 */
export function LayoutControls({
  direction,
  onLayout,
  onToggleDirection,
}: LayoutControlsProps): ReactElement {
  const { fitView, getNodes } = useReactFlow();
  const updateNodeInternals = useUpdateNodeInternals();

  // Ask React Flow to re-measure every node currently in the graph.
  const refreshNodeInternals = useCallback(() => {
    const allIds = getNodes().map(({ id }) => id);
    if (allIds.length === 0) {
      return;
    }
    updateNodeInternals(allIds);
  }, [getNodes, updateNodeInternals]);

  // Animated fit over the nodes selected by getFitNodeIdsIgnoringNotes.
  const fitToGraph = useCallback(() => {
    const nodes = getFitNodeIdsIgnoringNotes(getNodes());
    fitView({ duration: 250, nodes });
  }, [fitView, getNodes]);

  // layout -> (frame) refresh internals -> (frame) fit
  const handleLayout = useCallback(() => {
    onLayout();
    requestAnimationFrame(() => {
      refreshNodeInternals();
      requestAnimationFrame(() => fitToGraph());
    });
  }, [fitToGraph, onLayout, refreshNodeInternals]);

  // toggle -> (frame) the same sequence as handleLayout
  const handleToggleDirection = useCallback(() => {
    onToggleDirection();
    requestAnimationFrame(() => handleLayout());
  }, [handleLayout, onToggleDirection]);

  return (
    <Panel position="top-left" className="m-3 flex items-center gap-2">
      <Button
        size="sm"
        className="corner-squircle"
        variant="secondary"
        onClick={handleLayout}
      >
        Auto layout
      </Button>
      <Button
        size="sm"
        className="corner-squircle"
        variant="outline"
        onClick={handleToggleDirection}
      >
        {direction}
      </Button>
    </Panel>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/controls/run-validate-floating-controls.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { CookBookIcon, TestTube01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import type { ReactElement } from "react";
import { Button } from "@/components/ui/button";
import type { RecipeExecutionKind } from "../../execution-types";

/** Props for {@link RunValidateFloatingControls}. */
type RunValidateFloatingControlsProps = {
  // Disables the Run button and switches its label to "Running...".
  runBusy: boolean;
  // Execution kind passed to `onOpenRunDialog` when Run is clicked.
  runDialogKind: RecipeExecutionKind;
  // Disables the Check button and switches its label to "Checking...".
  validateLoading: boolean;
  // Also disables the Check button (the Run button is not affected).
  executionLocked: boolean;
  // Invoked with `runDialogKind` when Run is clicked.
  onOpenRunDialog: (kind: RecipeExecutionKind) => void;
  // Invoked when Check is clicked.
  onValidate: () => void;
};

/**
 * Bottom-centered floating "Run" / "Check" button pair.
 *
 * The outer strip ignores pointer events so the canvas underneath stays
 * interactive; only the inner button row captures them.
 */
export function RunValidateFloatingControls({
  runBusy,
  runDialogKind,
  validateLoading,
  executionLocked,
  onOpenRunDialog,
  onValidate,
}: RunValidateFloatingControlsProps): ReactElement {
  const runLabel = runBusy ? "Running..." : "Run";
  const checkLabel = validateLoading ? "Checking..." : "Check";
  // Check is unavailable both while a validation is in flight and while
  // execution is locked.
  const checkDisabled = validateLoading || executionLocked;
  const openRunDialog = () => onOpenRunDialog(runDialogKind);

  return (
    <div className="pointer-events-none absolute inset-x-0 bottom-3 z-20 flex justify-center">
      <div className="pointer-events-auto flex items-center gap-2">
        <Button
          type="button"
          className="corner-squircle h-11 px-5"
          onClick={openRunDialog}
          disabled={runBusy}
        >
          <HugeiconsIcon icon={CookBookIcon} className="size-4" />
          {runLabel}
        </Button>
        <Button
          type="button"
          variant="outline"
          className="corner-squircle h-11 px-5"
          onClick={onValidate}
          disabled={checkDisabled}
        >
          <HugeiconsIcon icon={TestTube01Icon} className="size-4" />
          {checkLabel}
        </Button>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/controls/viewport-controls.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type ReactElement, useCallback } from "react";
import { Lock, LockOpen, Maximize2, Minus, Plus } from "lucide-react";
import { Panel, useReactFlow } from "@xyflow/react";
import { Button } from "@/components/ui/button";
import { getFitNodeIdsIgnoringNotes } from "../../utils/graph/fit-view";
import { RECIPE_FLOATING_ICON_BUTTON_CLASS } from "../recipe-floating-icon-button-class";

/** Props for {@link ViewportControls}. */
type ViewportControlsProps = {
  // Selects the lock button's icon and aria-label (open lock when true).
  interactive: boolean;
  // Disables the lock button; defaults to false.
  lockDisabled?: boolean;
  // Invoked when the lock button is clicked.
  onToggleInteractive: () => void;
};

/**
 * Bottom-left graph panel with zoom in, zoom out, fit view and an
 * interaction-lock toggle.
 *
 * Zooming animates over 150ms; fitting animates over 250ms and targets the
 * nodes returned by getFitNodeIdsIgnoringNotes.
 */
export function ViewportControls({
  interactive,
  lockDisabled = false,
  onToggleInteractive,
}: ViewportControlsProps): ReactElement {
  const { zoomIn, zoomOut, fitView, getNodes } = useReactFlow();

  const handleZoomIn = useCallback(() => {
    zoomIn({ duration: 150 });
  }, [zoomIn]);

  const handleZoomOut = useCallback(() => {
    zoomOut({ duration: 150 });
  }, [zoomOut]);

  const handleFitView = useCallback(() => {
    const nodes = getFitNodeIdsIgnoringNotes(getNodes());
    fitView({ duration: 250, nodes });
  }, [fitView, getNodes]);

  // The three view buttons differ only in label, icon and handler.
  const viewButtons = [
    { label: "Zoom in", Icon: Plus, onClick: handleZoomIn },
    { label: "Zoom out", Icon: Minus, onClick: handleZoomOut },
    { label: "Fit view", Icon: Maximize2, onClick: handleFitView },
  ];

  // The lock button describes the action a click will perform.
  const LockGlyph = interactive ? LockOpen : Lock;
  const lockLabel = interactive ? "Lock interaction" : "Unlock interaction";

  return (
    <Panel position="bottom-left" className="m-3 flex items-center gap-2">
      {viewButtons.map(({ label, Icon, onClick }) => (
        <Button
          key={label}
          type="button"
          variant="ghost"
          size="icon"
          className={RECIPE_FLOATING_ICON_BUTTON_CLASS}
          onClick={onClick}
          aria-label={label}
        >
          <Icon className="size-4" />
        </Button>
      ))}
      <Button
        type="button"
        variant="ghost"
        size="icon"
        className={RECIPE_FLOATING_ICON_BUTTON_CLASS}
        disabled={lockDisabled}
        onClick={onToggleInteractive}
        aria-label={lockLabel}
      >
        <LockGlyph className="size-4" />
      </Button>
    </Panel>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/execution-columns-tab.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import type { AnalysisColumnStat } from "./executions-view-helpers";

/** Props for {@link ExecutionColumnsTab}. */
type ExecutionColumnsTabProps = {
  // Per-column statistics; each entry becomes one table row. An empty array
  // shows the "No column statistics yet." placeholder instead of a table.
  analysisColumns: AnalysisColumnStat[];
};

/**
 * Renders the "Column statistics" card of an execution.
 *
 * Shows a placeholder sentence when `analysisColumns` is empty; otherwise a
 * table with one row per column. Numeric statistics that are `null` are
 * displayed as "--".
 */
export function ExecutionColumnsTab({
  analysisColumns,
}: ExecutionColumnsTabProps): ReactElement {
  return (
    <div className="mt-3 rounded-xl border p-3">
      <p className="mb-2 text-sm font-semibold">Column statistics</p>
      {analysisColumns.length === 0 ? (
        <p className="text-xs text-muted-foreground">No column statistics yet.</p>
      ) : (
        <Table>
          <TableHeader>
            <TableRow>
              <TableHead>Column</TableHead>
              <TableHead>Type</TableHead>
              <TableHead>Data type</TableHead>
              <TableHead>Unique</TableHead>
              <TableHead>Nulls</TableHead>
              <TableHead>Input tok avg</TableHead>
              <TableHead>Output tok avg</TableHead>
            </TableRow>
          </TableHeader>
          <TableBody>
            {analysisColumns.map((column, index) => (
              // Names are not guaranteed unique: the parser in
              // executions-view-helpers substitutes "--" for every missing
              // name, so the position is part of the key to avoid duplicate
              // React keys.
              <TableRow key={`${index}-${column.column_name}`}>
                <TableCell>{column.column_name}</TableCell>
                <TableCell>{column.column_type}</TableCell>
                <TableCell>{column.simple_dtype}</TableCell>
                <TableCell>{column.num_unique ?? "--"}</TableCell>
                <TableCell>{column.num_null ?? "--"}</TableCell>
                <TableCell>{column.input_tokens_mean ?? "--"}</TableCell>
                <TableCell>{column.output_tokens_mean ?? "--"}</TableCell>
              </TableRow>
            ))}
          </TableBody>
        </Table>
      )}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/execution-data-tab.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";
import type { ColumnDef } from "@tanstack/react-table";
import { Button } from "@/components/ui/button";
import { DataTable } from "@/components/ui/data-table";
import {
  DropdownMenu,
  DropdownMenuCheckboxItem,
  DropdownMenuContent,
  DropdownMenuLabel,
  DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu";
import { cn } from "@/lib/utils";
import { isExecutionInProgress } from "../../executions/execution-helpers";
import type { RecipeExecutionRecord } from "../../execution-types";
import { hasExpandableTextCell } from "./executions-view-helpers";

/** Props accepted by `ExecutionDataTab`. */
type ExecutionDataTabProps = {
  /** Execution being shown; the tab reads its `status` and `dataset`. */
  execution: RecipeExecutionRecord;
  /** Column names listed in the "Columns" visibility menu. */
  datasetColumnNames: string[];
  /** Column names currently hidden (their menu checkbox is unchecked). */
  hiddenDatasetColumns: string[];
  /** When true, the page indicator and Prev/Next buttons are rendered. */
  canPageDataset: boolean;
  /** Page number shown in "Page x/y"; Prev is disabled when it is <= 1. */
  currentDatasetPage: number;
  /** Page count shown in "Page x/y"; Next is disabled once it is reached. */
  totalPages: number;
  /** Column definitions handed to the data table; empty means all columns are hidden. */
  tableColumns: ColumnDef<Record<string, unknown>>[];
  /** Rows handed to the data table. */
  datasetRowsForTable: Record<string, unknown>[];
  /** Columns inspected when deciding whether a row can be expanded. */
  visibleDatasetColumnNames: string[];
  /** Expansion flags keyed by row id; a truthy entry gets the "expanded" background. */
  expandedDatasetRows: Record<string, boolean>;
  /** Id of the selected execution; row clicks are ignored while it is null. */
  selectedExecutionIdSafe: string | null;
  /** Receives an updater that maps the current hidden-column list to the next one. */
  onSetHiddenColumns: (updater: (current: string[]) => string[]) => void;
  /** Click handler of the "Prev" button. */
  onPrevPage: () => void;
  /** Click handler of the "Next" button. */
  onNextPage: () => void;
  /** Called with the row id when an expandable row is clicked. */
  onToggleRowExpanded: (rowId: string) => void;
};

/**
 * "Dataset sample" tab of an execution.
 *
 * Lays out a header (column-visibility menu plus an optional pager) above
 * either an explanatory message or the data table. Rows that contain an
 * expandable text cell are clickable and toggle their expanded state.
 */
export function ExecutionDataTab({
  execution,
  datasetColumnNames,
  hiddenDatasetColumns,
  canPageDataset,
  currentDatasetPage,
  totalPages,
  tableColumns,
  datasetRowsForTable,
  visibleDatasetColumnNames,
  expandedDatasetRows,
  selectedExecutionIdSafe,
  onSetHiddenColumns,
  onPrevPage,
  onNextPage,
  onToggleRowExpanded,
}: ExecutionDataTabProps): ReactElement {
  // Only consulted by the pager, so it is only evaluated when the pager shows.
  const pagingLocked = canPageDataset && isExecutionInProgress(execution.status);

  const rowCanExpand = (row: Record<string, unknown>): boolean =>
    hasExpandableTextCell(row, visibleDatasetColumnNames);

  // Showing a column removes it from the hidden list; hiding appends it.
  const setColumnVisible = (columnName: string, visible: boolean): void => {
    onSetHiddenColumns((hiddenNow) =>
      visible
        ? hiddenNow.filter((name) => name !== columnName)
        : [...hiddenNow, columnName],
    );
  };

  let body: ReactElement;
  if (execution.dataset.length === 0) {
    body = <p className="text-xs text-muted-foreground">No rows returned.</p>;
  } else if (tableColumns.length === 0) {
    body = (
      <p className="text-xs text-muted-foreground">
        All columns hidden. Use Columns to show at least one.
      </p>
    );
  } else {
    body = (
      <div className="max-h-[55vh] overflow-auto">
        <DataTable
          columns={tableColumns}
          data={datasetRowsForTable}
          getRowClassName={(row, _rowIndex, rowId) =>
            rowCanExpand(row)
              ? cn(
                  "cursor-pointer",
                  expandedDatasetRows[rowId]
                    ? "bg-primary/[0.05]"
                    : "hover:bg-primary/[0.06]",
                )
              : undefined
          }
          onRowClick={(row, _rowIndex, rowId) => {
            if (rowCanExpand(row) && selectedExecutionIdSafe) {
              onToggleRowExpanded(rowId);
            }
          }}
        />
      </div>
    );
  }

  return (
    <div className="mt-3">
      <div className="mb-2 flex flex-wrap items-center justify-between gap-2">
        <p className="text-sm font-semibold">Dataset sample</p>
        <div className="flex items-center gap-2 text-xs text-muted-foreground">
          {datasetColumnNames.length > 0 && (
            <DropdownMenu>
              <DropdownMenuTrigger asChild>
                <Button type="button" size="sm" variant="outline">
                  Columns
                </Button>
              </DropdownMenuTrigger>
              <DropdownMenuContent align="end">
                <DropdownMenuLabel>Visible columns</DropdownMenuLabel>
                {datasetColumnNames.map((columnName) => (
                  <DropdownMenuCheckboxItem
                    key={columnName}
                    checked={!hiddenDatasetColumns.includes(columnName)}
                    // Keep the menu open so several columns can be toggled in a row.
                    onSelect={(event) => event.preventDefault()}
                    onCheckedChange={(checked) =>
                      setColumnVisible(columnName, Boolean(checked))
                    }
                  >
                    {columnName}
                  </DropdownMenuCheckboxItem>
                ))}
              </DropdownMenuContent>
            </DropdownMenu>
          )}
          {canPageDataset && (
            <>
              <span>
                Page {currentDatasetPage}/{totalPages}
              </span>
              <Button
                type="button"
                size="sm"
                variant="outline"
                disabled={pagingLocked || currentDatasetPage <= 1}
                onClick={onPrevPage}
              >
                Prev
              </Button>
              <Button
                type="button"
                size="sm"
                variant="outline"
                disabled={pagingLocked || currentDatasetPage >= totalPages}
                onClick={onNextPage}
              >
                Next
              </Button>
            </>
          )}
        </div>
      </div>
      {body}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/execution-overview-tab.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement, RefObject, UIEvent } from "react";
import {
  Database01Icon,
  Database02Icon,
  Flag02Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { isExecutionInProgress } from "../../executions/execution-helpers";
import type { RecipeExecutionRecord } from "../../execution-types";
import type { ModelUsageRow } from "./executions-view-helpers";
import { formatMetricValue } from "./executions-view-helpers";

/** Props accepted by `ExecutionOverviewTab`. */
type ExecutionOverviewTabProps = {
  /** Execution being shown; the tab reads its `stage` and `status`. */
  execution: RecipeExecutionRecord;
  /** When false, only the terminal panel is rendered. */
  showSummaryCards: boolean;
  /** Left-hand number of the "Records" line (shown as "records / total"). */
  recordsMetric: number | null;
  /** Right-hand number of the "Records" line. */
  totalMetric: number | null;
  /** Pre-formatted text for the "Duration" line. */
  runDuration: string;
  /** Value of the "Columns analyzed" line. */
  columnCount: number;
  /** Value of the "LLM columns" line. */
  llmColumnCount: number;
  /** Value of the "Null rate" line; rendered with one decimal and a "%" suffix. */
  nullRate: number | null;
  /** Names rendered as badges under "Side-effect columns". */
  sideEffects: string[];
  /** Names flagged for low uniqueness; only the first three get their own badge. */
  lowUniquenessColumns: string[];
  /** Rows of the "Model usage" table. */
  modelUsageRows: ModelUsageRow[];
  /** Log lines rendered in the terminal panel, one paragraph per line. */
  terminalLines: string[];
  /** Ref attached to the scrollable terminal container. */
  terminalRef: RefObject<HTMLDivElement | null>;
  /** Scroll handler attached to the terminal container. */
  onTerminalScroll: (event: UIEvent<HTMLDivElement>) => void;
  /** When true (and summary cards are shown), the "Next step" publish card is rendered. */
  canPublish: boolean;
  /** Click handler of the "Publish to Hugging Face" button. */
  onOpenPublish: () => void;
};

/**
 * "Overview" tab of an execution.
 *
 * When `showSummaryCards` is set it renders, top to bottom: an optional
 * publish call-to-action, the "Run summary" and "Insights" cards, and the
 * "Model usage" table. The terminal output panel is always rendered last.
 *
 * Missing metrics are shown as "--". The null rate follows the same rule: it
 * is rendered as e.g. "12.5%" when it is a real number and as a bare "--"
 * otherwise (never "--%" or "NaN%").
 */
export function ExecutionOverviewTab({
  execution,
  showSummaryCards,
  recordsMetric,
  totalMetric,
  runDuration,
  columnCount,
  llmColumnCount,
  nullRate,
  sideEffects,
  lowUniquenessColumns,
  modelUsageRows,
  terminalLines,
  terminalRef,
  onTerminalScroll,
  canPublish,
  onOpenPublish,
}: ExecutionOverviewTabProps): ReactElement {
  // Attach the unit only when there is an actual number to show.
  const nullRateLabel =
    typeof nullRate === "number" && !Number.isNaN(nullRate)
      ? `${nullRate.toFixed(1)}%`
      : "--";

  return (
    <div className="mt-3 space-y-3">
      {showSummaryCards && (
        <div className="space-y-3">
          {canPublish && (
            <div className="flex flex-col gap-3 rounded-xl border border-border/60 bg-card/55 p-3 sm:flex-row sm:items-center sm:justify-between">
              <div className="space-y-1">
                <p className="text-sm font-medium text-foreground">Next step</p>
                <p className="text-xs text-muted-foreground">
                  This run is complete. Publish the generated dataset to Hugging Face.
                </p>
              </div>
              <Button type="button" variant="outline" size="sm" onClick={onOpenPublish}>
                Publish to Hugging Face
              </Button>
            </div>
          )}
          <div className="grid gap-3 md:grid-cols-2">
            <div className="h-full rounded-xl border border-border/60 bg-card/55 p-3">
              <div className="mb-2 flex items-center justify-between">
                <p className="text-xs text-muted-foreground">Run summary</p>
                <HugeiconsIcon
                  icon={Database01Icon}
                  className="size-4 text-muted-foreground"
                />
              </div>
              <div className="space-y-1.5 text-xs">
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Records</span>
                  <span className="font-semibold">
                    {formatMetricValue(recordsMetric)} / {formatMetricValue(totalMetric)}
                  </span>
                </p>
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Duration</span>
                  <span className="font-semibold">{runDuration}</span>
                </p>
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Columns analyzed</span>
                  <span className="font-semibold">{formatMetricValue(columnCount)}</span>
                </p>
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Final stage</span>
                  <span className="truncate font-semibold">{execution.stage ?? "--"}</span>
                </p>
              </div>
            </div>
            <div className="h-full rounded-xl border border-border/60 bg-card/55 p-3">
              <div className="mb-2 flex items-center justify-between">
                <p className="text-xs text-muted-foreground">Insights</p>
                <HugeiconsIcon
                  icon={Database02Icon}
                  className="size-4 text-muted-foreground"
                />
              </div>
              <div className="space-y-1.5 text-xs">
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">LLM columns</span>
                  <span className="font-semibold">{formatMetricValue(llmColumnCount)}</span>
                </p>
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Null rate</span>
                  <span className="font-semibold">{nullRateLabel}</span>
                </p>
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Side-effect columns</span>
                  <span className="font-semibold">{formatMetricValue(sideEffects.length)}</span>
                </p>
                {sideEffects.length > 0 && (
                  <div className="pt-0.5">
                    <div className="flex flex-wrap gap-1.5">
                      {sideEffects.map((name) => (
                        <Badge key={name} variant="outline">
                          {name}
                        </Badge>
                      ))}
                    </div>
                  </div>
                )}
                <p className="flex items-center justify-between gap-3">
                  <span className="text-muted-foreground">Low uniqueness flags</span>
                  <span className="font-semibold">
                    {formatMetricValue(lowUniquenessColumns.length)}
                  </span>
                </p>
                {lowUniquenessColumns.length > 0 && (
                  <div className="pt-0.5">
                    <div className="flex flex-wrap gap-1.5">
                      {/* Only the first three names get a badge; the rest are summarised. */}
                      {lowUniquenessColumns.slice(0, 3).map((name) => (
                        <Badge key={name} variant="outline">
                          {name}
                        </Badge>
                      ))}
                      {lowUniquenessColumns.length > 3 && (
                        <Badge variant="outline">
                          +{lowUniquenessColumns.length - 3} more
                        </Badge>
                      )}
                    </div>
                  </div>
                )}
              </div>
            </div>
          </div>
          <div className="rounded-xl border border-border/60 bg-card/55 p-3">
            <div className="mb-2 flex items-center justify-between">
              <p className="text-xs text-muted-foreground">Model usage</p>
              <HugeiconsIcon icon={Flag02Icon} className="size-4 text-muted-foreground" />
            </div>
            {modelUsageRows.length === 0 ? (
              <p className="text-xs text-muted-foreground">No model usage yet.</p>
            ) : (
              <div className="overflow-hidden rounded-lg border border-border/60 bg-card/50">
                <Table>
                  <TableHeader>
                    <TableRow>
                      <TableHead>Model</TableHead>
                      <TableHead className="text-right">Input</TableHead>
                      <TableHead className="text-right">Output</TableHead>
                    </TableRow>
                  </TableHeader>
                  <TableBody>
                    {modelUsageRows.map((usage, index) => (
                      // Two usage entries may report the same model name, so
                      // the position is part of the key to keep keys unique.
                      <TableRow key={`${index}-${usage.model}`}>
                        <TableCell className="max-w-[320px] truncate">{usage.model}</TableCell>
                        <TableCell className="text-right">
                          {formatMetricValue(usage.input)}
                        </TableCell>
                        <TableCell className="text-right">
                          {formatMetricValue(usage.output)}
                        </TableCell>
                      </TableRow>
                    ))}
                  </TableBody>
                </Table>
              </div>
            )}
          </div>
        </div>
      )}
      <div className="overflow-hidden rounded-xl corner-squircle border">
        <div className="flex items-center justify-between border-b px-3 py-2">
          <p className="text-sm font-semibold">Terminal output</p>
          <p className="text-xs text-muted-foreground">{terminalLines.length} lines</p>
        </div>
        <div
          ref={terminalRef}
          className="max-h-72 overflow-auto bg-zinc-900/80 px-3 py-2 font-mono text-xs text-zinc-200"
          onScroll={onTerminalScroll}
        >
          {terminalLines.length === 0 ? (
            <p className="text-zinc-400">
              {isExecutionInProgress(execution.status)
                ? "Waiting for logs..."
                : "No logs captured."}
            </p>
          ) : (
            terminalLines.map((line, index) => (
              <p
                key={`${index}-${line.slice(0, 24)}`}
                className="whitespace-pre-wrap break-words leading-relaxed"
              >
                {line}
              </p>
            ))
          )}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/execution-raw-tab.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";

/** Props accepted by `ExecutionRawTab`. */
type ExecutionRawTabProps = {
  /** Object that is pretty-printed as JSON; `null` is printed as the literal "null". */
  rawExecution: Record<string, unknown> | null;
};

/**
 * "Raw execution" tab: dumps `rawExecution` as two-space-indented JSON inside
 * a scrollable `<pre>` block.
 */
export function ExecutionRawTab({
  rawExecution,
}: ExecutionRawTabProps): ReactElement {
  const prettyJson = JSON.stringify(rawExecution, null, 2);

  return (
    <div className="mt-3 rounded-xl border p-3">
      <p className="mb-2 text-sm font-semibold">Raw execution</p>
      <pre className="max-h-96 overflow-auto rounded-md bg-muted/40 p-3 text-xs">
        {prettyJson}
      </pre>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/execution-sidebar.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";
import { Badge } from "@/components/ui/badge";
import { cn } from "@/lib/utils";
import type { RecipeExecutionRecord } from "../../execution-types";
import {
  executionLabel,
  isExecutionInProgress,
  normalizeRunName,
} from "../../executions/execution-helpers";
import {
  formatStatus,
  formatTimestamp,
  statusRightBorder,
  statusTone,
} from "./executions-view-helpers";

/** Props accepted by `ExecutionSidebar`. */
type ExecutionSidebarProps = {
  /** Runs to list, rendered in the given order. */
  executions: RecipeExecutionRecord[];
  /** Id of the run that gets the "selected" styling, if any. */
  selectedExecutionId: string | null;
  /** Called with a run's id when its entry is clicked. */
  onSelectExecution: (id: string) => void;
};

/**
 * Sidebar listing every run as a clickable card.
 *
 * Each card shows a title, a status badge, the row count, batch progress
 * (only while the run is in progress and has more than one batch) and the
 * creation timestamp. An empty list renders a "No runs yet." placeholder.
 */
export function ExecutionSidebar({
  executions,
  selectedExecutionId,
  onSelectExecution,
}: ExecutionSidebarProps): ReactElement {
  const renderRun = (execution: RecipeExecutionRecord): ReactElement => {
    // "full" runs prefer their normalized run name; everything else (and
    // unnamed full runs) falls back to the generic label for the kind.
    const customName =
      execution.kind === "full" ? normalizeRunName(execution.run_name) : null;
    const title = customName ?? executionLabel(execution.kind);
    const isSelected = selectedExecutionId === execution.id;
    const batch = execution.batch;

    return (
      <button
        key={execution.id}
        type="button"
        onClick={() => onSelectExecution(execution.id)}
        className={cn(
          "w-full rounded-xl corner-squircle border border-r-2 border-border/60 bg-card/60 p-3 text-left transition-colors",
          isSelected ? "border-primary/35 bg-primary/[0.045]" : "hover:bg-muted/25",
          statusRightBorder(execution.status),
        )}
      >
        <div className="mb-2 flex items-center justify-between gap-2">
          <p className="truncate text-sm font-medium">
            {title}
          </p>
          <Badge
            variant="outline"
            className={cn("capitalize text-[11px]", statusTone(execution.status))}
          >
            {formatStatus(execution.status)}
          </Badge>
        </div>
        <p className="text-xs text-muted-foreground">{execution.rows} rows</p>
        {isExecutionInProgress(execution.status) &&
          typeof batch?.total === "number" &&
          batch.total > 1 && (
            <p className="text-xs text-muted-foreground">
              Batch {batch.idx ?? "--"}/{batch.total}
            </p>
          )}
        <p className="text-xs text-muted-foreground">
          {formatTimestamp(execution.createdAt)}
        </p>
      </button>
    );
  };

  return (
    <aside className="w-72 shrink-0 border-r border-border/60 bg-card/20">
      <div className="flex items-center justify-between  border-border/60 px-3 py-2">
        <p className="text-xs font-semibold uppercase text-muted-foreground">
          Runs
        </p>
      </div>
      <div className="h-[calc(100%-45px)] space-y-2 overflow-auto p-2">
        {executions.length === 0 ? (
          <div className="rounded-xl border border-dashed border-border/60 p-3 text-xs text-muted-foreground">
            No runs yet.
          </div>
        ) : (
          executions.map((execution) => renderRun(execution))
        )}
      </div>
    </aside>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/executions-view-helpers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  RecipeExecutionAnalysis,
  RecipeExecutionStatus,
} from "../../execution-types";
import { isExecutionInProgress } from "../../executions/execution-helpers";
import { resolveImagePreview } from "../../utils/image-preview";

/**
 * Normalized statistics for a single analysed column, as produced by
 * `parseAnalysisColumns`: string fields fall back to "--" and numeric fields
 * to `null` when the source value is missing or has the wrong type.
 */
export type AnalysisColumnStat = {
  column_name: string;
  column_type: string;
  simple_dtype: string;
  num_unique: number | null;
  num_null: number | null;
  input_tokens_mean: number | null;
  output_tokens_mean: number | null;
};

/**
 * One row of model token usage, as produced by `parseModelUsageRows`.
 * `input` / `output` are `null` when the corresponding token count is absent
 * or not a finite number.
 */
export type ModelUsageRow = {
  model: string;
  input: number | null;
  output: number | null;
};

// NOTE(review): presumably the number of dataset rows shown per preview page —
// confirm at the call site.
export const PREVIEW_DATASET_PAGE_SIZE = 20;
// NOTE(review): presumably the distance (in px) from the bottom of the terminal
// within which it is still treated as "scrolled to bottom" — confirm at the call site.
export const TERMINAL_STICKY_BOTTOM_THRESHOLD_PX = 24;

/**
 * Formats a numeric timestamp (milliseconds since the Unix epoch, as accepted
 * by the `Date` constructor) using the runtime's default locale.
 */
export function formatTimestamp(value: number): string {
  const moment = new Date(value);
  return moment.toLocaleString();
}

/**
 * Converts an arbitrary dataset cell value into display text.
 *
 * - `null` / `undefined` become the placeholder "--".
 * - Strings are returned unchanged; numbers and booleans use `String()`.
 * - Everything else is serialized with `JSON.stringify`, falling back to
 *   `String(value)` when serialization throws (e.g. circular structures,
 *   bigint) or produces no text at all.
 *
 * Always returns a string, so callers can safely read `.length` on the result.
 */
export function formatCellValue(value: unknown): string {
  if (value === null || value === undefined) {
    return "--";
  }
  if (typeof value === "string") {
    return value;
  }
  if (typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }
  try {
    // JSON.stringify returns `undefined` (not a string) for functions, symbols
    // and objects whose toJSON() yields undefined, despite its declared type.
    const serialized: string | undefined = JSON.stringify(value);
    return serialized ?? String(value);
  } catch {
    return String(value);
  }
}

/**
 * Longest cell text, in UTF-16 code units, that is shown in full. Anything
 * longer is considered expandable and is truncated for the collapsed view.
 */
const CELL_PREVIEW_MAX_LENGTH = 180;

/** Returns true when `value` is too long to be shown in full in a collapsed cell. */
export function isExpandableCellValue(value: string): boolean {
  return value.length > CELL_PREVIEW_MAX_LENGTH;
}

/**
 * Returns `value` unchanged when it fits in a collapsed cell; otherwise its
 * leading portion, with trailing whitespace removed, followed by "...".
 *
 * The cut never lands in the middle of a surrogate pair, so the preview does
 * not end with a broken (lone-surrogate) character.
 */
export function truncateCellValue(value: string): string {
  if (!isExpandableCellValue(value)) {
    return value;
  }
  let cutAt = CELL_PREVIEW_MAX_LENGTH;
  const lastUnit = value.charCodeAt(cutAt - 1);
  // 0xD800–0xDBFF is the high-surrogate range: keeping it without its
  // partner would render as a replacement glyph, so drop it as well.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cutAt -= 1;
  }
  return `${value.slice(0, cutAt).trimEnd()}...`;
}

/**
 * Reports whether any of the given columns of `row` holds text long enough to
 * be expandable. Cells for which `resolveImagePreview` returns a truthy value
 * are ignored.
 */
export function hasExpandableTextCell(
  row: Record<string, unknown>,
  visibleColumnNames: string[],
): boolean {
  for (const columnName of visibleColumnNames) {
    const cell = row[columnName];
    if (resolveImagePreview(cell)) {
      continue;
    }
    if (isExpandableCellValue(formatCellValue(cell))) {
      return true;
    }
  }
  return false;
}

/** Returns `value` when it is a finite number, otherwise `null`. */
function parseNumber(value: unknown): number | null {
  if (typeof value !== "number") {
    return null;
  }
  return Number.isFinite(value) ? value : null;
}

/** Returns `value` when it is a non-empty string, otherwise the placeholder "--". */
function parseString(value: unknown): string {
  if (typeof value === "string" && value !== "") {
    return value;
  }
  return "--";
}

/**
 * Extracts normalized per-column statistics from an execution analysis.
 *
 * Returns an empty list when `analysis` is null or its `column_statistics` is
 * not an array. Entries that are not plain objects (null, primitives, arrays)
 * are dropped; for the rest, each field is normalized with `parseString` /
 * `parseNumber`.
 */
export function parseAnalysisColumns(
  analysis: RecipeExecutionAnalysis | null,
): AnalysisColumnStat[] {
  const rawStats = Array.isArray(analysis?.column_statistics)
    ? analysis.column_statistics
    : [];
  const parsed: AnalysisColumnStat[] = [];
  for (const entry of rawStats) {
    if (!entry || typeof entry !== "object" || Array.isArray(entry)) {
      continue;
    }
    const fields = entry as Record<string, unknown>;
    parsed.push({
      column_name: parseString(fields.column_name),
      column_type: parseString(fields.column_type),
      simple_dtype: parseString(fields.simple_dtype),
      num_unique: parseNumber(fields.num_unique),
      num_null: parseNumber(fields.num_null),
      input_tokens_mean: parseNumber(fields.input_tokens_mean),
      output_tokens_mean: parseNumber(fields.output_tokens_mean),
    });
  }
  return parsed;
}

/**
 * Maps an execution status to the border/text color classes of its badge:
 * emerald for "completed", red for "error" and "cancelled", amber while the
 * execution is in progress, and muted defaults for anything else.
 */
export function statusTone(status: RecipeExecutionStatus): string {
  switch (status) {
    case "completed":
      return "border-emerald-500/30 text-emerald-700 dark:text-emerald-300";
    case "error":
    case "cancelled":
      return "border-red-500/30 text-red-700 dark:text-red-300";
    default:
      return isExecutionInProgress(status)
        ? "border-amber-500/30 text-amber-700 dark:text-amber-300"
        : "border-border/60 text-muted-foreground";
  }
}

/**
 * Maps an execution status to the right-border color class of its sidebar
 * card: emerald for "completed", red for "error" and "cancelled", amber while
 * the execution is in progress, and the neutral border color otherwise.
 */
export function statusRightBorder(status: RecipeExecutionStatus): string {
  switch (status) {
    case "completed":
      return "border-r-emerald-500/40";
    case "error":
    case "cancelled":
      return "border-r-red-500/40";
    default:
      return isExecutionInProgress(status)
        ? "border-r-amber-500/40"
        : "border-r-border/50";
  }
}

/**
 * Returns the display label for an execution status. Every status, including
 * "cancelled", is currently shown verbatim.
 */
export function formatStatus(status: RecipeExecutionStatus): string {
  return status;
}

/**
 * Formats a percentage with one decimal place and a "%" suffix. Returns "--"
 * when the value is not a number or is NaN.
 */
export function formatPercent(value: number | null | undefined): string {
  return typeof value === "number" && !Number.isNaN(value)
    ? `${value.toFixed(1)}%`
    : "--";
}

/**
 * Formats the elapsed time between two millisecond timestamps as whole
 * seconds (e.g. "12s"). Returns "--" when there is no finish time or when it
 * is not later than the start time.
 */
export function formatDuration(startedAt: number, finishedAt: number | null): string {
  if (!finishedAt) {
    return "--";
  }
  if (finishedAt <= startedAt) {
    return "--";
  }
  const elapsedMs = finishedAt - startedAt;
  return `${Math.round(elapsedMs / 1000)}s`;
}

/**
 * Formats a numeric metric with the runtime's default locale formatting.
 * Returns "--" when the value is not a number or is NaN.
 */
export function formatMetricValue(value: number | null | undefined): string {
  return typeof value === "number" && !Number.isNaN(value)
    ? value.toLocaleString()
    : "--";
}

/**
 * Converts a model-usage mapping into table rows.
 *
 * Each entry whose value is a plain object yields one row: the name is the
 * value's non-empty string `model` field, or else the entry's key; `input` and
 * `output` come from the value's `tokens` object and are `null` when absent or
 * not finite numbers. Entries with non-object values are skipped, and a null
 * mapping yields an empty list.
 */
export function parseModelUsageRows(
  value: Record<string, unknown> | null,
): ModelUsageRow[] {
  if (!value) {
    return [];
  }
  const rows: ModelUsageRow[] = [];
  for (const [entryName, payload] of Object.entries(value)) {
    if (!payload || typeof payload !== "object" || Array.isArray(payload)) {
      continue;
    }
    const modelInfo = payload as Record<string, unknown>;

    const rawTokens = modelInfo.tokens;
    const tokens =
      rawTokens && typeof rawTokens === "object" && !Array.isArray(rawTokens)
        ? (rawTokens as Record<string, unknown>)
        : null;

    const declaredName = modelInfo.model;
    const model =
      typeof declaredName === "string" && declaredName.length > 0
        ? declaredName
        : entryName;

    rows.push({
      model,
      input: parseNumber(tokens?.input),
      output: parseNumber(tokens?.output),
    });
  }
  return rows;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/executions-view.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useEffect, useMemo, useRef, useState, type ReactElement } from "react";
import type { ColumnDef } from "@tanstack/react-table";
import {
  CheckmarkCircle02Icon,
  Flag02Icon,
  Share08Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { publishRecipeJob } from "../../api";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Progress } from "@/components/ui/progress";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { cn } from "@/lib/utils";
import { resolveImagePreview } from "../../utils/image-preview";
import type {
  RecipeExecutionRecord,
} from "../../execution-types";
import { isExecutionInProgress } from "../../executions/execution-helpers";
import { ExecutionColumnsTab } from "./execution-columns-tab";
import { ExecutionDataTab } from "./execution-data-tab";
import { ExecutionOverviewTab } from "./execution-overview-tab";
import { ExecutionRawTab } from "./execution-raw-tab";
import { ExecutionSidebar } from "./execution-sidebar";
import { PublishExecutionDialog } from "./publish-execution-dialog";
import {
  PREVIEW_DATASET_PAGE_SIZE,
  TERMINAL_STICKY_BOTTOM_THRESHOLD_PX,
  formatCellValue,
  formatDuration,
  formatPercent,
  hasExpandableTextCell,
  parseAnalysisColumns,
  parseModelUsageRows,
  truncateCellValue,
} from "./executions-view-helpers";

/** Props accepted by `ExecutionsView`. */
type ExecutionsViewProps = {
  /** All known executions; the selected one is looked up in this list by id. */
  executions: RecipeExecutionRecord[];
  /** Id of the execution to show, or null when none is selected. */
  selectedExecutionId: string | null;
  /**
   * Signature compared against the selected execution's `recipeSignature`;
   * a non-empty, different signature marks the execution as stale.
   */
  currentSignature: string;
  // NOTE(review): the callbacks below are documented from their names and
  // signatures only — confirm against the component body.
  /** Presumably invoked with an execution id when the user picks a run. */
  onSelectExecution: (id: string) => void;
  /** Presumably invoked with an execution id to cancel that run. */
  onCancelExecution: (id: string) => void;
  /** Presumably invoked with an execution id and a page number to load a dataset page. */
  onLoadDatasetPage: (id: string, page: number) => void;
};

/**
 * Two-pane executions screen: a sidebar listing `executions` and a detail
 * section for the selected one.
 *
 * The detail section shows an optional progress panel, an error/cancelled
 * banner, and four tabs (Overview, Columns, Data, Raw). All values handed to
 * the tab components are derived here from the selected execution record.
 * UI state that should survive switching between runs (hidden columns,
 * expanded rows, preview page) is stored in maps keyed by execution id.
 */
export function ExecutionsView({
  executions,
  selectedExecutionId,
  currentSignature,
  onSelectExecution,
  onCancelExecution,
  onLoadDatasetPage,
}: ExecutionsViewProps): ReactElement {
  // Finite numbers render as a localized value followed by " s"; anything else as "--".
  const formatEta = (value: number | null | undefined): string =>
    typeof value === "number" && Number.isFinite(value)
      ? `${value.toLocaleString()} s`
      : "--";
  // Currently active detail tab ("overview" | "columns" | "data" | "raw").
  const [detailTab, setDetailTab] = useState("overview");
  // Per-execution UI state, each map keyed by execution id.
  const [hiddenDatasetColumnsByExecution, setHiddenDatasetColumnsByExecution] = useState<
    Record<string, string[]>
  >({});
  const [expandedDatasetRowsByExecution, setExpandedDatasetRowsByExecution] = useState<
    Record<string, Record<string, boolean>>
  >({});
  const [previewDatasetPageByExecution, setPreviewDatasetPageByExecution] = useState<
    Record<string, number>
  >({});
  const [publishDialogOpen, setPublishDialogOpen] = useState(false);
  // Terminal element plus a flag recording whether new log lines should
  // keep the view pinned to the bottom.
  const terminalRef = useRef<HTMLDivElement | null>(null);
  const shouldStickTerminalToBottomRef = useRef(true);
  const selectedExecution = useMemo(
    () =>
      executions.find((execution) => execution.id === selectedExecutionId) ??
      null,
    [executions, selectedExecutionId],
  );
  // Stale = the run recorded a non-empty signature that differs from the current one.
  const isStale = Boolean(
    selectedExecution &&
      selectedExecution.recipeSignature.length > 0 &&
      selectedExecution.recipeSignature !== currentSignature,
  );

  // Id of the execution that was actually found (null when the id matched nothing).
  const selectedExecutionIdSafe = selectedExecution?.id ?? null;
  const hiddenDatasetColumns = useMemo(() => {
    if (!selectedExecutionIdSafe) {
      return [];
    }
    return hiddenDatasetColumnsByExecution[selectedExecutionIdSafe] ?? [];
  }, [hiddenDatasetColumnsByExecution, selectedExecutionIdSafe]);
  const expandedDatasetRows = useMemo(() => {
    if (!selectedExecutionIdSafe) {
      return {};
    }
    return expandedDatasetRowsByExecution[selectedExecutionIdSafe] ?? {};
  }, [expandedDatasetRowsByExecution, selectedExecutionIdSafe]);

  // Union of keys over all loaded rows, in first-seen order.
  const datasetColumnNames = useMemo(() => {
    if (!selectedExecution) {
      return [];
    }
    const names = new Set<string>();
    for (const row of selectedExecution.dataset) {
      for (const key of Object.keys(row)) {
        names.add(key);
      }
    }
    return Array.from(names);
  }, [selectedExecution]);

  const visibleDatasetColumnNames = useMemo(
    () =>
      datasetColumnNames.filter(
        (name) => !hiddenDatasetColumns.includes(name),
      ),
    [datasetColumnNames, hiddenDatasetColumns],
  );

  // One table column per visible dataset column. Each cell renders, in order
  // of precedence: an image preview, a "too large" notice, or formatted text.
  const tableColumns = useMemo<ColumnDef<Record<string, unknown>>[]>(() => {
    if (!selectedExecution) {
      return [];
    }
    return visibleDatasetColumnNames.map((name) => ({
      accessorKey: name,
      header: name,
      cell: ({ getValue, row }) => {
        const rawValue = getValue();
        const imagePreview = resolveImagePreview(rawValue);
        if (imagePreview?.kind === "ready") {
          return (
            <div className="max-w-[32rem]">
              <img
                src={imagePreview.src}
                alt={`${name} preview`}
                loading="lazy"
                className="h-24 w-auto max-w-[260px] rounded-md border border-border/60 bg-muted/20 object-contain"
              />
            </div>
          );
        }
        if (imagePreview?.kind === "too_large") {
          return (
            <div className="max-w-[32rem]">
              <p className="text-xs text-muted-foreground">
                Image too large to preview
              </p>
            </div>
          );
        }
        const value = formatCellValue(rawValue);
        const rowExpanded = Boolean(expandedDatasetRows[row.id]);
        // Truncation is decided per row: text is shortened only while the row
        // has an expandable cell among the visible columns and is collapsed.
        const rowHasExpandableCell = hasExpandableTextCell(
          row.original,
          visibleDatasetColumnNames,
        );
        const showTruncated = rowHasExpandableCell && !rowExpanded;

        return (
          <div className="max-w-[32rem]">
            <p className="whitespace-pre-wrap break-all">
              {showTruncated ? truncateCellValue(value) : value}
            </p>
          </div>
        );
      },
    }));
  }, [expandedDatasetRows, selectedExecution, visibleDatasetColumnNames]);

  const analysisColumns = useMemo(
    () => parseAnalysisColumns(selectedExecution?.analysis ?? null),
    [selectedExecution?.analysis],
  );
  const modelUsageRows = useMemo(
    () => parseModelUsageRows(selectedExecution?.model_usage ?? null),
    [selectedExecution?.model_usage],
  );
  // Keep only string entries; a missing or non-array value yields an empty list.
  const sideEffects = useMemo(() => {
    const values = selectedExecution?.analysis?.side_effect_column_names;
    return Array.isArray(values)
      ? values.filter((value): value is string => typeof value === "string")
      : [];
  }, [selectedExecution?.analysis?.side_effect_column_names]);

  // Cancel needs a job id and an in-progress status.
  const canCancel = Boolean(
    selectedExecution?.jobId && isExecutionInProgress(selectedExecution.status),
  );
  // Publish needs a completed "full" run with both a job id and an artifact path.
  const canPublish = Boolean(
    selectedExecution &&
      selectedExecution.kind === "full" &&
      selectedExecution.status === "completed" &&
      selectedExecution.jobId &&
      selectedExecution.artifact_path,
  );
  // Paging values for non-preview runs come from the record itself.
  const datasetPage = selectedExecution?.datasetPage ?? 1;
  const datasetPageSize = selectedExecution?.datasetPageSize ?? 20;
  const datasetTotal = selectedExecution?.datasetTotal ?? 0;
  // Preview runs are paged locally using the per-execution page stored in state.
  const previewPageRaw = selectedExecutionIdSafe
    ? previewDatasetPageByExecution[selectedExecutionIdSafe] ?? 1
    : 1;
  const previewTotalPages = useMemo(() => {
    if (!selectedExecution || selectedExecution.kind !== "preview") {
      return 1;
    }
    return Math.max(
      1,
      Math.ceil(selectedExecution.dataset.length / PREVIEW_DATASET_PAGE_SIZE),
    );
  }, [selectedExecution]);
  // Clamp so a stored page beyond the current page count still shows the last page.
  const previewPage = Math.min(previewPageRaw, previewTotalPages);
  const totalPages =
    selectedExecution?.kind === "preview"
      ? previewTotalPages
      : Math.max(1, Math.ceil(datasetTotal / datasetPageSize));
  const canPageDataset =
    selectedExecution?.kind === "preview" ||
    (selectedExecution?.kind === "full" && Boolean(selectedExecution.jobId));
  // Preview rows are sliced here; for other kinds the record's dataset is used as-is.
  const datasetRowsForTable = useMemo(() => {
    if (!selectedExecution) {
      return [];
    }
    if (selectedExecution.kind !== "preview") {
      return selectedExecution.dataset;
    }
    const start = (previewPage - 1) * PREVIEW_DATASET_PAGE_SIZE;
    return selectedExecution.dataset.slice(start, start + PREVIEW_DATASET_PAGE_SIZE);
  }, [previewPage, selectedExecution]);
  const currentDatasetPage = selectedExecution?.kind === "preview" ? previewPage : datasetPage;
  // Record count for completed runs only. Preference order:
  // analysis.num_records, then datasetTotal, then the number of loaded rows.
  const recordsMetric = useMemo(() => {
    if (!selectedExecution || selectedExecution.status !== "completed") {
      return null;
    }
    if (typeof selectedExecution.analysis?.num_records === "number") {
      return selectedExecution.analysis.num_records;
    }
    if (selectedExecution.datasetTotal > 0) {
      return selectedExecution.datasetTotal;
    }
    if (selectedExecution.dataset.length > 0) {
      return selectedExecution.dataset.length;
    }
    return null;
  }, [selectedExecution]);
  // Target count for completed runs: analysis.target_num_records, else a positive `rows`.
  const totalMetric = useMemo(() => {
    if (!selectedExecution || selectedExecution.status !== "completed") {
      return null;
    }
    if (typeof selectedExecution.analysis?.target_num_records === "number") {
      return selectedExecution.analysis.target_num_records;
    }
    return selectedExecution.rows > 0 ? selectedExecution.rows : null;
  }, [selectedExecution]);
  const columnCount = analysisColumns.length;
  // Number of analysis columns whose column_type starts with "llm".
  const llmColumnCount = useMemo(
    () =>
      analysisColumns.reduce(
        (acc, column) => (column.column_type.startsWith("llm") ? acc + 1 : acc),
        0,
      ),
    [analysisColumns],
  );
  // Sum of num_null over all columns; non-numeric values count as 0.
  const totalNulls = useMemo(
    () =>
      analysisColumns.reduce(
        (acc, column) => acc + (typeof column.num_null === "number" ? column.num_null : 0),
        0,
      ),
    [analysisColumns],
  );
  // Percentage of null cells: totalNulls / (records * columns) * 100, or null
  // when either factor is missing or not positive.
  const nullRate = useMemo(() => {
    if (
      typeof recordsMetric !== "number" ||
      recordsMetric <= 0 ||
      columnCount <= 0
    ) {
      return null;
    }
    return (totalNulls / (recordsMetric * columnCount)) * 100;
  }, [columnCount, recordsMetric, totalNulls]);
  // Names of columns whose unique-value ratio (num_unique / records) is below 0.5.
  const lowUniquenessColumns = useMemo(() => {
    if (typeof recordsMetric !== "number" || recordsMetric <= 0) {
      return [];
    }
    return analysisColumns
      .filter(
        (column) =>
          typeof column.num_unique === "number" &&
          column.num_unique / recordsMetric < 0.5,
      )
      .map((column) => column.column_name);
  }, [analysisColumns, recordsMetric]);
  const runDuration = useMemo(() => {
    if (!selectedExecution) {
      return "--";
    }
    return formatDuration(selectedExecution.createdAt, selectedExecution.finishedAt);
  }, [selectedExecution]);
  const showSummaryCards = selectedExecution?.status === "completed";
  // True when any of progress / column_progress / batch carries at least one numeric field.
  const hasProgressSnapshot = Boolean(
    selectedExecution?.progress &&
      (typeof selectedExecution.progress.done === "number" ||
        typeof selectedExecution.progress.total === "number" ||
        typeof selectedExecution.progress.percent === "number" ||
        typeof selectedExecution.progress.rate === "number" ||
        typeof selectedExecution.progress.eta_sec === "number"),
  ) || Boolean(
    selectedExecution?.column_progress &&
      (typeof selectedExecution.column_progress.done === "number" ||
        typeof selectedExecution.column_progress.total === "number" ||
        typeof selectedExecution.column_progress.percent === "number"),
  ) || Boolean(
    selectedExecution?.batch &&
      (typeof selectedExecution.batch.idx === "number" ||
        typeof selectedExecution.batch.total === "number"),
  );
  const selectedStatus = selectedExecution?.status ?? null;
  const isSelectedExecutionInProgress = selectedStatus
    ? isExecutionInProgress(selectedStatus)
    : false;
  // The panel is shown for completed or in-progress runs, and for any other
  // status that still carries a progress snapshot.
  const showProgressPanel = Boolean(selectedExecution) && (
    selectedStatus === "completed" ||
    isSelectedExecutionInProgress ||
    hasProgressSnapshot
  );
  const progressComplete = selectedExecution?.status === "completed";
  // Reported percent wins; without one, completed runs show 100 and others 0.
  const progressPercent = selectedExecution?.progress?.percent ?? (progressComplete ? 100 : 0);
  const batchTotal = selectedExecution?.batch?.total ?? null;
  const batchIdx = selectedExecution?.batch?.idx ?? null;
  // The batch line is only shown when there is more than one batch.
  const showBatchProgress = typeof batchTotal === "number" && batchTotal > 1;
  const terminalLines = selectedExecution?.log_lines ?? [];
  // Shallow copy of the record without `dataset` and `log_lines`, for the Raw tab.
  const rawExecution = useMemo(() => {
    if (!selectedExecution) {
      return null;
    }
    const next = { ...selectedExecution } as Record<string, unknown>;
    delete next.dataset;
    delete next.log_lines;
    return next;
  }, [selectedExecution]);

  // When a different execution is selected: re-enable sticking and jump to the bottom.
  useEffect(() => {
    if (!terminalRef.current) {
      return;
    }
    shouldStickTerminalToBottomRef.current = true;
    terminalRef.current.scrollTop = terminalRef.current.scrollHeight;
  }, [selectedExecution?.id]);

  // When the number of log lines changes: follow the output only if sticking is enabled.
  useEffect(() => {
    if (!terminalRef.current) {
      return;
    }
    if (!shouldStickTerminalToBottomRef.current) {
      return;
    }
    terminalRef.current.scrollTop = terminalRef.current.scrollHeight;
  }, [terminalLines.length]);

  return (
    <div className="flex h-full min-h-0">
      <ExecutionSidebar
        executions={executions}
        selectedExecutionId={selectedExecutionId}
        onSelectExecution={onSelectExecution}
      />
      <section className="min-w-0 flex-1 overflow-auto p-4">
        {!selectedExecution ? (
          <div className="rounded-xl border border-dashed border-border/60 p-4 text-sm text-muted-foreground">
            Select an execution.
          </div>
        ) : (
          <div className="space-y-4">
            {showProgressPanel && (
              <div className="space-y-3 rounded-2xl border shadow-border border-border/60 bg-card/55 p-3">
                <div className="flex items-center justify-between">
                  <div className="flex items-center gap-2">
                    <HugeiconsIcon
                      icon={progressComplete ? CheckmarkCircle02Icon : Flag02Icon}
                      className={cn(
                        "size-4",
                        progressComplete
                          ? "text-emerald-700 dark:text-emerald-300"
                          : "text-amber-700 dark:text-amber-300",
                      )}
                    />
                    <p className="text-sm font-semibold text-foreground">
                      Progress
                    </p>
                  </div>
                  <p className="text-xs text-muted-foreground">{formatPercent(progressPercent)}</p>
                </div>
                <Progress value={progressPercent} className="h-1" />
                <div className="grid gap-2 text-xs md:grid-cols-4">
                  <p className="text-muted-foreground">
                    Done: <span className="text-foreground">{selectedExecution.progress?.done ?? "--"}</span>
                  </p>
                  <p className="text-muted-foreground">
                    Total: <span className="text-foreground">{selectedExecution.progress?.total ?? "--"}</span>
                  </p>
                  <p className="text-muted-foreground">
                    Rate: <span className="text-foreground">{selectedExecution.progress?.rate ?? "--"} rec/s</span>
                  </p>
                  <p className="text-muted-foreground">
                    ETA: <span className="text-foreground">{formatEta(selectedExecution.progress?.eta_sec)}</span>
                  </p>
                </div>
                {selectedExecution.current_column && selectedExecution.column_progress && (
                  <p className="text-xs text-muted-foreground">
                    Column {selectedExecution.current_column}:{" "}
                    {selectedExecution.column_progress.done ?? "--"}/
                    {selectedExecution.column_progress.total ?? "--"} (
                    {formatPercent(selectedExecution.column_progress.percent)})
                  </p>
                )}
                {showBatchProgress && (
                  <p className="text-xs text-muted-foreground">
                    Processed batch: {batchIdx ?? "--"}/{batchTotal}
                  </p>
                )}
                {isStale && <Badge variant="outline">Recipe changed since this run</Badge>}
              </div>
            )}

            {(selectedExecution.status === "error" ||
              selectedExecution.status === "cancelled") && (
              <div className="rounded-xl border border-destructive/40 bg-destructive/5 p-3">
                <p className="text-sm font-semibold text-destructive">
                  {selectedExecution.status === "cancelled"
                    ? "Execution cancelled"
                    : "Execution failed"}
                </p>
                <p className="text-xs text-destructive">
                  {selectedExecution.error ?? "Unknown error."}
                </p>
              </div>
            )}

            <Tabs value={detailTab} onValueChange={setDetailTab}>
              <div className="flex items-center justify-between gap-2">
                <TabsList className="border border-border/60 bg-card/40">
                  <TabsTrigger value="overview">Overview</TabsTrigger>
                  <TabsTrigger value="columns">Columns</TabsTrigger>
                  <TabsTrigger value="data">Data</TabsTrigger>
                  <TabsTrigger value="raw">Raw</TabsTrigger>
                </TabsList>
                <div className="flex items-center gap-2">
                  {canPublish && (
                    <Button
                      type="button"
                      size="sm"
                      variant="outline"
                      onClick={() => setPublishDialogOpen(true)}
                    >
                      <HugeiconsIcon icon={Share08Icon} className="mr-2 size-4" />
                      Publish to Hugging Face
                    </Button>
                  )}
                  {canCancel && (
                    <Button
                      type="button"
                      size="sm"
                      variant="outline"
                      onClick={() => onCancelExecution(selectedExecution.id)}
                    >
                      Cancel
                    </Button>
                  )}
                </div>
              </div>
              <TabsContent value="overview">
                <ExecutionOverviewTab
                  execution={selectedExecution}
                  showSummaryCards={showSummaryCards}
                  recordsMetric={recordsMetric}
                  totalMetric={totalMetric}
                  runDuration={runDuration}
                  columnCount={columnCount}
                  llmColumnCount={llmColumnCount}
                  nullRate={nullRate}
                  sideEffects={sideEffects}
                  lowUniquenessColumns={lowUniquenessColumns}
                  modelUsageRows={modelUsageRows}
                  terminalLines={terminalLines}
                  terminalRef={terminalRef}
                  canPublish={canPublish}
                  onOpenPublish={() => setPublishDialogOpen(true)}
                  onTerminalScroll={(event) => {
                    // Keep following new output only while the user is within
                    // the threshold distance of the bottom of the terminal.
                    const element = event.currentTarget;
                    const distanceFromBottom =
                      element.scrollHeight - element.scrollTop - element.clientHeight;
                    shouldStickTerminalToBottomRef.current =
                      distanceFromBottom <= TERMINAL_STICKY_BOTTOM_THRESHOLD_PX;
                  }}
                />
              </TabsContent>
              <TabsContent value="columns">
                <ExecutionColumnsTab analysisColumns={analysisColumns} />
              </TabsContent>
              <TabsContent value="data">
                <ExecutionDataTab
                  execution={selectedExecution}
                  datasetColumnNames={datasetColumnNames}
                  hiddenDatasetColumns={hiddenDatasetColumns}
                  canPageDataset={canPageDataset}
                  currentDatasetPage={currentDatasetPage}
                  totalPages={totalPages}
                  tableColumns={tableColumns}
                  datasetRowsForTable={datasetRowsForTable}
                  visibleDatasetColumnNames={visibleDatasetColumnNames}
                  expandedDatasetRows={expandedDatasetRows}
                  selectedExecutionIdSafe={selectedExecutionIdSafe}
                  onSetHiddenColumns={(updater) => {
                    // Apply the updater to this execution's hidden-column list only.
                    const selectedId = selectedExecution.id;
                    setHiddenDatasetColumnsByExecution((current) => {
                      const currentColumns = current[selectedId] ?? [];
                      return {
                        ...current,
                        [selectedId]: updater(currentColumns),
                      };
                    });
                  }}
                  onPrevPage={() => {
                    // Preview runs page locally (clamped to page 1); other
                    // runs delegate to the parent with the previous page number.
                    if (selectedExecution.kind === "preview") {
                      const selectedId = selectedExecution.id;
                      setPreviewDatasetPageByExecution((current) => ({
                        ...current,
                        [selectedId]: Math.max(1, currentDatasetPage - 1),
                      }));
                      return;
                    }
                    onLoadDatasetPage(selectedExecution.id, currentDatasetPage - 1);
                  }}
                  onNextPage={() => {
                    // Preview runs page locally (clamped to totalPages); other
                    // runs delegate to the parent with the next page number.
                    if (selectedExecution.kind === "preview") {
                      const selectedId = selectedExecution.id;
                      setPreviewDatasetPageByExecution((current) => ({
                        ...current,
                        [selectedId]: Math.min(totalPages, currentDatasetPage + 1),
                      }));
                      return;
                    }
                    onLoadDatasetPage(selectedExecution.id, currentDatasetPage + 1);
                  }}
                  onToggleRowExpanded={(rowId) => {
                    // Flip the expanded flag for one row of the selected execution.
                    setExpandedDatasetRowsByExecution((current) => {
                      const rows = current[selectedExecution.id] ?? {};
                      return {
                        ...current,
                        [selectedExecution.id]: {
                          ...rows,
                          [rowId]: !rows[rowId],
                        },
                      };
                    });
                  }}
                />
              </TabsContent>
              <TabsContent value="raw">
                <ExecutionRawTab rawExecution={rawExecution} />
              </TabsContent>
            </Tabs>
          </div>
        )}
      </section>
      <PublishExecutionDialog
        open={publishDialogOpen}
        onOpenChange={setPublishDialogOpen}
        execution={canPublish ? selectedExecution : null}
        onPublish={async (payload) => {
          // Publishing is keyed by job id; reject when the record has none.
          if (!selectedExecution?.jobId) {
            throw new Error("This run is missing a job id.");
          }
          const response = await publishRecipeJob(selectedExecution.jobId, payload);
          return { url: response.url };
        }}
      />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/executions/publish-execution-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useEffect, useMemo, useState, type ReactElement } from "react";
import { ArrowRight01Icon, CheckmarkCircle02Icon, Copy01Icon, Key01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { Button } from "@/components/ui/button";
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { toastError, toastSuccess } from "@/shared/toast";
import type { RecipeExecutionRecord } from "../../execution-types";
import { copyTextToClipboard } from "../../executions/execution-helpers";

/** Props accepted by `PublishExecutionDialog`. */
type PublishExecutionDialogProps = {
  /** Whether the dialog is shown; closing it resets every form field. */
  open: boolean;
  /** Requested open-state change; not invoked while a publish is in flight. */
  onOpenChange: (open: boolean) => void;
  /** Run to publish; submit stays disabled without a `jobId` and `artifact_path`. */
  execution: RecipeExecutionRecord | null;
  /**
   * Performs the publish. The resolved `url` is shown on the success screen;
   * a rejection is shown as an inline error and a toast.
   */
  onPublish: (payload: {
    repo_id: string;
    description: string;
    hf_token?: string | null;
    private: boolean;
    artifact_path?: string | null;
  }) => Promise<{ url: string }>;
};

/**
 * Pick the best available record count for a run.
 *
 * Preference order: `analysis.num_records` when it is a number (including 0),
 * then a positive `datasetTotal`, then a positive `rows`. Returns `null` when
 * there is no execution or none of those values is usable.
 */
function getExecutionRecordCount(execution: RecipeExecutionRecord | null): number | null {
  if (!execution) {
    return null;
  }

  const analyzedCount = execution.analysis?.num_records;
  if (typeof analyzedCount === "number") {
    return analyzedCount;
  }

  const { datasetTotal, rows } = execution;
  if (datasetTotal > 0) {
    return datasetTotal;
  }
  return rows > 0 ? rows : null;
}

/**
 * Compose the pre-filled dataset description for a run.
 *
 * Uses the trimmed run name (or "This dataset" when it is empty or missing)
 * and appends a record-count sentence when a positive count is available.
 * Returns an empty string when there is no execution.
 */
function buildDefaultDescription(execution: RecipeExecutionRecord | null): string {
  if (!execution) {
    return "";
  }

  const trimmedName = execution.run_name?.trim();
  const subject = trimmedName ? trimmedName : "This dataset";
  let summary = `${subject} was generated with Unsloth Recipe Studio.`;

  const count = getExecutionRecordCount(execution);
  if (typeof count === "number" && count > 0) {
    summary += ` It contains ${count.toLocaleString()} generated records.`;
  }
  return summary;
}

/**
 * Modal for publishing a completed run to a Hugging Face dataset repo.
 *
 * Shows a form (repository id, description, optional write token, private
 * toggle) and, once `onPublish` resolves, a success screen with the returned
 * dataset URL. While a publish is in flight the dialog cannot be dismissed
 * and all inputs are disabled.
 *
 * @param open         Whether the dialog is visible.
 * @param onOpenChange Called with the requested open state (suppressed while publishing).
 * @param execution    Run to publish; submit requires both `jobId` and `artifact_path`.
 * @param onPublish    Performs the publish and resolves with the dataset URL.
 */
export function PublishExecutionDialog({
  open,
  onOpenChange,
  execution,
  onPublish,
}: PublishExecutionDialogProps): ReactElement {
  const [repoId, setRepoId] = useState("");
  const [description, setDescription] = useState("");
  const [hfToken, setHfToken] = useState("");
  const [privateRepo, setPrivateRepo] = useState(false);
  const [publishing, setPublishing] = useState(false);
  const [publishError, setPublishError] = useState<string | null>(null);
  const [publishedUrl, setPublishedUrl] = useState<string | null>(null);

  const defaultDescription = useMemo(
    () => buildDefaultDescription(execution),
    [execution],
  );
  const runLabel = execution?.run_name?.trim() || "Completed run";
  const recordCount = getExecutionRecordCount(execution);
  const recordLabel =
    typeof recordCount === "number" ? recordCount.toLocaleString() : "--";

  // Closing the dialog clears every field; opening it seeds the description.
  // The effect is keyed on the derived description *string* rather than on the
  // `execution` object: a parent re-render that hands over a fresh object for
  // the same run must not wipe the user's edits or dismiss the success screen.
  useEffect(() => {
    if (!open) {
      setPublishing(false);
      setPublishError(null);
      setPublishedUrl(null);
      setRepoId("");
      setDescription("");
      setHfToken("");
      setPrivateRepo(false);
      return;
    }
    setPublishError(null);
    setPublishedUrl(null);
    setDescription(defaultDescription);
  }, [defaultDescription, open]);

  // Submit needs an idle dialog, a publishable run, and non-blank repo id and description.
  const canSubmit =
    !publishing &&
    Boolean(execution?.jobId) &&
    Boolean(execution?.artifact_path) &&
    repoId.trim().length > 0 &&
    description.trim().length > 0;

  // Copy the published URL and report the outcome with a toast.
  const handleCopyUrl = async (): Promise<void> => {
    if (!publishedUrl) {
      return;
    }
    const ok = await copyTextToClipboard(publishedUrl);
    if (ok) {
      toastSuccess("Dataset link copied");
      return;
    }
    toastError("Copy failed", "Could not copy the dataset link.");
  };

  // Send the trimmed form values to `onPublish`; a blank token is sent as null.
  const handlePublish = async (): Promise<void> => {
    if (!execution?.jobId) {
      setPublishError("This run is missing a job id, so it cannot be published.");
      return;
    }
    setPublishing(true);
    setPublishError(null);
    try {
      const result = await onPublish({
        repo_id: repoId.trim(),
        description: description.trim(),
        hf_token: hfToken.trim() || null,
        private: privateRepo,
        artifact_path: execution.artifact_path,
      });
      setPublishedUrl(result.url);
      toastSuccess("Dataset published");
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Could not publish this dataset.";
      setPublishError(message);
      toastError("Publish failed", message);
    } finally {
      setPublishing(false);
    }
  };

  return (
    <Dialog
      open={open}
      onOpenChange={(nextOpen) => {
        // Ignore open/close requests while a publish is in flight.
        if (publishing) {
          return;
        }
        onOpenChange(nextOpen);
      }}
    >
      <DialogContent
        className="sm:max-w-xl"
        overlayClassName="bg-black/55"
        onInteractOutside={(event) => {
          if (publishing) {
            event.preventDefault();
          }
        }}
      >
        {publishedUrl ? (
          <>
            <div className="flex flex-col items-center gap-3 py-4">
              <div className="flex size-12 items-center justify-center rounded-full bg-emerald-500/10">
                <HugeiconsIcon
                  icon={CheckmarkCircle02Icon}
                  className="size-6 text-emerald-600 dark:text-emerald-400"
                />
              </div>
              <div className="space-y-1 text-center">
                <DialogTitle>Published</DialogTitle>
                <DialogDescription>
                  Your dataset is live on Hugging Face.
                </DialogDescription>
              </div>
            </div>
            <div className="rounded-2xl border border-border/60 bg-card/55 p-3 text-xs">
              <p className="mb-1 text-muted-foreground">Dataset URL</p>
              <p className="break-all font-medium text-foreground">{publishedUrl}</p>
            </div>
            <DialogFooter>
              <Button variant="outline" onClick={handleCopyUrl}>
                <HugeiconsIcon icon={Copy01Icon} className="mr-2 size-4" />
                Copy link
              </Button>
              <Button asChild={true}>
                <a href={publishedUrl} target="_blank" rel="noreferrer">
                  Open repo
                  <HugeiconsIcon icon={ArrowRight01Icon} className="ml-2 size-4" />
                </a>
              </Button>
              <Button variant="ghost" onClick={() => onOpenChange(false)}>
                Done
              </Button>
            </DialogFooter>
          </>
        ) : (
          <>
            <DialogHeader>
              <DialogTitle>Publish to Hugging Face</DialogTitle>
              <DialogDescription>
                Create or update a dataset repo from this completed run.
              </DialogDescription>
            </DialogHeader>

            <div className="space-y-4">
              <div className="rounded-2xl border border-border/60 bg-card/55 p-3 text-xs">
                <p className="font-medium text-foreground">From this run</p>
                <div className="mt-2 grid gap-1.5 text-muted-foreground sm:grid-cols-2">
                  <p>
                    Run: <span className="text-foreground">{runLabel}</span>
                  </p>
                  <p>
                    Records: <span className="text-foreground">{recordLabel}</span>
                  </p>
                </div>
                <p className="mt-2 text-muted-foreground">
                  We’ll upload the generated dataset, dataset card, images, and any processor
                  outputs from this execution.
                </p>
              </div>

              <div className="space-y-1.5">
                <label className="text-sm font-medium text-foreground" htmlFor="publish-repo-id">
                  Repository
                </label>
                <Input
                  id="publish-repo-id"
                  placeholder="your-name/customer-support-synth"
                  value={repoId}
                  onChange={(event) => setRepoId(event.target.value)}
                  disabled={publishing}
                />
                <p className="text-xs text-muted-foreground">
                  Use the format <span className="font-mono">username-or-org/dataset-name</span>.
                </p>
              </div>

              <div className="space-y-1.5">
                <label className="text-sm font-medium text-foreground" htmlFor="publish-description">
                  About this dataset
                </label>
                <Textarea
                  id="publish-description"
                  className="corner-squircle"
                  value={description}
                  onChange={(event) => setDescription(event.target.value)}
                  disabled={publishing}
                  rows={4}
                  placeholder={defaultDescription || "What is this dataset for?"}
                />
                <p className="text-xs text-muted-foreground">
                  This short summary is used in the dataset card on Hugging Face.
                </p>
              </div>

              <div className="space-y-1.5">
                <div className="flex items-center justify-between gap-3">
                  <label className="text-sm font-medium text-foreground" htmlFor="publish-hf-token">
                    HF write token
                  </label>
                  <a
                    href="https://huggingface.co/settings/tokens"
                    target="_blank"
                    rel="noreferrer"
                    className="text-xs text-muted-foreground underline underline-offset-3 hover:text-foreground"
                  >
                    Manage tokens
                  </a>
                </div>
                <div className="relative">
                  <HugeiconsIcon
                    icon={Key01Icon}
                    className="pointer-events-none absolute top-1/2 left-3 size-4 -translate-y-1/2 text-muted-foreground"
                  />
                  <Input
                    id="publish-hf-token"
                    type="password"
                    autoComplete="new-password"
                    className="pl-9"
                    placeholder="hf_..."
                    value={hfToken}
                    onChange={(event) => setHfToken(event.target.value)}
                    disabled={publishing}
                  />
                </div>
                <p className="text-xs text-muted-foreground">
                  Leave empty if you're already logged in via CLI.
                </p>
              </div>

              <div className="corner-squircle flex items-start gap-3 rounded-2xl border border-border/60 bg-card/35 p-3">
                <Switch
                  id="publish-private"
                  size="sm"
                  checked={privateRepo}
                  onCheckedChange={setPrivateRepo}
                  disabled={publishing}
                />
                <div className="space-y-1">
                  <label
                    htmlFor="publish-private"
                    className="text-sm font-medium text-foreground"
                  >
                    Private dataset
                  </label>
                  <p className="text-xs text-muted-foreground">
                    Only people with access can view or download the repo.
                  </p>
                </div>
              </div>

              {publishError ? (
                <div className="rounded-2xl border border-destructive/40 bg-destructive/5 p-3 text-sm text-destructive">
                  {publishError}
                </div>
              ) : null}
            </div>

            <DialogFooter>
              <Button
                variant="outline"
                onClick={() => onOpenChange(false)}
                disabled={publishing}
              >
                Cancel
              </Button>
              <Button onClick={() => void handlePublish()} disabled={!canSubmit}>
                {publishing ? "Publishing..." : "Publish to Hugging Face"}
              </Button>
            </DialogFooter>
          </>
        )}
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/graph/internals-sync.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useUpdateNodeInternals } from "@xyflow/react";
import { useEffect, useMemo, useRef } from "react";

/** Props accepted by {@link InternalsSync}. */
type InternalsSyncProps = {
  /** Ids of the nodes passed to the node-internals updater. */
  nodeIds: string[];
};

/**
 * Render-less component: whenever the joined list of node ids changes, it
 * schedules (on the next animation frame) a call to the updater returned by
 * `useUpdateNodeInternals` with the most recent `nodeIds`.
 *
 * Always renders `null`.
 */
export function InternalsSync({ nodeIds }: InternalsSyncProps): null {
  const refreshInternals = useUpdateNodeInternals();
  // A string signature lets the effect depend on the ids' content rather
  // than on the identity of the array instance.
  const signature = useMemo(() => nodeIds.join("|"), [nodeIds]);
  // Keep the latest array reachable from the deferred callback.
  const latestIdsRef = useRef(nodeIds);
  latestIdsRef.current = nodeIds;

  useEffect(() => {
    if (signature === "") {
      // Nothing to refresh.
      return;
    }
    const frameHandle = requestAnimationFrame(() => {
      refreshInternals(latestIdsRef.current);
    });
    return () => {
      cancelAnimationFrame(frameHandle);
    };
  }, [signature, refreshInternals]);

  return null;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-category-badges.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { type ReactElement, useLayoutEffect, useRef, useState } from "react";

/** Props accepted by {@link InlineCategoryBadges}. */
type InlineCategoryBadgesProps = {
  /** Category labels to render as badges. */
  values: string[];
};

/**
 * Shows as many category badges as fit the measured width, followed by a
 * "+N" badge for the remainder.
 *
 * An invisible, non-wrapping copy of every badge is rendered purely for
 * measurement; the visible row only renders the first `visibleCount` values.
 * Measurement re-runs whenever `values` changes.
 */
export function InlineCategoryBadges({
  values,
}: InlineCategoryBadgesProps): ReactElement {
  const measurerRef = useRef<HTMLDivElement>(null);
  const [visibleCount, setVisibleCount] = useState(values.length);

  useLayoutEffect(() => {
    const measurer = measurerRef.current;
    if (measurer === null) {
      return;
    }

    // Defer the state write to the next frame; the returned function cancels
    // it if the effect is cleaned up first.
    const scheduleCount = (next: number): (() => void) => {
      const frame = requestAnimationFrame(() => setVisibleCount(next));
      return () => cancelAnimationFrame(frame);
    };

    const measured = Array.from(measurer.children) as HTMLElement[];
    if (measured.length === 0) {
      return scheduleCount(0);
    }

    const available = measurer.clientWidth;
    // Reserve space for the "+N" badge (~36px)
    const overflowReserve = 36;
    const lastIndex = measured.length - 1;
    let fitted = 0;
    let consumed = 0;

    for (const [index, badge] of measured.entries()) {
      const badgeWidth = badge.scrollWidth + 4; // 4px for gap
      const needed = consumed + badgeWidth;
      // Every badge except the last must leave room for the "+N" badge;
      // the last one only has to fit inside the container itself.
      const limit =
        index < lastIndex ? available - overflowReserve : available;
      if (needed > limit) {
        break;
      }
      consumed = needed;
      fitted = index + 1;
    }

    // Always show at least one badge.
    return scheduleCount(fitted || 1);
  }, [values]);

  if (values.length === 0) {
    return <p className="text-xs text-muted-foreground">No values</p>;
  }

  const shownValues = values.slice(0, visibleCount);
  const hiddenCount = values.length - visibleCount;

  return (
    <div className="relative">
      {/* Hidden measurer */}
      <div
        ref={measurerRef}
        className="pointer-events-none invisible absolute inset-x-0 top-0 flex flex-nowrap gap-1"
        aria-hidden
      >
        {values.map((label, position) => (
          <Badge
            key={`m-${label}-${position}`}
            variant="secondary"
            className="corner-squircle h-4 shrink-0 px-1.5 text-[10px]"
          >
            {label}
          </Badge>
        ))}
      </div>
      {/* Visible badges */}
      <div className="flex flex-wrap gap-1">
        {shownValues.map((label, position) => (
          <Badge
            key={`${label}-${position}`}
            variant="secondary"
            className="corner-squircle h-4 px-1.5 text-[10px]"
          >
            {label}
          </Badge>
        ))}
        {hiddenCount > 0 ? (
          <Badge variant="outline" className="corner-squircle h-4 px-1.5 text-[10px]">
            +{hiddenCount}
          </Badge>
        ) : null}
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-expression.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { ReactElement } from "react";
import { useRecipeStudioStore } from "../../stores/recipe-studio";
import type { ExpressionConfig, ExpressionDtype } from "../../types";
import { findInvalidJinjaReferences } from "../../utils/refs";
import { getAvailableVariableEntries } from "../../utils/variables";
import { AvailableReferencesInline } from "../shared/available-references-inline";
import { InlineField } from "./inline-field";

/** Props accepted by {@link InlineExpression}. */
type InlineExpressionProps = {
  /** Expression config being edited. */
  config: ExpressionConfig;
  /** Receives a partial config with the field(s) the user changed. */
  onUpdate: (patch: Partial<ExpressionConfig>) => void;
};

/** Choices offered in the "Output type" select. */
const DTYPE_OPTIONS: ExpressionDtype[] = ["str", "int", "float", "bool"];

/**
 * Inline editor for an expression config: an output-type select, the
 * expression text input, and the list of references available to it.
 *
 * The expression input is flagged `aria-invalid` when
 * `findInvalidJinjaReferences` reports at least one invalid reference.
 */
export function InlineExpression({
  config,
  onUpdate,
}: InlineExpressionProps): ReactElement {
  const allConfigs = useRecipeStudioStore((state) => state.configs);
  const referenceEntries = getAvailableVariableEntries(allConfigs, config.id);
  const referenceNames = referenceEntries.map((entry) => entry.name);
  const hasInvalidRefs =
    findInvalidJinjaReferences(config.expr, referenceNames).length > 0;

  const dtypeItems = DTYPE_OPTIONS.map((option) => (
    <SelectItem key={option} value={option}>
      {option}
    </SelectItem>
  ));

  return (
    <div className="space-y-3">
      <div className="grid gap-3 sm:grid-cols-[130px_1fr]">
        <InlineField label="Output type">
          <Select
            value={config.dtype}
            onValueChange={(value) => {
              onUpdate({ dtype: value as ExpressionDtype });
            }}
          >
            <SelectTrigger className="nodrag h-8 w-full text-xs">
              <SelectValue placeholder="dtype" />
            </SelectTrigger>
            <SelectContent>{dtypeItems}</SelectContent>
          </Select>
        </InlineField>
        <InlineField label="Expression">
          <Input
            className="nodrag h-8 w-full text-xs"
            aria-invalid={hasInvalidRefs}
            placeholder="{{ column_name }}"
            value={config.expr}
            onChange={(event) => {
              onUpdate({ expr: event.target.value });
            }}
          />
        </InlineField>
      </div>
      <AvailableReferencesInline entries={referenceEntries} />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-field.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import type { ReactElement, ReactNode } from "react";

/** Props accepted by {@link InlineField}. */
type InlineFieldProps = {
  /** Caption rendered above the field content. */
  label: string;
  /** Extra classes merged onto the wrapper element. */
  className?: string;
  /** The control(s) rendered beneath the caption. */
  children: ReactNode;
};

/**
 * Layout wrapper that stacks a small uppercase caption above its children.
 */
export function InlineField(props: InlineFieldProps): ReactElement {
  const { label, className, children } = props;
  const wrapperClass = cn("grid gap-1.5", className);

  return (
    <div className={wrapperClass}>
      <p className="text-[11px] font-semibold uppercase tracking-wide text-muted-foreground">
        {label}
      </p>
      {children}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-llm.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { type ReactElement, useMemo, useRef } from "react";
import { useRecipeStudioStore } from "../../stores/recipe-studio";
import type { LlmConfig } from "../../types";
import { InlineField } from "./inline-field";

/** Props accepted by {@link InlineLlm}. */
type InlineLlmProps = {
  /** LLM config being edited. */
  config: LlmConfig;
  /** Receives a partial config with the field(s) the user changed. */
  onUpdate: (patch: Partial<LlmConfig>) => void;
};

/** Values offered in the "Code language" select. */
const CODE_LANG_OPTIONS = [
  "python",
  "javascript",
  "typescript",
  "java",
  "kotlin",
  "go",
  "rust",
  "ruby",
  "scala",
  "swift",
  "sql:sqlite",
  "sql:postgres",
  "sql:mysql",
  "sql:tsql",
  "sql:bigquery",
  "sql:ansi",
] as const;

/**
 * Inline editor for an LLM config: model alias and tool profile comboboxes
 * (populated from the `model_config` / `tool_config` entries in the store)
 * and, for `llm_type === "code"`, a code-language select.
 *
 * Both comboboxes also commit free-typed text when the input loses focus.
 */
export function InlineLlm({ config, onUpdate }: InlineLlmProps): ReactElement {
  const isCode = config.llm_type === "code";
  const nodeConfigs = useRecipeStudioStore((state) => state.configs);
  const modelConfigAliases = useMemo(() => {
    const modelConfigs = Object.values(nodeConfigs).filter(
      (entry) => entry.kind === "model_config",
    );
    return modelConfigs.map((entry) => entry.name);
  }, [nodeConfigs]);
  const toolProfileAliases = useMemo(() => {
    const toolConfigs = Object.values(nodeConfigs).filter(
      (entry) => entry.kind === "tool_config",
    );
    return toolConfigs.map((entry) => entry.name);
  }, [nodeConfigs]);
  // `aliasInputRef` tracks the text currently typed into the model alias
  // input; `lastAliasRef` remembers the last `config.model_alias` seen.
  const aliasInputRef = useRef(config.model_alias);
  const lastAliasRef = useRef(config.model_alias);
  const anchorRef = useRef<HTMLDivElement>(null);
  const toolAnchorRef = useRef<HTMLDivElement>(null);

  // When the configured alias differs from the one seen on the previous
  // render, reset the tracked input text to it.
  const aliasChanged = lastAliasRef.current !== config.model_alias;
  if (aliasChanged) {
    aliasInputRef.current = config.model_alias;
    lastAliasRef.current = config.model_alias;
  }

  const selectedCodeLang = config.code_lang?.trim() || "python";

  return (
    <div className="space-y-3">
      <InlineField label="Model alias">
        <div ref={anchorRef}>
          <Combobox
            items={modelConfigAliases}
            filteredItems={modelConfigAliases}
            filter={null}
            value={config.model_alias || null}
            onValueChange={(value) => {
              onUpdate({
                // biome-ignore lint/style/useNamingConvention: api schema
                model_alias: value ?? "",
              });
            }}
            onInputValueChange={(value) => {
              aliasInputRef.current = value;
            }}
            itemToStringValue={(value) => value}
            autoHighlight={true}
          >
            <ComboboxInput
              className="nodrag h-8 w-full text-xs"
              placeholder="Model alias"
              onBlur={() => {
                const typedAlias = aliasInputRef.current;
                if (typedAlias === config.model_alias) {
                  return;
                }
                onUpdate({
                  // biome-ignore lint/style/useNamingConvention: api schema
                  model_alias: typedAlias,
                });
              }}
            />
            <ComboboxContent anchor={anchorRef}>
              <ComboboxEmpty>No model configs found</ComboboxEmpty>
              <ComboboxList>
                {(alias: string) => (
                  <ComboboxItem key={alias} value={alias}>
                    {alias}
                  </ComboboxItem>
                )}
              </ComboboxList>
            </ComboboxContent>
          </Combobox>
        </div>
      </InlineField>
      <InlineField label="Tool profile">
        <div ref={toolAnchorRef}>
          <Combobox
            items={toolProfileAliases}
            filteredItems={toolProfileAliases}
            filter={null}
            value={config.tool_alias || null}
            onValueChange={(value) => {
              onUpdate({
                // biome-ignore lint/style/useNamingConvention: api schema
                tool_alias: value ?? "",
              });
            }}
            itemToStringValue={(value) => value}
            autoHighlight={true}
          >
            <ComboboxInput
              className="nodrag h-8 w-full text-xs"
              placeholder="Tool profile"
              onBlur={(event) => {
                const typedAlias = event.target.value;
                const currentAlias = config.tool_alias ?? "";
                if (typedAlias === currentAlias) {
                  return;
                }
                onUpdate({
                  // biome-ignore lint/style/useNamingConvention: api schema
                  tool_alias: typedAlias,
                });
              }}
            />
            <ComboboxContent anchor={toolAnchorRef}>
              <ComboboxEmpty>No tool profiles found</ComboboxEmpty>
              <ComboboxList>
                {(alias: string) => (
                  <ComboboxItem key={alias} value={alias}>
                    {alias}
                  </ComboboxItem>
                )}
              </ComboboxList>
            </ComboboxContent>
          </Combobox>
        </div>
      </InlineField>
      {isCode ? (
        <InlineField label="Code language">
          <Select
            value={selectedCodeLang}
            onValueChange={(value) => {
              onUpdate({
                // biome-ignore lint/style/useNamingConvention: api schema
                code_lang: value,
              });
            }}
          >
            <SelectTrigger className="nodrag h-8 w-full text-xs">
              <SelectValue placeholder="Language" />
            </SelectTrigger>
            <SelectContent>
              {CODE_LANG_OPTIONS.map((option) => (
                <SelectItem key={option} value={option}>
                  {option}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </InlineField>
      ) : null}
      <p className="text-[11px] text-muted-foreground">
        Prompt/system edited on aux nodes.
      </p>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-model.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import type { ReactElement } from "react";
import type { ModelConfig, ModelProviderConfig } from "../../types";
import { InlineField } from "./inline-field";

/** Patch shape for either of the two config kinds this editor handles. */
type InlineModelPatch = Partial<ModelProviderConfig> | Partial<ModelConfig>;

/** Props accepted by {@link InlineModel}. */
type InlineModelProps = {
  /** Provider or model config being edited; `kind` selects the layout. */
  config: ModelProviderConfig | ModelConfig;
  /** Receives a partial config with the field(s) the user changed. */
  onUpdate: (patch: InlineModelPatch) => void;
};

/**
 * Inline editor for model-related configs.
 *
 * - `kind === "model_provider"`: endpoint and API key inputs.
 * - otherwise: provider, model and temperature inputs.
 *
 * Input values are forwarded to `onUpdate` as the raw strings from the DOM.
 */
export function InlineModel(props: InlineModelProps): ReactElement {
  const { config, onUpdate } = props;

  if (config.kind !== "model_provider") {
    return (
      <div className="grid gap-3 sm:grid-cols-2">
        <InlineField label="Provider">
          <Input
            className="nodrag h-8 w-full text-xs"
            placeholder="provider alias"
            value={config.provider}
            onChange={(event) => {
              onUpdate({ provider: event.target.value });
            }}
          />
        </InlineField>
        <InlineField label="Model">
          <Input
            className="nodrag h-8 w-full text-xs"
            placeholder="gpt-4o-mini"
            value={config.model}
            onChange={(event) => {
              onUpdate({ model: event.target.value });
            }}
          />
        </InlineField>
        <InlineField label="Temperature" className="sm:col-span-2">
          <Input
            className="nodrag h-8 w-full text-xs"
            type="number"
            placeholder="0.7"
            value={config.inference_temperature ?? ""}
            onChange={(event) => {
              onUpdate({
                // biome-ignore lint/style/useNamingConvention: api schema
                inference_temperature: event.target.value,
              });
            }}
          />
        </InlineField>
      </div>
    );
  }

  return (
    <div className="grid gap-3 sm:grid-cols-2">
      <InlineField label="Endpoint">
        <Input
          className="nodrag h-8 w-full text-xs"
          placeholder="https://api.example.com/v1"
          value={config.endpoint}
          onChange={(event) => {
            onUpdate({ endpoint: event.target.value });
          }}
        />
      </InlineField>
      <InlineField label="API key">
        <Input
          className="nodrag h-8 w-full text-xs"
          placeholder="Optional"
          value={config.api_key ?? ""}
          onChange={(event) => {
            onUpdate({
              // biome-ignore lint/style/useNamingConvention: api schema
              api_key: event.target.value,
            });
          }}
        />
      </InlineField>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-policy.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig, SamplerType } from "../../types";

/** Where a node's configuration is edited: on the node itself or in a dialog. */
export type ConfigUiMode = "inline" | "dialog";

/** Sampler types whose configuration is edited inline. */
const INLINE_SAMPLERS = new Set<SamplerType>([
  "uniform",
  "gaussian",
  "bernoulli",
  "uuid",
]);

/**
 * Decide whether a node config is edited inline or in a dialog.
 *
 * Returns `"inline"` for:
 * - samplers whose `sampler_type` is in `INLINE_SAMPLERS`,
 * - `model_provider`, `model_config`, `seed` and `expression` configs,
 * - `llm` configs whose `llm_type` is `"text"` or `"code"`.
 *
 * Everything else — including a missing config — returns `"dialog"`.
 */
export function getConfigUiMode(
  config: NodeConfig | null | undefined,
): ConfigUiMode {
  if (!config) {
    return "dialog";
  }

  switch (config.kind) {
    case "sampler":
      return INLINE_SAMPLERS.has(config.sampler_type) ? "inline" : "dialog";
    case "model_provider":
    case "model_config":
    case "seed":
    case "expression":
      return "inline";
    case "llm": {
      const inlineLlm =
        config.llm_type === "text" || config.llm_type === "code";
      return inlineLlm ? "inline" : "dialog";
    }
    default:
      // Covers "tool_config" and any kind not listed above.
      return "dialog";
  }
}

/**
 * Convenience predicate: `true` when `getConfigUiMode` resolves the given
 * config (which may be missing) to `"inline"`.
 */
export function isInlineConfig(
  config: NodeConfig | null | undefined,
): boolean {
  const mode = getConfigUiMode(config);
  return mode === "inline";
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-sampler.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { InlineField } from "./inline-field";

/** Props accepted by `InlineSampler`. */
type InlineSamplerProps = {
  /** Sampler config being edited. */
  config: SamplerConfig;
  /** Receives a partial config with the field(s) the user changed. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/** Conversion targets selectable in the "Convert to" field. */
type ConvertTo = "int" | "float" | "str";

/**
 * Select for the optional `convert_to` setting.
 *
 * An unset value is shown as the "None" entry (internal value `"none"`);
 * choosing "None" reports `undefined` to the caller.
 */
function ConvertToField({
  value,
  onValueChange,
}: {
  value: SamplerConfig["convert_to"];
  onValueChange: (value: ConvertTo | undefined) => void;
}): ReactElement {
  const selected = value ?? "none";

  return (
    <Select
      value={selected}
      onValueChange={(next) => {
        if (next === "none") {
          onValueChange(undefined);
          return;
        }
        onValueChange(next as ConvertTo);
      }}
    >
      <SelectTrigger className="nodrag h-8 w-full text-xs">
        <SelectValue placeholder="Convert" />
      </SelectTrigger>
      <SelectContent>
        <SelectItem value="none">None</SelectItem>
        <SelectItem value="int">int</SelectItem>
        <SelectItem value="float">float</SelectItem>
        <SelectItem value="str">str</SelectItem>
      </SelectContent>
    </Select>
  );
}

/**
 * Inline editor for the sampler types that have a compact form:
 *
 * - `uniform`: low / high / convert-to
 * - `gaussian`: mean / std dev / convert-to
 * - `bernoulli`: probability
 * - `uuid`: format string
 *
 * Returns `null` for any other `sampler_type`. Input values are forwarded to
 * `onUpdate` as the raw strings from the DOM.
 */
export function InlineSampler({
  config,
  onUpdate,
}: InlineSamplerProps): ReactElement | null {
  // Shared by the uniform and gaussian layouts.
  const renderConvertTo = (): ReactElement => (
    <InlineField label="Convert to">
      <ConvertToField
        value={config.convert_to}
        onValueChange={(value) => {
          onUpdate({
            // biome-ignore lint/style/useNamingConvention: api schema
            convert_to: value,
          });
        }}
      />
    </InlineField>
  );

  switch (config.sampler_type) {
    case "uniform":
      return (
        <div className="grid gap-3 sm:grid-cols-3">
          <InlineField label="Low">
            <Input
              className="nodrag h-8 w-full text-xs"
              type="number"
              placeholder="0"
              value={config.low ?? ""}
              onChange={(event) => {
                onUpdate({ low: event.target.value });
              }}
            />
          </InlineField>
          <InlineField label="High">
            <Input
              className="nodrag h-8 w-full text-xs"
              type="number"
              placeholder="100"
              value={config.high ?? ""}
              onChange={(event) => {
                onUpdate({ high: event.target.value });
              }}
            />
          </InlineField>
          {renderConvertTo()}
        </div>
      );

    case "gaussian":
      return (
        <div className="grid gap-3 sm:grid-cols-3">
          <InlineField label="Mean">
            <Input
              className="nodrag h-8 w-full text-xs"
              type="number"
              placeholder="0"
              value={config.mean ?? ""}
              onChange={(event) => {
                onUpdate({ mean: event.target.value });
              }}
            />
          </InlineField>
          <InlineField label="Std dev">
            <Input
              className="nodrag h-8 w-full text-xs"
              type="number"
              placeholder="1"
              value={config.std ?? ""}
              onChange={(event) => {
                onUpdate({ std: event.target.value });
              }}
            />
          </InlineField>
          {renderConvertTo()}
        </div>
      );

    case "bernoulli":
      return (
        <InlineField label="Probability (p)">
          <Input
            className="nodrag h-8 w-full text-xs"
            type="number"
            min="0"
            max="1"
            step="0.01"
            placeholder="0.5"
            value={config.p ?? ""}
            onChange={(event) => {
              onUpdate({ p: event.target.value });
            }}
          />
        </InlineField>
      );

    case "uuid":
      return (
        <InlineField label="UUID format">
          <Input
            className="nodrag h-8 w-full text-xs"
            placeholder="uuid4"
            value={config.uuid_format ?? ""}
            onChange={(event) => {
              onUpdate({
                // biome-ignore lint/style/useNamingConvention: api schema
                uuid_format: event.target.value,
              });
            }}
          />
        </InlineField>
      );

    default:
      return null;
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/inline/inline-seed.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { DocumentAttachmentIcon, DocumentCodeIcon, Plant01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import type { ReactElement } from "react";
import type { SeedConfig } from "../../types";
import { HfDatasetCombobox } from "../shared/hf-dataset-combobox";
import { InlineField } from "./inline-field";

/** Props accepted by {@link InlineSeed}. */
type InlineSeedProps = {
  /** Seed config being edited. */
  config: SeedConfig;
  /** Receives a partial config with the field(s) the user changed. */
  onUpdate: (patch: Partial<SeedConfig>) => void;
};

/**
 * Inline summary/editor for a seed config.
 *
 * - Source type `"hf"` (also the default when unset): a dataset combobox.
 *   Picking a dataset also clears the path, column lists and preview rows.
 * - Any other source type: a read-only row showing the selected file name;
 *   `"local"` is labelled as a structured file, everything else as an
 *   unstructured document.
 */
export function InlineSeed({ config, onUpdate }: InlineSeedProps): ReactElement {
  const sourceType = config.seed_source_type ?? "hf";

  if (sourceType === "hf") {
    const accessToken = config.hf_token?.trim() || undefined;
    return (
      <div className="space-y-2">
        <InlineField label="Dataset">
          <HfDatasetCombobox
            value={config.hf_repo_id}
            accessToken={accessToken}
            onValueChange={(next) => {
              onUpdate({
                hf_repo_id: next,
                hf_path: "",
                seed_columns: [],
                seed_drop_columns: [],
                seed_preview_rows: [],
              });
            }}
            placeholder="org/repo"
          />
        </InlineField>
        <p className="text-[11px] text-muted-foreground">
          Load columns in dialog.
        </p>
      </div>
    );
  }

  const isLocal = sourceType === "local";
  const rawFileName = isLocal
    ? config.local_file_name
    : config.unstructured_file_name;
  const fileName = rawFileName?.trim();
  const sourceIcon = isLocal ? DocumentCodeIcon : DocumentAttachmentIcon;
  const sourceLabel = isLocal ? "Structured file" : "Unstructured document";

  return (
    <div className="corner-squircle flex items-center gap-2 rounded-md border border-border/60 bg-muted/30 px-2 py-2">
      <div className="corner-squircle rounded-md bg-primary/10 p-1.5 text-primary">
        <HugeiconsIcon icon={sourceIcon} className="size-3.5" />
      </div>
      <div className="min-w-0">
        <p className="truncate text-xs font-medium">
          {fileName || "No file selected"}
        </p>
        <p className="text-[11px] text-muted-foreground">
          {sourceLabel} · configure in dialog
        </p>
      </div>
      <HugeiconsIcon icon={Plant01Icon} className="ml-auto size-3.5 text-muted-foreground/60" />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/recipe-floating-icon-button-class.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Shared `className` string of utility classes: a 44px-square (`h-11 w-11`),
 * bordered, transparent button whose text and border switch to the primary
 * colour on hover. Judging by the name, it is meant for the recipe view's
 * floating icon buttons — confirm against the call sites.
 */
export const RECIPE_FLOATING_ICON_BUTTON_CLASS =
  "corner-squircle group h-11 w-11 rounded-xl border border-border/60 bg-transparent p-0 text-muted-foreground hover:bg-transparent hover:text-primary hover:border-primary/60";


================================================
FILE: studio/frontend/src/features/recipe-studio/components/recipe-graph-aux-node.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";
import {
  Handle,
  Position,
  type Node,
  type NodeProps,
  useUpdateNodeInternals,
} from "@xyflow/react";
import { memo, type ReactElement, useEffect } from "react";
import { useRecipeStudioStore } from "../stores/recipe-studio";
import type { LlmConfig, Score, ScoreOption } from "../types";
import { AUX_HANDLE_CLASS } from "../utils/handle-layout";
import { HANDLE_IDS } from "../utils/handles";
import { findInvalidJinjaReferences } from "../utils/refs";
import { getAvailableVariableEntries } from "../utils/variables";
import { AvailableReferencesInline } from "./shared/available-references-inline";
import { BaseNode, BaseNodeContent, BaseNodeHeader, BaseNodeHeaderTitle } from "./rf-ui/base-node";

/** Which text field of the LLM config a prompt-input aux node edits. */
type PromptField = "prompt" | "system_prompt";

/** Data for an aux node that edits one prompt field of an LLM config. */
type PromptInputNodeData = {
  kind: "llm-prompt-input";
  /** Key of the owning LLM config in the store's `configs` map. */
  llmId: string;
  /** The config field read and written by the node's textarea. */
  field: PromptField;
  /** Text shown in the node header. */
  title: string;
  /** When truthy, the node's controls are rendered disabled. */
  executionLocked?: boolean;
};

/** Data for an aux node that edits one entry of an LLM config's `scores`. */
type JudgeScoreNodeData = {
  kind: "llm-judge-score";
  /** Key of the owning LLM config in the store's `configs` map. */
  llmId: string;
  /** Index of the edited score inside `config.scores`. */
  scoreIndex: number;
  /** When truthy, the node's controls are rendered disabled. */
  executionLocked?: boolean;
};

/** Discriminated (by `kind`) union of all aux node payloads. */
export type RecipeGraphAuxNodeData = PromptInputNodeData | JudgeScoreNodeData;
/** React Flow node type for aux nodes (node type id `"aux"`). */
export type RecipeGraphAuxNodeType = Node<RecipeGraphAuxNodeData, "aux">;

/**
 * Build a new scores array in which the entry at `scoreIndex` has `patch`
 * shallow-merged over it; every other entry is reused as-is.
 *
 * A config without `scores` yields an empty array, and an index that matches
 * no entry yields an unchanged copy. The input is never mutated.
 */
function updateScoreAt(
  config: LlmConfig,
  scoreIndex: number,
  patch: Partial<Score>,
): Score[] {
  const existing = config.scores ?? [];
  const applyPatch = (entry: Score, position: number): Score => {
    if (position !== scoreIndex) {
      return entry;
    }
    return { ...entry, ...patch };
  };
  return existing.map(applyPatch);
}

/**
 * Build a new options array for `score` in which the entry at `optionIndex`
 * has `patch` shallow-merged over it; every other entry is reused as-is.
 *
 * The input score is never mutated.
 */
function updateOptionAt(
  score: Score,
  optionIndex: number,
  patch: Partial<ScoreOption>,
): ScoreOption[] {
  const applyPatch = (entry: ScoreOption, position: number): ScoreOption => {
    if (position !== optionIndex) {
      return entry;
    }
    return { ...entry, ...patch };
  };
  return score.options.map(applyPatch);
}

/**
 * Thin wrapper that forwards the given variable entries to
 * `AvailableReferencesInline`.
 */
function AuxVariableBadges(props: {
  entries: ReturnType<typeof getAvailableVariableEntries>;
}): ReactElement | null {
  const { entries } = props;
  return <AvailableReferencesInline entries={entries} />;
}

function AuxNodeBase({
  id,
  data,
}: NodeProps<RecipeGraphAuxNodeType>): ReactElement | null {
  const config = useRecipeStudioStore((state) => state.configs[data.llmId]);
  const configs = useRecipeStudioStore((state) => state.configs);
  const updateConfig = useRecipeStudioStore((state) => state.updateConfig);
  const updateNodeInternals = useUpdateNodeInternals();

  useEffect(() => {
    updateNodeInternals(id);
  }, [id, updateNodeInternals]);

  if (!(config && config.kind === "llm")) {
    return null;
  }
  const executionLocked = Boolean(data.executionLocked);

  const sourceHandles = (
    <>
      <Handle
        id={HANDLE_IDS.llmInputOutLeft}
        type="source"
        position={Position.Left}
        isConnectable={false}
        isConnectableStart={false}
        className={AUX_HANDLE_CLASS}
      />
      <Handle
        id={HANDLE_IDS.llmInputOutRight}
        type="source"
        position={Position.Right}
        isConnectable={false}
        isConnectableStart={false}
        className={AUX_HANDLE_CLASS}
      />
      <Handle
        id={HANDLE_IDS.llmInputOutTop}
        type="source"
        position={Position.Top}
        isConnectable={false}
        isConnectableStart={false}
        className={AUX_HANDLE_CLASS}
      />
      <Handle
        id={HANDLE_IDS.llmInputOutBottom}
        type="source"
        position={Position.Bottom}
        isConnectable={false}
        isConnectableStart={false}
        className={AUX_HANDLE_CLASS}
      />
    </>
  );

  if (data.kind === "llm-prompt-input") {
    const value = data.field === "prompt" ? config.prompt : config.system_prompt;
    const variableEntries = getAvailableVariableEntries(configs, data.llmId);
    const availableRefs = variableEntries.map((entry) => entry.name);
    const hasInvalidRefs =
      findInvalidJinjaReferences(value, availableRefs).length > 0;
    return (
      <BaseNode className="corner-squircle w-full min-w-0 rounded-lg border-border/60 bg-card shadow-sm">
        <BaseNodeHeader className="border-b border-border/50 px-3 py-2">
          <BaseNodeHeaderTitle className="text-xs">{data.title}</BaseNodeHeaderTitle>
        </BaseNodeHeader>
        <BaseNodeContent className="gap-2 px-3 py-2">
          <Textarea
            className="corner-squircle nodrag nowheel max-h-40 min-h-[88px] w-full resize-none overflow-y-auto text-xs"
            aria-invalid={hasInvalidRefs}
            value={value}
            disabled={executionLocked}
            onChange={(event) =>
              updateConfig(data.llmId, {
                [data.field]: event.target.value,
              } as Partial<LlmConfig>)
            }
          />
          <AuxVariableBadges entries={variableEntries} />
        </BaseNodeContent>
        {sourceHandles}
      </BaseNode>
    );
  }

  const score = config.scores?.[data.scoreIndex];
  if (!score) {
    return null;
  }

  const updateScore = (patch: Partial<Score>): void => {
    updateConfig(data.llmId, {
      scores: updateScoreAt(config, data.scoreIndex, patch),
    });
  };

  const removeScore = (): void => {
    const nextScores = (config.scores ?? []).filter(
      (_score, index) => index !== data.scoreIndex,
    );
    updateConfig(data.llmId, { scores: nextScores });
  };

  const addOption = (): void => {
    updateScore({
      options: [...score.options, { value: "", description: "" }],
    });
  };

  const removeOption = (optionIndex: number): void => {
    updateScore({
      options: score.options.filter((_option, index) => index !== optionIndex),
    });
  };

  const updateOption = (
    optionIndex: number,
    patch: Partial<ScoreOption>,
  ): void => {
    updateScore({
      options: updateOptionAt(score, optionIndex, patch),
    });
  };

  return (
    <BaseNode className="corner-squircle w-full min-w-0 rounded-lg border-border/60 bg-card shadow-sm">
      <BaseNodeHeader className="border-b border-border/50 px-3 py-2">
        <BaseNodeHeaderTitle className="text-xs">
          {score.name.trim() || `Scorer ${data.scoreIndex + 1}`}
        </BaseNodeHeaderTitle>
        <Button
          type="button"
          size="xs"
          variant="ghost"
          className="nodrag"
          disabled={executionLocked}
          onClick={removeScore}
        >
          Remove
        </Button>
      </BaseNodeHeader>
      <BaseNodeContent className="gap-2 px-3 py-2">
        <Input
          className="nodrag h-7 w-full text-xs"
          placeholder="Score name"
          value={score.name}
          disabled={executionLocked}
          onChange={(event) => updateScore({ name: event.target.value })}
        />
        <Textarea
          className="corner-squircle nodrag nowheel max-h-32 min-h-[56px] w-full resize-none overflow-y-auto text-xs"
          placeholder="Score description"
          value={score.description}
          disabled={executionLocked}
          onChange={(event) => updateScore({ description: event.target.value })}
        />
        <div className="space-y-1">
          {score.options.map((option, optionIndex) => (
            <div key={`${data.llmId}-score-${data.scoreIndex}-opt-${optionIndex}`} className="grid grid-cols-[74px_1fr_auto] gap-1">
              <Input
                className="nodrag h-7 text-xs"
                placeholder="Value"
                value={option.value}
                disabled={executionLocked}
                onChange={(event) =>
                  updateOption(optionIndex, { value: event.target.value })
                }
              />
              <Input
                className="nodrag h-7 text-xs"
                placeholder="Description"
                value={option.description}
                disabled={executionLocked}
                onChange={(event) =>
                  updateOption(optionIndex, {
                    description: event.target.value,
                  })
                }
              />
              <Button
                type="button"
                size="xs"
                variant="ghost"
                className="nodrag"
                disabled={executionLocked}
                onClick={() => removeOption(optionIndex)}
              >
                x
              </Button>
            </div>
          ))}
          <Button
            type="button"
            size="xs"
            variant="outline"
            className="nodrag mt-1"
            disabled={executionLocked}
            onClick={addOption}
          >
            Add option
          </Button>
        </div>
      </BaseNodeContent>
      {sourceHandles}
    </BaseNode>
  );
}

// Public export: AuxNodeBase wrapped in `memo`.
export const RecipeGraphAuxNode = memo(AuxNodeBase);


================================================
FILE: studio/frontend/src/features/recipe-studio/components/recipe-graph-node.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { MarkdownPreview } from "@/components/markdown/markdown-preview";
import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
import {
  BalanceScaleIcon,
  Clock01Icon,
  CodeIcon,
  CodeSimpleIcon,
  DiceFaces03Icon,
  EqualSignIcon,
  FingerPrintIcon,
  FunctionIcon,
  Parabola02Icon,
  PencilEdit02Icon,
  Plant01Icon,
  Plug01Icon,
  Shield02Icon,
  Tag01Icon,
  TagsIcon,
  UserAccountIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import {
  type NodeProps,
  NodeResizer,
  Position,
  useUpdateNodeInternals,
} from "@xyflow/react";
import { type ReactElement, memo, useEffect } from "react";
import {
  MAX_NODE_WIDTH,
  MAX_NOTE_NODE_WIDTH,
  MIN_NODE_WIDTH,
} from "../constants";
import { useNodeConnectionStatus } from "../hooks/use-node-connection-status";
import { useRecipeStudioStore } from "../stores/recipe-studio";
import type {
  LlmType,
  NodeConfig,
  RecipeNode as RecipeGraphNodeType,
  SamplerType,
} from "../types";
import { NODE_HANDLE_CLASS } from "../utils/handle-layout";
import { HANDLE_IDS } from "../utils/handles";
import {
  RECIPE_STUDIO_NODE_TONES,
  RECIPE_STUDIO_USER_NODE_TONE,
} from "../utils/ui-tones";
import { InlineCategoryBadges } from "./inline/inline-category-badges";
import { InlineExpression } from "./inline/inline-expression";
import { InlineLlm } from "./inline/inline-llm";
import { InlineModel } from "./inline/inline-model";
import { isInlineConfig } from "./inline/inline-policy";
import { InlineSampler } from "./inline/inline-sampler";
import { InlineSeed } from "./inline/inline-seed";
import {
  BaseNode,
  BaseNodeContent,
  BaseNodeHeader,
  BaseNodeHeaderTitle,
} from "./rf-ui/base-node";
import { LabeledHandle } from "./rf-ui/labeled-handle";

type IconType = typeof CodeIcon;

/**
 * Parse a CSS-style hex color into its RGB channels.
 *
 * Accepts `RRGGBB` or the `RGB` shorthand, with or without a single leading
 * `#`; surrounding whitespace is ignored.
 *
 * @param hex Color string such as `"#FDE68A"`, `"fde68a"` or `"#abc"`.
 * @returns The 0-255 channel values, or `null` when `hex` is not a valid
 *   3- or 6-digit hex color.
 */
function hexToRgb(hex: string): { r: number; g: number; b: number } | null {
  // Only a leading "#" is a prefix; a "#" anywhere else means the value is
  // malformed and must not be silently repaired.
  const digits = hex.trim().replace(/^#/, "");

  let normalized: string;
  if (/^[0-9a-fA-F]{6}$/.test(digits)) {
    normalized = digits;
  } else if (/^[0-9a-fA-F]{3}$/.test(digits)) {
    // Expand shorthand by doubling each digit: "abc" -> "aabbcc".
    normalized = digits.replace(/./g, (digit) => digit + digit);
  } else {
    return null;
  }

  const int = Number.parseInt(normalized, 16);
  return {
    r: (int >> 16) & 255,
    g: (int >> 8) & 255,
    b: int & 255,
  };
}

/**
 * Convert a stored percentage string into an alpha value.
 *
 * The leading integer of `value` is read as a percentage and the result is
 * clamped to the range [0.05, 1]. Missing or non-numeric input yields 0.35.
 */
function parseNoteOpacity(value: string | undefined): number {
  const percent = Number.parseInt(value ?? "", 10);
  if (Number.isFinite(percent)) {
    const alpha = percent / 100;
    if (alpha < 0.05) {
      return 0.05;
    }
    if (alpha > 1) {
      return 1;
    }
    return alpha;
  }
  return 0.35;
}

// Per-kind node metadata, looked up by `data.kind` in RecipeGraphNodeBase.
// `tone` is passed to `cn(...)` as a class value for the header icon tile.
const NODE_META = {
  sampler: {
    tone: RECIPE_STUDIO_NODE_TONES.sampler,
  },
  llm: {
    tone: RECIPE_STUDIO_NODE_TONES.llm,
  },
  validator: {
    tone: RECIPE_STUDIO_NODE_TONES.validator,
  },
  expression: {
    tone: RECIPE_STUDIO_NODE_TONES.expression,
  },
  note: {
    tone: RECIPE_STUDIO_NODE_TONES.note,
  },
  seed: {
    tone: RECIPE_STUDIO_NODE_TONES.seed,
  },
  model_provider: {
    tone: RECIPE_STUDIO_NODE_TONES.model_provider,
  },
  model_config: {
    tone: RECIPE_STUDIO_NODE_TONES.model_config,
  },
  tool_config: {
    tone: RECIPE_STUDIO_NODE_TONES.tool_config,
  },
} as const;
// Header icon for each sampler block type; read by resolveNodeIcon.
// Several types intentionally share an icon (e.g. uniform/bernoulli,
// datetime/timedelta, person/person_from_faker).
const SAMPLER_ICONS: Record<SamplerType, IconType> = {
  category: Tag01Icon,
  subcategory: TagsIcon,
  uniform: EqualSignIcon,
  gaussian: Parabola02Icon,
  bernoulli: EqualSignIcon,
  datetime: Clock01Icon,
  timedelta: Clock01Icon,
  uuid: FingerPrintIcon,
  person: UserAccountIcon,
  person_from_faker: UserAccountIcon,
};

// Header icon for each LLM block type; read by resolveNodeIcon.
const LLM_ICONS: Record<LlmType, IconType> = {
  text: PencilEdit02Icon,
  structured: CodeIcon,
  code: CodeSimpleIcon,
  judge: BalanceScaleIcon,
};

/**
 * Pick the header icon for a node.
 *
 * Sampler and LLM nodes use the icon registered for their `blockType` in
 * SAMPLER_ICONS / LLM_ICONS. Every other kind maps to a fixed icon. Anything
 * unmatched (including a sampler or LLM node whose `blockType` is not
 * registered) falls back to the dice icon.
 *
 * @param kind Node kind from the graph node data.
 * @param blockType Node block type from the graph node data.
 * @returns The icon to render in the node header.
 */
function resolveNodeIcon(
  kind: RecipeGraphNodeType["data"]["kind"],
  blockType: RecipeGraphNodeType["data"]["blockType"],
): IconType {
  // Own-property check: the `in` operator also matches inherited keys such as
  // "constructor" or "toString", which would return a non-icon value.
  const hasOwn = (table: object, key: PropertyKey): boolean =>
    Object.prototype.hasOwnProperty.call(table, key);

  if (kind === "sampler" && hasOwn(SAMPLER_ICONS, blockType)) {
    return SAMPLER_ICONS[blockType as SamplerType];
  }
  if (kind === "llm" && hasOwn(LLM_ICONS, blockType)) {
    return LLM_ICONS[blockType as LlmType];
  }

  switch (kind) {
    case "validator":
    case "model_provider":
      return Shield02Icon;
    case "expression":
      return FunctionIcon;
    case "note":
      return PencilEdit02Icon;
    case "model_config":
    case "seed":
      return Plant01Icon;
    case "tool_config":
      return Plug01Icon;
    default:
      return DiceFaces03Icon;
  }
}

/**
 * Build the short, human-readable summary line for a node's configuration.
 *
 * The text depends on `config.kind` (and, for samplers and LLMs, on the
 * sub-type). When a config has nothing meaningful to summarize yet, a prompt
 * telling the user what to fill in is returned instead.
 *
 * Counts are pluralized ("1 option" / "2 options") so every count in the
 * summary reads the same way as the server count.
 *
 * @param config The node's config, or `undefined` when none exists yet.
 * @returns A single-line summary string; never empty.
 */
function getConfigSummary(config: NodeConfig | undefined): string {
  if (!config) {
    return "Open settings";
  }

  if (config.kind === "sampler") {
    if (config.sampler_type === "category") {
      const count = config.values?.length ?? 0;
      return count === 1 ? "1 option" : `${count} options`;
    }
    if (config.sampler_type === "subcategory") {
      if (config.subcategory_parent?.trim()) {
        return `Based on ${config.subcategory_parent}`;
      }
      return "Choose the main field";
    }
    if (config.sampler_type === "datetime") {
      // Unset bounds are shown as "?" rather than hiding the range.
      const start = config.datetime_start?.trim() || "?";
      const end = config.datetime_end?.trim() || "?";
      return `${start} -> ${end}`;
    }
    if (config.sampler_type === "timedelta") {
      if (config.reference_column_name?.trim()) {
        return `From ${config.reference_column_name}`;
      }
      return "Choose a date field";
    }
    if (
      config.sampler_type === "person" ||
      config.sampler_type === "person_from_faker"
    ) {
      const locale = config.person_locale?.trim() || "any locale";
      const city = config.person_city?.trim();
      if (city) {
        return `${locale} · ${city}`;
      }
      return locale;
    }
    return "Open settings";
  }

  if (config.kind === "llm") {
    if (config.llm_type === "structured") {
      return "Set the response format in settings";
    }
    if (config.llm_type === "judge") {
      const scoreCount = config.scores?.length ?? 0;
      return scoreCount === 1 ? "1 criterion" : `${scoreCount} criteria`;
    }
    if (config.tool_alias?.trim()) {
      return `Tools: ${config.tool_alias.trim()}`;
    }
    return "Add your prompt in settings";
  }

  if (config.kind === "tool_config") {
    const providerCount = config.mcp_providers.length;
    // Blank entries in the allow-list do not count as a selection.
    const allowCount =
      config.allow_tools?.filter((value) => value.trim()).length ?? 0;
    const providerLabel =
      providerCount === 1 ? "1 server" : `${providerCount} servers`;
    if (allowCount === 0) {
      return `${providerLabel} · all tools allowed`;
    }
    const allowLabel =
      allowCount === 1 ? "1 selected tool" : `${allowCount} selected tools`;
    return `${providerLabel} · ${allowLabel}`;
  }

  if (config.kind === "validator") {
    const target = config.target_columns[0]?.trim();
    if (target) {
      return `Checks ${target}`;
    }
    return "Choose code to check";
  }

  if (config.kind === "seed") {
    // A missing source type is treated as a Hugging Face ("hf") source.
    const seedSourceType = config.seed_source_type ?? "hf";
    if (seedSourceType === "hf" && config.hf_repo_id.trim()) {
      return config.hf_repo_id.trim();
    }
    if (seedSourceType === "local" && config.local_file_name?.trim()) {
      return config.local_file_name.trim();
    }
    if (
      seedSourceType === "unstructured" &&
      config.unstructured_file_name?.trim()
    ) {
      return config.unstructured_file_name.trim();
    }
    // Fallback for any source type: show the stored path when one exists.
    if (config.hf_path.trim()) {
      return config.hf_path.trim();
    }
    if (seedSourceType === "hf") {
      return "Choose a dataset";
    }
    if (seedSourceType === "local") {
      return "Upload a table file";
    }
    return "Upload a document";
  }

  if (config.kind === "markdown_note") {
    if (config.markdown.trim()) {
      return "Note preview";
    }
    return "Add note text";
  }

  return "Open settings";
}

/**
 * Render the body of a graph node for the given config.
 *
 * Resolution order:
 * 1. Markdown notes render a markdown preview.
 * 2. Configs accepted by `isInlineConfig` render their inline editor, wired
 *    to `updateConfig` with the config's own id.
 * 3. Category samplers render their values as badges.
 * 4. Tool configs render the summary plus one badge per named provider.
 * 5. Everything else renders the plain summary text.
 *
 * @param config The node's config, if any.
 * @param summary Pre-computed summary line used by the non-inline fallbacks.
 * @param updateConfig Store action used by the inline editors to patch a
 *   config by id.
 */
function renderNodeBody(
  config: NodeConfig | undefined,
  summary: string,
  updateConfig: (id: string, patch: Partial<NodeConfig>) => void,
): ReactElement {
  if (config?.kind === "markdown_note") {
    return <MarkdownPreview markdown={config.markdown} />;
  }

  if (config && isInlineConfig(config)) {
    const onUpdate = (patch: Partial<NodeConfig>) =>
      updateConfig(config.id, patch);

    if (config.kind === "sampler") {
      return <InlineSampler config={config} onUpdate={onUpdate} />;
    }
    if (config.kind === "model_provider" || config.kind === "model_config") {
      return <InlineModel config={config} onUpdate={onUpdate} />;
    }
    if (config.kind === "llm") {
      return <InlineLlm config={config} onUpdate={onUpdate} />;
    }
    if (config.kind === "expression") {
      return <InlineExpression config={config} onUpdate={onUpdate} />;
    }
    if (config.kind === "seed") {
      return <InlineSeed config={config} onUpdate={onUpdate} />;
    }
    // Inline-eligible kinds without an inline editor fall through to the
    // generic renderers below.
  }

  if (config?.kind === "sampler" && config.sampler_type === "category") {
    return <InlineCategoryBadges values={config.values ?? []} />;
  }

  if (config?.kind === "tool_config") {
    // Providers with a blank name are not shown as badges.
    const providerNames = config.mcp_providers
      .map((provider) => provider.name.trim())
      .filter(Boolean);
    return (
      <div className="space-y-2">
        <p className="text-xs text-muted-foreground">{summary}</p>
        {providerNames.length > 0 && (
          <div className="flex flex-wrap gap-1.5">
            {providerNames.map((providerName, providerIndex) => (
              <Badge
                // Names are user-entered and may repeat; include the index so
                // React keys stay unique among siblings.
                key={`${providerName}-${providerIndex}`}
                variant="secondary"
                className="corner-squircle font-mono text-[11px]"
              >
                {providerName}
              </Badge>
            ))}
          </div>
        )}
      </div>
    );
  }

  return <p className="text-xs text-muted-foreground">{summary}</p>;
}

/**
 * React Flow node renderer for recipe graph nodes.
 *
 * Markdown notes render as a resizable, tinted card containing only the
 * markdown preview (no header, no handles). Every other kind renders a
 * header (icon, name, `subtype · title`, action buttons), a body produced by
 * `renderNodeBody`, and the data/semantic handles enabled for its kind.
 *
 * All hooks are called before the markdown-note early return so the hook
 * order is the same on every render.
 */
function RecipeGraphNodeBase({
  id,
  data,
  selected,
}: NodeProps<RecipeGraphNodeType>): ReactElement {
  const meta = NODE_META[data.kind];
  const icon = resolveNodeIcon(data.kind, data.blockType);
  // Layout direction defaults to "LR" when the node data does not set one.
  const layoutDirection = data.layoutDirection ?? "LR";
  const config = useRecipeStudioStore((state) => state.configs[id]);
  const openConfig = useRecipeStudioStore((state) => state.openConfig);
  const updateConfig = useRecipeStudioStore((state) => state.updateConfig);
  // Aux visibility is per node id and treated as hidden when unset.
  const llmAuxVisible = useRecipeStudioStore(
    (state) => state.llmAuxVisibility[id] ?? false,
  );
  const setLlmAuxVisibility = useRecipeStudioStore(
    (state) => state.setLlmAuxVisibility,
  );
  const updateNodeInternals = useUpdateNodeInternals();
  const executionLocked = Boolean(data.executionLocked);
  const runtimeState = data.runtimeState ?? "idle";
  const connectionStatus = useNodeConnectionStatus(id, config);

  // Tell React Flow this node's internals may have changed whenever the
  // layout direction or the config changes.
  useEffect(() => {
    updateNodeInternals(id);
  }, [id, layoutDirection, config, updateNodeInternals]);

  if (config?.kind === "markdown_note") {
    // Default note color is "#FDE68A"; an unparseable color leaves the card
    // without an inline style.
    const rgb = hexToRgb(config.note_color ?? "#FDE68A");
    const alpha = parseNoteOpacity(config.note_opacity);
    // The border uses the same color, 0.15 more opaque than the fill, kept
    // within [0.3, 1].
    const noteStyle = rgb
      ? {
          backgroundColor: `rgba(${rgb.r}, ${rgb.g}, ${rgb.b}, ${alpha})`,
          borderColor: `rgba(${rgb.r}, ${rgb.g}, ${rgb.b}, ${Math.min(1, Math.max(alpha + 0.15, 0.3))})`,
        }
      : undefined;

    return (
      <BaseNode
        className="corner-squircle relative w-full min-w-0 overflow-visible rounded-lg border-border/60 shadow-sm"
        style={noteStyle}
      >
        <NodeResizer
          isVisible={selected}
          minWidth={MIN_NODE_WIDTH}
          minHeight={80}
          maxWidth={MAX_NOTE_NODE_WIDTH}
          maxHeight={520}
          color="var(--primary)"
          lineClassName="!border-transparent !shadow-none"
          lineStyle={{ opacity: 0 }}
          handleClassName="!h-3 !w-3 !border-transparent !bg-transparent"
          handleStyle={{ opacity: 0 }}
        />
        <BaseNodeContent className="px-3 py-2">
          <MarkdownPreview markdown={config.markdown} plain={true} />
        </BaseNodeContent>
      </BaseNode>
    );
  }

  // Which handle groups this kind exposes. Validators get data handles plus
  // both semantic directions; model_provider and tool_config are
  // semantic-output only.
  const showDataHandles =
    data.kind === "llm" ||
    data.kind === "validator" ||
    data.kind === "expression" ||
    data.kind === "sampler" ||
    data.kind === "seed";
  const showSemanticIn =
    data.kind === "model_config" || data.kind === "validator";
  const showSemanticOut =
    data.kind === "model_config" ||
    data.kind === "model_provider" ||
    data.kind === "tool_config" ||
    data.kind === "validator";
  const summary = getConfigSummary(config);
  const nodeBody = renderNodeBody(config, summary, updateConfig);
  // The "Show inputs" toggle is only offered for LLM nodes that have a
  // prompt, a system prompt, or at least one score.
  const canShowLlmAux =
    config?.kind === "llm" &&
    (Boolean(config.prompt.trim()) ||
      Boolean(config.system_prompt.trim()) ||
      Boolean((config.scores?.length ?? 0) > 0));
  // Person samplers use the dedicated user tone instead of the sampler tone.
  const iconTone =
    config?.kind === "sampler" &&
    (config.sampler_type === "person" ||
      config.sampler_type === "person_from_faker")
      ? RECIPE_STUDIO_USER_NODE_TONE
      : meta.tone;
  const runtimeNodeTone =
    runtimeState === "running"
      ? "border-primary/70 ring-2 ring-primary/20 shadow-md"
      : runtimeState === "done"
        ? "border-emerald-500/60 ring-1 ring-emerald-500/20"
        : "";
  // The dashed amber "connection issue" styling below is only applied while
  // the node is idle.
  const hasConnectionIssue =
    connectionStatus.isDisconnected ||
    connectionStatus.missingDataInput;

  return (
    <BaseNode
      className={cn(
        "corner-squircle relative w-full min-w-0 overflow-visible rounded-lg border-border/60 shadow-sm",
        runtimeNodeTone,
        hasConnectionIssue &&
          runtimeState === "idle" &&
          "opacity-80 border-dashed border-amber-400/70",
      )}
    >
      {runtimeState === "running" && config?.kind === "llm" && (
        <div className="pointer-events-none absolute -top-7 right-2 z-20">
          <span
            className="block size-6 animate-spin rounded-full border-[3px] border-primary/90 border-t-transparent bg-background"
            aria-label="Running"
          />
        </div>
      )}
      <NodeResizer
        isVisible={selected}
        minWidth={MIN_NODE_WIDTH}
        minHeight={120}
        maxWidth={MAX_NODE_WIDTH}
        maxHeight={520}
        color="var(--primary)"
        lineClassName="!border-transparent !shadow-none"
        lineStyle={{ opacity: 0 }}
        handleClassName="!h-3 !w-3 !border-transparent !bg-transparent"
        handleStyle={{ opacity: 0 }}
      />
      <BaseNodeHeader className="border-b border-border/50 px-3 py-2">
        <div className="flex min-w-0 items-center gap-2">
          <div
            className={cn(
              "corner-squircle flex size-7 items-center justify-center rounded-md border",
              iconTone,
            )}
          >
            <HugeiconsIcon icon={icon} className="size-3.5" />
          </div>
          <div className="min-w-0">
            <BaseNodeHeaderTitle className="truncate text-sm">
              {data.name}
            </BaseNodeHeaderTitle>
            <p className="truncate text-[11px] text-muted-foreground">
              {data.subtype} · {data.title}
            </p>
          </div>
        </div>
        <div className="flex items-center gap-1">
          {canShowLlmAux && (
            <Button
              type="button"
              size="xs"
              variant="ghost"
              className="nodrag"
              disabled={executionLocked}
              onClick={(event) => {
                event.preventDefault();
                event.stopPropagation();
                setLlmAuxVisibility(id, !llmAuxVisible);
              }}
            >
              {llmAuxVisible ? "Hide inputs" : "Show inputs"}
            </Button>
          )}
          <Button
            type="button"
            size="xs"
            variant="ghost"
            className="nodrag"
            disabled={executionLocked}
            onClick={(event) => {
              event.preventDefault();
              event.stopPropagation();
              openConfig(id);
            }}
          >
            Configure
          </Button>
        </div>
      </BaseNodeHeader>

      <BaseNodeContent
        className={cn(
          "gap-2 px-3 py-2",
          executionLocked && "pointer-events-none opacity-85",
        )}
      >
        {nodeBody}
      </BaseNodeContent>

      {showDataHandles && (
        <>
          <LabeledHandle
            id={HANDLE_IDS.dataIn}
            title="Data input"
            type="target"
            position={Position.Left}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataOutLeft}
            title="Data output"
            type="source"
            position={Position.Left}
            className="absolute inset-0 pointer-events-none opacity-0"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataInTop}
            title="Data input"
            type="target"
            position={Position.Top}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataOutTop}
            title="Data output"
            type="source"
            position={Position.Top}
            className="absolute inset-0 pointer-events-none opacity-0"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataOut}
            title="Data output"
            type="source"
            position={Position.Right}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataInRight}
            title="Data input"
            type="target"
            position={Position.Right}
            className="absolute inset-0 pointer-events-none opacity-0"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataOutBottom}
            title="Data output"
            type="source"
            position={Position.Bottom}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.dataInBottom}
            title="Data input"
            type="target"
            position={Position.Bottom}
            className="absolute inset-0 pointer-events-none opacity-0"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
        </>
      )}

      {showSemanticIn && (
        <>
          <LabeledHandle
            id={HANDLE_IDS.semanticIn}
            title="Semantic input"
            type="target"
            position={Position.Left}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.semanticInTop}
            title="Semantic input"
            type="target"
            position={Position.Top}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
        </>
      )}

      {showSemanticOut && (
        <>
          <LabeledHandle
            id={HANDLE_IDS.semanticOut}
            title="Semantic output"
            type="source"
            position={Position.Right}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
          <LabeledHandle
            id={HANDLE_IDS.semanticOutBottom}
            title="Semantic output"
            type="source"
            position={Position.Bottom}
            className="absolute inset-0 pointer-events-none"
            labelClassName="sr-only"
            handleClassName={NODE_HANDLE_CLASS}
          />
        </>
      )}
    </BaseNode>
  );
}

// Public export: RecipeGraphNodeBase wrapped in React's `memo`.
export const RecipeNode = memo(RecipeGraphNodeBase);


================================================
FILE: studio/frontend/src/features/recipe-studio/components/recipe-graph-semantic-edge.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { BaseEdge, type EdgeProps, getSmoothStepPath } from "@xyflow/react";
import { memo, type ReactElement } from "react";

/**
 * Dashed smooth-step edge (square corners, 16px offset) used for semantic
 * links.
 *
 * Three visual states, in priority order: active (`data.active`), selected,
 * and default. A caller-supplied `style` is spread last and therefore wins
 * over the computed values.
 */
export const RecipeGraphSemanticEdge = memo(function RecipeGraphSemanticEdge({
  id,
  sourceX,
  sourceY,
  targetX,
  targetY,
  sourcePosition,
  targetPosition,
  style,
  markerEnd,
  selected,
  data,
}: EdgeProps): ReactElement {
  const edgeData = data as { active?: boolean } | undefined;
  const isActive = Boolean(edgeData?.active);

  // Start from the default look, then upgrade for selected / active.
  let dashPattern = "6 5";
  let lineWidth = 1.8;
  let lineOpacity = 0.62;
  if (isActive) {
    dashPattern = "8 6";
    lineWidth = 2.4;
    lineOpacity = 1;
  } else if (selected) {
    dashPattern = "7 5";
    lineWidth = 2.3;
    lineOpacity = 0.95;
  }
  const lineColor =
    isActive || selected ? "var(--primary)" : "var(--muted-foreground)";

  const [path] = getSmoothStepPath({
    sourceX,
    sourceY,
    sourcePosition,
    targetX,
    targetY,
    targetPosition,
    borderRadius: 0,
    offset: 16,
  });

  const edgeStyle = {
    strokeDasharray: dashPattern,
    strokeWidth: lineWidth,
    stroke: lineColor,
    opacity: lineOpacity,
    ...style,
  };

  return (
    <BaseEdge id={id} path={path} markerEnd={markerEnd} style={edgeStyle} />
  );
});


================================================
FILE: studio/frontend/src/features/recipe-studio/components/recipe-studio-header.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import {
  Popover,
  PopoverContent,
  PopoverTrigger,
} from "@/components/ui/popover";
import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
import {
  Alert02Icon,
  AlertDiamondIcon,
  CookBookIcon,
  FloppyDiskIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type KeyboardEvent, type ReactElement, useState } from "react";
import type { RecipeStudioView } from "../execution-types";
import type { GraphWarning } from "../utils/graph-warnings";
import {
  RECIPE_STUDIO_WARNING_BADGE_TONE,
  RECIPE_STUDIO_WARNING_ICON_TONE,
} from "../utils/ui-tones";

// Save-state tone; selects the status badge label below.
type StatusTone = "success" | "error";

// Props for RecipeStudioHeader.
type RecipeStudioHeaderProps = {
  // Currently selected tab ("editor" or "executions" are the rendered tabs).
  activeView: RecipeStudioView;
  // While true the Save button is disabled and reads "Saving...".
  saveLoading: boolean;
  // Chooses the status badge text.
  saveTone: StatusTone;
  // Text shown next to the status badge (hidden below the `sm` breakpoint).
  savedAtLabel: string;
  // Controlled recipe name; edits are reported through onWorkflowNameChange.
  workflowName: string;
  // Graph warnings listed in the warnings popover; defaults to none.
  warnings?: GraphWarning[];
  onWorkflowNameChange: (value: string) => void;
  onViewChange: (view: RecipeStudioView) => void;
  onSaveRecipe: () => void;
};

// NOTE: despite the name, these values are the status badge's label text,
// not CSS class names.
const STATUS_MESSAGE_CLASS: Record<StatusTone, string> = {
  success: "Saved",
  error: "Needs saving",
};

/**
 * Top bar of the recipe studio.
 *
 * Left: recipe icon, click-to-edit recipe name, save-status badge and the
 * saved-at label. Center: Editor / Runs tab switch. Right: an optional
 * warnings popover (only when there are warnings) and the Save button.
 *
 * The recipe name is a controlled value: every keystroke is forwarded through
 * `onWorkflowNameChange`, and a blank name is replaced with
 * "Untitled recipe" whenever the editor closes.
 */
export function RecipeStudioHeader({
  activeView,
  saveLoading,
  saveTone,
  savedAtLabel,
  workflowName,
  warnings = [],
  onWorkflowNameChange,
  onViewChange,
  onSaveRecipe,
}: RecipeStudioHeaderProps): ReactElement {
  const [editingWorkflowName, setEditingWorkflowName] = useState(false);

  // Tabs reports a plain string; only forward the two views rendered here.
  function handleViewValueChange(value: string): void {
    if (value === "editor" || value === "executions") {
      onViewChange(value);
    }
  }

  // Leave edit mode, never leaving the recipe with a blank name.
  function closeWorkflowNameEditor(): void {
    if (workflowName.trim().length === 0) {
      onWorkflowNameChange("Untitled recipe");
    }
    setEditingWorkflowName(false);
  }

  function handleWorkflowNameKeyDown(
    event: KeyboardEvent<HTMLInputElement>,
  ): void {
    // Edits are already pushed to the parent on every keystroke, so Escape
    // cannot revert them; it only closes the editor. It must still go through
    // the blank-name fallback: otherwise the title button renders with no
    // text and can no longer be clicked to re-open the editor.
    if (event.key === "Enter" || event.key === "Escape") {
      closeWorkflowNameEditor();
    }
  }

  return (
    <div className="grid grid-cols-[minmax(0,1fr)_auto_minmax(0,1fr)] items-center gap-4 border-b px-4 py-3">
      <div className="flex min-w-0 items-center gap-3">
        <div
          className="flex size-8 shrink-0 items-center justify-center rounded-lg corner-squircle border border-border/70 bg-muted/20"
          aria-hidden={true}
        >
          <HugeiconsIcon
            icon={CookBookIcon}
            className="size-4 text-muted-foreground"
          />
        </div>
        <div className="flex min-w-0 items-center gap-2">
          {editingWorkflowName ? (
            <Input
              value={workflowName}
              onChange={(event) => onWorkflowNameChange(event.target.value)}
              onBlur={closeWorkflowNameEditor}
              onKeyDown={handleWorkflowNameKeyDown}
              autoFocus={true}
              className="h-7 w-full max-w-[min(22rem,50vw)]"
              aria-label="Recipe name"
            />
          ) : (
            <button
              type="button"
              onClick={() => setEditingWorkflowName(true)}
              className="max-w-[min(22rem,50vw)] truncate text-sm font-semibold text-foreground hover:text-primary"
              title={workflowName}
              aria-label={`Edit recipe name: ${workflowName}`}
            >
              {workflowName}
            </button>
          )}
          <Badge variant="secondary" className="h-6 shrink-0 text-[10px]">
            {STATUS_MESSAGE_CLASS[saveTone]}
          </Badge>
          <span
            className="hidden max-w-[12rem] truncate text-xs text-muted-foreground sm:inline"
            title={savedAtLabel}
          >
            {savedAtLabel}
          </span>
        </div>
      </div>
      <div className="justify-self-center">
        <Tabs value={activeView} onValueChange={handleViewValueChange}>
          <TabsList>
            <TabsTrigger value="editor">Editor</TabsTrigger>
            <TabsTrigger value="executions">Runs</TabsTrigger>
          </TabsList>
        </Tabs>
      </div>
      <div className="flex items-center justify-self-end gap-2">
        {warnings.length > 0 && (
          <Popover>
            <PopoverTrigger asChild={true}>
              <button
                type="button"
                className={`inline-flex h-6 shrink-0 items-center gap-1 rounded-md border px-2 text-[10px] font-medium ${RECIPE_STUDIO_WARNING_BADGE_TONE}`}
              >
                <HugeiconsIcon icon={Alert02Icon} className="size-3" />
                {warnings.length}
              </button>
            </PopoverTrigger>
            <PopoverContent align="end" className="w-80 p-0">
              <div className="border-b px-3 py-2">
                <p className="text-xs font-semibold text-foreground">
                  Graph warnings ({warnings.length})
                </p>
              </div>
              <ul className="max-h-60 overflow-y-auto py-1">
                {warnings.map((w, warningIndex) => (
                  <li
                    // The same node can report the same message more than
                    // once; the index keeps sibling keys unique.
                    key={`${w.nodeId ?? "global"}-${w.message}-${warningIndex}`}
                    className="flex items-start gap-2 px-3 py-1.5"
                  >
                    <HugeiconsIcon
                      icon={
                        w.severity === "error" ? AlertDiamondIcon : Alert02Icon
                      }
                      className={`mt-0.5 size-3 shrink-0 ${w.severity === "error" ? "text-destructive" : RECIPE_STUDIO_WARNING_ICON_TONE}`}
                    />
                    <span className="text-xs text-muted-foreground">
                      {(w.nodeName || w.nodeId) && (
                        <span className="font-medium text-foreground">
                          {w.nodeName || w.nodeId}:{" "}
                        </span>
                      )}
                      {w.message}
                    </span>
                  </li>
                ))}
              </ul>
            </PopoverContent>
          </Popover>
        )}
        <Button
          type="button"
          size="sm"
          variant="outline"
          onClick={onSaveRecipe}
          disabled={saveLoading}
        >
          <HugeiconsIcon icon={FloppyDiskIcon} className="size-3.5" />
          {saveLoading ? "Saving..." : "Save"}
        </Button>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/rf-ui/base-handle.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ComponentProps, ReactElement } from "react";
import { Handle, type HandleProps } from "@xyflow/react";

import { cn } from "@/lib/utils";

// Exported alias of React Flow's HandleProps. Note that BaseHandle itself is
// typed with ComponentProps<typeof Handle>, not this alias.
export type BaseHandleProps = HandleProps;

/**
 * React Flow `Handle` with the studio's default dot styling.
 *
 * All props are forwarded to `Handle`; a caller `className` is merged after
 * the defaults via `cn`.
 */
export function BaseHandle(
  handleProps: ComponentProps<typeof Handle>,
): ReactElement {
  const { className, children, ...forwarded } = handleProps;
  const mergedClassName = cn(
    "h-[12px] w-[12px] rounded-full border border-border/80 bg-muted shadow-[0_0_0_1px_hsl(var(--background))] transition-all hover:scale-110 hover:border-primary/70 hover:bg-primary/20",
    className,
  );

  return (
    <Handle {...forwarded} className={mergedClassName}>
      {children}
    </Handle>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/rf-ui/base-node.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ComponentProps, ReactElement } from "react";

import { cn } from "@/lib/utils";

/**
 * Card-like wrapper `<div>` for a graph node.
 *
 * Adds base, hover, and "selected React Flow node" styling, then merges the
 * caller's `className`. The element is focusable by default (`tabIndex={0}`);
 * since the remaining props are spread afterwards, a caller can override it.
 */
export function BaseNode(nodeProps: ComponentProps<"div">): ReactElement {
  const { className, ...divProps } = nodeProps;
  const nodeClassName = cn(
    "bg-card text-card-foreground relative rounded-md border transition-[border-color,box-shadow] duration-150",
    "hover:border-primary/40 hover:ring-1 hover:ring-primary/20 hover:shadow-sm",
    "[.react-flow\\_\\_node.selected_&]:border-primary/45",
    "[.react-flow\\_\\_node.selected_&]:ring-1 [.react-flow\\_\\_node.selected_&]:ring-primary/25",
    "[.react-flow\\_\\_node.selected_&]:shadow-md",
    className,
  );

  return <div className={nodeClassName} tabIndex={0} {...divProps} />;
}

/**
 * Header row of a node: a `<header>` laid out as a horizontal flex row with
 * its children spread to both ends. Caller `className` is merged last.
 */
export function BaseNodeHeader(
  headerProps: ComponentProps<"header">,
): ReactElement {
  const { className, ...rest } = headerProps;
  const headerClassName = cn(
    "mx-0 my-0 -mb-1 flex flex-row items-center justify-between gap-2 px-3 py-2",
    className,
  );

  return <header {...rest} className={headerClassName} />;
}

/**
 * Title heading (`<h3>`) for a node header.
 *
 * The title text is made non-selectable. Tailwind's utility for
 * `user-select: none` is `select-none`; the previous `user-select-none`
 * class is not a Tailwind utility and had no effect.
 * Caller `className` is merged after the defaults, and remaining props are
 * spread last so they can override `data-slot`.
 */
export function BaseNodeHeaderTitle({
  className,
  ...props
}: ComponentProps<"h3">): ReactElement {
  return (
    <h3
      data-slot="base-node-title"
      className={cn("select-none flex-1 font-semibold", className)}
      {...props}
    />
  );
}

/**
 * Body section of a node: a vertical flex column with default padding.
 * Caller `className` is merged last; other props are forwarded to the `<div>`.
 */
export function BaseNodeContent(
  contentProps: ComponentProps<"div">,
): ReactElement {
  const { className, ...rest } = contentProps;
  const contentClassName = cn("flex flex-col gap-y-2 p-3", className);

  return (
    <div data-slot="base-node-content" className={contentClassName} {...rest} />
  );
}

/**
 * Footer section of a node: a centered vertical flex column separated from
 * the body by a top border. Caller `className` is merged last; other props
 * are forwarded to the `<div>`.
 */
export function BaseNodeFooter(
  footerProps: ComponentProps<"div">,
): ReactElement {
  const { className, ...rest } = footerProps;
  const footerClassName = cn(
    "flex flex-col items-center gap-y-2 border-t px-3 pt-2 pb-3",
    className,
  );

  return (
    <div data-slot="base-node-footer" className={footerClassName} {...rest} />
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/rf-ui/data-edge.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";
import {
  BaseEdge,
  getBezierPath,
  getSmoothStepPath,
  getStraightPath,
  Position,
  type Edge,
  type EdgeProps,
} from "@xyflow/react";

/**
 * Edge type carrying the rendering options read by the `DataEdge` component.
 */
export type DataEdge = Edge<{
  // Path shape to draw; "auto" is resolved to a concrete shape by resolvePathType.
  path?: "auto" | "bezier" | "smoothstep" | "step" | "straight";
  // When truthy the edge is drawn highlighted and dashed.
  active?: boolean;
}>;

/**
 * Custom React Flow edge that draws a single path between two handles.
 *
 * The path shape comes from `data.path` ("auto" when absent). Active and
 * selected edges are drawn in the primary color with a thicker stroke; active
 * edges are additionally dashed. Any `style` passed by the caller is spread
 * last and therefore overrides the computed values.
 */
export function DataEdge({
  data = { path: "auto" },
  id,
  markerEnd,
  selected,
  sourcePosition,
  sourceX,
  sourceY,
  style,
  targetPosition,
  targetX,
  targetY,
}: EdgeProps<DataEdge>): ReactElement {
  const isActive = Boolean(data.active);
  const isHighlighted = isActive || selected;

  // Only the SVG path string is needed; label coordinates are discarded.
  const [edgePath] = getPath({
    type: resolvePathType({ type: data.path ?? "auto" }),
    sourceX,
    sourceY,
    sourcePosition,
    targetX,
    targetY,
    targetPosition,
  });

  // Active wins over selected; everything else is dimmed.
  let opacity = 0.7;
  if (isActive) {
    opacity = 1;
  } else if (selected) {
    opacity = 0.96;
  }

  const edgeStyle = {
    stroke: isHighlighted ? "var(--primary)" : "var(--muted-foreground)",
    strokeWidth: isHighlighted ? 2.6 : 2.1,
    opacity,
    strokeDasharray: isActive ? "8 6" : undefined,
    ...style,
  };

  return (
    <BaseEdge id={id} path={edgePath} markerEnd={markerEnd} style={edgeStyle} />
  );
}

/**
 * Builds the path tuple for an edge of the given concrete shape.
 *
 * "step" is drawn with the smooth-step generator using a border radius of 0.
 * Any type other than "bezier", "smoothstep" or "step" falls through to a
 * straight line, which ignores the handle positions.
 */
function getPath({
  type,
  sourceX,
  sourceY,
  targetX,
  targetY,
  sourcePosition,
  targetPosition,
}: {
  type: "bezier" | "smoothstep" | "step" | "straight";
  sourceX: number;
  sourceY: number;
  targetX: number;
  targetY: number;
  sourcePosition: Position;
  targetPosition: Position;
}): [string, number, number, ...number[]] {
  // Shared geometry for every generator that takes handle positions.
  const endpoints = {
    sourceX,
    sourceY,
    targetX,
    targetY,
    sourcePosition,
    targetPosition,
  };

  switch (type) {
    case "bezier":
      return getBezierPath(endpoints);
    case "smoothstep":
      return getSmoothStepPath(endpoints);
    case "step":
      return getSmoothStepPath({ ...endpoints, borderRadius: 0 });
    default:
      return getStraightPath({ sourceX, sourceY, targetX, targetY });
  }
}

/**
 * Maps the configured path type to a concrete one.
 *
 * "auto" becomes "smoothstep"; every other value is returned unchanged.
 */
function resolvePathType({
  type,
}: {
  type: "auto" | "bezier" | "smoothstep" | "step" | "straight";
}): "bezier" | "smoothstep" | "step" | "straight" {
  return type === "auto" ? "smoothstep" : type;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/rf-ui/labeled-handle.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type ComponentProps, type ReactElement } from "react";
import { type HandleProps } from "@xyflow/react";

import { cn } from "@/lib/utils";
import { BaseHandle } from "./base-handle";

// Flex layout classes for the handle + label wrapper, keyed by the handle's
// `position` value (looked up as `flexDirections[position]` in LabeledHandle).
const flexDirections = {
  top: "flex-col",
  right: "flex-row-reverse justify-end",
  bottom: "flex-col-reverse justify-end",
  left: "flex-row",
};

/**
 * A connection handle rendered next to a text label.
 *
 * The wrapper `<div>` receives `ref`, `title` and the layout classes chosen
 * from `flexDirections` for the given `position`; all other props are passed
 * on to `BaseHandle`.
 */
export function LabeledHandle({
  className,
  labelClassName,
  handleClassName,
  title,
  position,
  ref,
  ...handleProps
}: HandleProps &
  ComponentProps<"div"> & {
    title: string;
    handleClassName?: string;
    labelClassName?: string;
  }): ReactElement {
  const wrapperClassName = cn(
    "relative flex items-center",
    flexDirections[position],
    className,
  );
  const labelClasses = cn("text-foreground px-3", labelClassName);

  return (
    <div title={title} className={wrapperClassName} ref={ref}>
      <BaseHandle
        position={position}
        className={handleClassName}
        {...handleProps}
      />
      <label className={labelClasses}>{title}</label>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/runtime/execution-progress-island.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  ArrowDown01Icon,
  ArrowUp01Icon,
  CheckmarkCircle02Icon,
  Flag02Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import type { ReactElement } from "react";
import { Button } from "@/components/ui/button";
import { Progress } from "@/components/ui/progress";
import { Spinner } from "@/components/ui/spinner";
import { cn } from "@/lib/utils";
import type { RecipeExecutionRecord } from "../../execution-types";
import { isExecutionInProgress } from "../../executions/execution-helpers";
import {
  formatMetricValue,
  formatPercent,
} from "../executions/executions-view-helpers";

// Props for ExecutionProgressIsland.
type ExecutionProgressIslandProps = {
  // Execution whose status, progress, current column and batch are displayed.
  execution: RecipeExecutionRecord;
  // Icon rendered next to the "Column:" line.
  currentColumnIcon: typeof Flag02Icon;
  // When true only the header row and progress bar are rendered.
  minimized: boolean;
  // Called with the negated `minimized` value when the toggle button is clicked.
  onMinimizedChange: (value: boolean) => void;
  // Called when the "View run details" button is clicked.
  onViewExecutions: () => void;
};

/**
 * Formats an ETA for display.
 *
 * Delegates to `formatMetricValue`; its "--" placeholder is passed through
 * untouched, any other result gets an "s" suffix appended.
 */
function formatEta(value: number | null | undefined): string {
  const formatted = formatMetricValue(value);
  return formatted === "--" ? "--" : `${formatted}s`;
}

/**
 * Picks the headline text for the progress island.
 *
 * `complete` takes precedence over `inProgress`; when neither flag is set a
 * neutral label is returned.
 */
function statusLabel(input: {
  complete: boolean;
  inProgress: boolean;
}): string {
  const { complete, inProgress } = input;
  if (complete) {
    return "Run completed";
  }
  return inProgress ? "Run in progress" : "Run status";
}

/**
 * Compact status card for a recipe execution.
 *
 * Always shows a status headline, the progress percentage, a minimize/expand
 * toggle and a progress bar. When not minimized it also shows the
 * done/total/rate/ETA metrics, the current column, the batch position (only
 * when the batch total is a number greater than 1) and a button that calls
 * `onViewExecutions`.
 */
export function ExecutionProgressIsland({
  execution,
  currentColumnIcon,
  minimized,
  onMinimizedChange,
  onViewExecutions,
}: ExecutionProgressIslandProps): ReactElement {
  const { progress, batch } = execution;

  const complete = execution.status === "completed";
  const inProgress = isExecutionInProgress(execution.status);

  // Without a reported percent, a completed run reads 100 and anything else 0.
  const progressPercent = progress?.percent ?? (complete ? 100 : 0);

  // Any numeric progress field counts as a signal that progress is reported.
  const hasProgressSignal = progress
    ? [
        progress.done,
        progress.total,
        progress.percent,
        progress.rate,
        progress.eta_sec,
      ].some((signal) => typeof signal === "number")
    : false;
  const showLoadingSpinner = inProgress && !hasProgressSignal;

  const batchTotal = batch?.total ?? null;
  const showBatch = typeof batchTotal === "number" && batchTotal > 1;

  // Display strings shared by the visible text and the hover titles.
  const doneText = formatMetricValue(progress?.done);
  const totalText = formatMetricValue(progress?.total);
  const rateText = formatMetricValue(progress?.rate);
  const etaText = formatEta(progress?.eta_sec);
  const columnText = execution.current_column ?? "--";
  const batchIdxText = batch?.idx ?? "--";
  const batchTotalText = batch?.total ?? "--";

  const statusIcon = complete ? CheckmarkCircle02Icon : Flag02Icon;
  const statusIconTone = complete
    ? "text-emerald-700 dark:text-emerald-300"
    : "text-amber-700 dark:text-amber-300";

  return (
    <div
      className={cn(
        "w-[clamp(15rem,26vw,20rem)] max-w-[calc(100vw-1rem)] rounded-b-xl border-x border-b bg-card/96 shadow-sm backdrop-blur-sm transition-all",
        minimized ? "min-h-[3rem]" : "min-h-[8.5rem]",
      )}
      aria-live="polite"
    >
      <div className="flex items-center justify-between gap-2 px-3 py-2">
        <div className="flex min-w-0 items-center gap-2">
          <HugeiconsIcon
            icon={statusIcon}
            className={cn("size-3.5", statusIconTone)}
          />
          <p className="truncate text-xs font-medium text-foreground">
            {statusLabel({ complete, inProgress })}
          </p>
        </div>
        <div className="flex items-center gap-2">
          {showLoadingSpinner && (
            <Spinner className="size-3.5 text-muted-foreground" />
          )}
          <span className="shrink-0 text-[11px] text-muted-foreground">
            {formatPercent(progressPercent)}
          </span>
          <button
            type="button"
            onClick={() => onMinimizedChange(!minimized)}
            className="inline-flex size-8 shrink-0 items-center justify-center rounded border border-border/70 text-muted-foreground transition hover:bg-muted/50"
            aria-label={minimized ? "Expand progress" : "Minimize progress"}
            title={minimized ? "Expand" : "Minimize"}
          >
            <HugeiconsIcon
              icon={minimized ? ArrowDown01Icon : ArrowUp01Icon}
              className="size-3.5"
            />
          </button>
        </div>
      </div>

      <div className="px-3">
        <Progress value={progressPercent} className="h-1" />
      </div>

      {!minimized && (
        <>
          <div className="grid grid-cols-2 gap-2 px-3 pt-2 text-[11px] text-muted-foreground sm:grid-cols-4">
            <p className="truncate" title={`Done: ${doneText}`}>
              Done: {doneText}
            </p>
            <p className="truncate" title={`Total: ${totalText}`}>
              Total: {totalText}
            </p>
            <p className="truncate" title={`Rate: ${rateText}`}>
              Rate: {rateText}
            </p>
            <p className="truncate" title={`ETA: ${etaText}`}>
              ETA: {etaText}
            </p>
          </div>
          <div className="mt-1 flex items-center gap-1.5 px-3 text-[11px] text-muted-foreground">
            <HugeiconsIcon
              icon={currentColumnIcon}
              className="size-3.5 shrink-0"
            />
            <p className="truncate" title={columnText}>
              Column: {columnText}
            </p>
          </div>
          {showBatch && (
            <div
              className="mt-1 truncate px-3 text-[11px] text-muted-foreground"
              title={`Batch: ${batchIdxText}/${batchTotalText}`}
            >
              Batch: {batchIdxText}/{batchTotalText}
            </div>
          )}
          <div className="px-3 pb-2 pt-2">
            <Button
              type="button"
              variant="outline"
              size="sm"
              className="h-7 w-full text-[11px]"
              onClick={onViewExecutions}
            >
              View run details
            </Button>
          </div>
        </>
      )}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/shared/available-references-inline.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { type ReactElement, useLayoutEffect, useRef, useState } from "react";
import type { AvailableVariableEntry } from "../../utils/variables";

// Props for AvailableReferencesInline.
type AvailableReferencesInlineProps = {
  // Variables to render as badges, in display order.
  entries: AvailableVariableEntry[];
};

// Number of wrapped badge rows shown while the list is collapsed.
const MAX_ROWS = 2;

/**
 * Renders the available variable references as a wrapping row of badges,
 * collapsed to at most `MAX_ROWS` rows with a "+N more" / "Show less" toggle.
 *
 * While collapsed, an invisible mirror of the full badge list is rendered
 * behind the visible one and measured to find how many badges fit in
 * `MAX_ROWS` rows. One badge slot is given up whenever the list is truncated.
 *
 * Returns `null` when there are no entries.
 */
export function AvailableReferencesInline({
  entries,
}: AvailableReferencesInlineProps): ReactElement | null {
  const [expanded, setExpanded] = useState(false);
  const [collapsedCount, setCollapsedCount] = useState(entries.length);
  const wrapperRef = useRef<HTMLDivElement | null>(null);
  const measureRefs = useRef<Array<HTMLSpanElement | null>>([]);

  // Badge widths depend on the entry text, so the measurement has to be
  // repeated when the content changes even if the number of entries does not.
  // A string signature is used instead of the array itself because it stays
  // stable when the parent passes a fresh array with the same content.
  const entriesSignature = entries
    .map((entry) => `${entry.source}:${entry.name}`)
    .join("\n");

  // biome-ignore lint/correctness/useExhaustiveDependencies: entriesSignature re-triggers measurement when badge text changes
  useLayoutEffect(() => {
    if (expanded) {
      // The measurement mirror is not rendered while expanded.
      return;
    }
    const wrapper = wrapperRef.current;
    const items = measureRefs.current.filter(
      (node): node is HTMLSpanElement => Boolean(node),
    );
    if (!(wrapper && items.length > 0)) {
      setCollapsedCount(entries.length);
      return;
    }

    const compute = () => {
      // Distinct row offsets seen so far; a 1px tolerance groups badges that
      // sit on the same visual row.
      const rowTops: number[] = [];
      let cutoff = items.length;
      for (let i = 0; i < items.length; i += 1) {
        const top = items[i].offsetTop;
        if (!rowTops.some((value) => Math.abs(value - top) <= 1)) {
          rowTops.push(top);
        }
        if (rowTops.length > MAX_ROWS) {
          // Item i is the first one that starts a row past the limit.
          cutoff = i;
          break;
        }
      }
      if (cutoff < items.length) {
        // Drop one more badge when truncating (leaves room for "+N more").
        cutoff = Math.max(0, cutoff - 1);
      }
      setCollapsedCount(cutoff);
    };

    compute();
    // Re-measure whenever the wrapper is resized, since wrapping may change.
    const observer = new ResizeObserver(compute);
    observer.observe(wrapper);
    return () => observer.disconnect();
  }, [entries.length, entriesSignature, expanded]);

  if (entries.length === 0) {
    return null;
  }

  const shown = expanded ? entries : entries.slice(0, collapsedCount);
  const hiddenCount = Math.max(0, entries.length - shown.length);

  return (
    <div className="space-y-1">
      <p className="text-[10px] font-medium text-muted-foreground">
        Available references
      </p>
      <div ref={wrapperRef} className="relative">
        {!expanded && (
          <div className="invisible pointer-events-none absolute inset-0 -z-10">
            <div className="flex flex-wrap gap-1">
              {entries.map((entry, index) => (
                <Badge
                  // biome-ignore lint/suspicious/noArrayIndexKey: static measurement mirror
                  key={`${entry.source}:${entry.name}:${index}`}
                  ref={(node) => {
                    measureRefs.current[index] = node;
                  }}
                  variant="secondary"
                  className={
                    entry.source === "seed"
                      ? "corner-squircle h-4 border-blue-500/25 bg-blue-500/10 px-1.5 font-mono text-[10px] text-blue-700 dark:text-blue-300"
                      : "corner-squircle h-4 px-1.5 font-mono text-[10px]"
                  }
                >
                  {entry.name}
                </Badge>
              ))}
            </div>
          </div>
        )}
        <div className="flex flex-wrap gap-1">
          {shown.map((entry) => (
            <Badge
              key={`${entry.source}:${entry.name}`}
              variant="secondary"
              className={
                entry.source === "seed"
                  ? "corner-squircle h-4 border-blue-500/25 bg-blue-500/10 px-1.5 font-mono text-[10px] text-blue-700 dark:text-blue-300"
                  : "corner-squircle h-4 px-1.5 font-mono text-[10px]"
              }
            >
              {entry.name}
            </Badge>
          ))}
          {!expanded && hiddenCount > 0 && (
            <button
              type="button"
              className="corner-squircle h-4 px-1.5 text-[10px] text-muted-foreground hover:text-foreground"
              onClick={() => setExpanded(true)}
            >
              +{hiddenCount} more
            </button>
          )}
          {expanded && collapsedCount < entries.length && (
            <button
              type="button"
              className="corner-squircle h-4 px-1.5 text-[10px] text-muted-foreground hover:text-foreground"
              onClick={() => setExpanded(false)}
            >
              Show less
            </button>
          )}
        </div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/components/shared/hf-dataset-combobox.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import { Spinner } from "@/components/ui/spinner";
import { useDebouncedValue, useHfDatasetSearch } from "@/hooks";
import { type ReactElement, useEffect, useMemo, useRef, useState } from "react";

// Props for HfDatasetCombobox.
type HfDatasetComboboxProps = {
  // Currently selected dataset id; an empty/blank string means nothing is selected.
  value: string;
  // Called with the chosen dataset id, or "" when the selection is cleared.
  onValueChange: (value: string) => void;
  // Forwarded to the dataset search hook.
  accessToken?: string;
  // `id` applied to the text input.
  inputId?: string;
  // Input placeholder; defaults to "Search datasets...".
  placeholder?: string;
  // Class applied to the wrapper element.
  className?: string;
};

/**
 * Searchable combobox for picking a dataset id.
 *
 * The typed text is debounced and passed to `useHfDatasetSearch`; the result
 * ids become the option list, with the currently selected value appended when
 * the results do not already contain it. Pressing Enter in the input commits
 * the first option, or the trimmed typed text when there are no options.
 */
export function HfDatasetCombobox({
  value,
  onValueChange,
  accessToken,
  inputId,
  placeholder = "Search datasets...",
  className,
}: HfDatasetComboboxProps): ReactElement {
  const [inputValue, setInputValue] = useState(value);
  // Set on pointer-down over an option so that the next input-value change
  // notification is not treated as typed search text.
  const selectingRef = useRef(false);
  const anchorRef = useRef<HTMLDivElement>(null);
  const debouncedQuery = useDebouncedValue(inputValue);

  // Keep the local query text in sync when the value changes from outside.
  useEffect(() => {
    setInputValue(value);
  }, [value]);

  const { results, isLoading, error } = useHfDatasetSearch(debouncedQuery, {
    accessToken,
  });

  const items = useMemo(() => {
    const selected = value.trim();
    const ids = results.map((item) => item.id);
    const needsSelected = Boolean(selected) && !ids.includes(selected);
    return needsSelected ? [...ids, selected] : ids;
  }, [results, value]);

  const selectedValue = value.trim() ? value : null;

  return (
    <div
      ref={anchorRef}
      className={className}
      onKeyDown={(event) => {
        // NOTE(review): this always commits items[0], not the highlighted
        // option — confirm how it interacts with the Combobox's own Enter handling.
        const target = event.target;
        if (event.key !== "Enter" || !(target instanceof HTMLInputElement)) {
          return;
        }
        event.preventDefault();
        if (items.length > 0) {
          onValueChange(items[0]);
          return;
        }
        const typed = target.value.trim();
        if (typed) {
          onValueChange(typed);
        }
      }}
    >
      <Combobox
        items={items}
        filteredItems={items}
        filter={null}
        value={selectedValue}
        onValueChange={(next) => onValueChange(next ?? "")}
        onInputValueChange={(next) => {
          if (!selectingRef.current) {
            setInputValue(next);
            return;
          }
          // Swallow exactly one change after an option was pressed.
          selectingRef.current = false;
        }}
        itemToStringValue={(item) => item}
        autoHighlight={true}
      >
        <ComboboxInput
          id={inputId}
          className="nodrag w-full"
          placeholder={placeholder}
        />
        <ComboboxContent anchor={anchorRef}>
          {isLoading ? (
            <div className="flex items-center gap-2 px-2 py-3 text-xs text-muted-foreground">
              <Spinner className="size-3.5" />
              Searching...
            </div>
          ) : (
            <ComboboxEmpty>No datasets found</ComboboxEmpty>
          )}
          <ComboboxList>
            {(id: string) => (
              <ComboboxItem
                key={id}
                value={id}
                onPointerDown={() => {
                  selectingRef.current = true;
                }}
              >
                {id}
              </ComboboxItem>
            )}
          </ComboboxList>
        </ComboboxContent>
      </Combobox>
      {error && <p className="mt-1 text-xs text-destructive">{error}</p>}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/constants.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Sizing limits for recipe-studio canvas nodes.
// NOTE(review): units are presumably CSS pixels — confirm against the callers.
export const DEFAULT_NODE_WIDTH = 400;
export const DEFAULT_NODE_HEIGHT = 120;
export const MIN_NODE_WIDTH = 260;
export const MAX_NODE_WIDTH = 900;
// Separate, smaller maximum width for note nodes.
export const MAX_NOTE_NODE_WIDTH = 600;


================================================
FILE: studio/frontend/src/features/recipe-studio/data/executions-db.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import Dexie, { type EntityTable } from "dexie";
import type { RecipeExecutionRecord } from "../execution-types";

// IndexedDB database (via Dexie) holding recipe execution records.
// The cast adds a typed `executions` table keyed by `id`.
const db = new Dexie("unsloth-data-recipe-executions") as Dexie & {
  executions: EntityTable<RecipeExecutionRecord, "id">;
};

// Schema v1: `id` is listed first (primary key in Dexie's schema syntax),
// followed by the indexed fields.
db.version(1).stores({
  executions: "id, recipeId, kind, status, createdAt",
});

// Schema v2: same table with `finishedAt` and `jobId` added to the index list.
// Earlier version declarations are kept alongside the newer one.
db.version(2).stores({
  executions: "id, recipeId, kind, status, createdAt, finishedAt, jobId",
});

/**
 * Loads every stored execution for one recipe.
 *
 * @param recipeId - Value matched against the `recipeId` index.
 * @returns The matching records ordered by `createdAt`, largest first.
 */
export async function listRecipeExecutions(
  recipeId: string,
): Promise<RecipeExecutionRecord[]> {
  const records = await db.executions
    .where("recipeId")
    .equals(recipeId)
    .toArray();
  // Sorting in place is fine: the array was freshly created by toArray().
  records.sort((left, right) => right.createdAt - left.createdAt);
  return records;
}

/**
 * Writes an execution record to the `executions` table using `put`, so a
 * record with an existing `id` is replaced rather than duplicated.
 *
 * @param execution - Record to store.
 */
export async function saveRecipeExecution(
  execution: RecipeExecutionRecord,
): Promise<void> {
  await db.executions.put(execution);
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/config-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Dialog, DialogContent, DialogFooter } from "@/components/ui/dialog";
import { Switch } from "@/components/ui/switch";
import type { ReactElement } from "react";
import { getBlockDefinitionForConfig } from "../blocks/definitions";
import { renderBlockDialog } from "../blocks/registry";
import type { NodeConfig, SamplerConfig } from "../types";
import { DialogShell } from "./shared/dialog-shell";
import { ValidationBanner } from "./shared/validation-banner";

// Props for ConfigDialog.
type ConfigDialogProps = {
  // Controlled open state of the dialog.
  open: boolean;
  // Called when the dialog requests an open-state change (including "Done").
  onOpenChange: (open: boolean) => void;
  // Step being edited; null renders the "Select a step to edit." placeholder.
  config: NodeConfig | null;
  // The next five lists are passed straight through to renderBlockDialog.
  categoryOptions: SamplerConfig[];
  modelConfigAliases: string[];
  modelProviderOptions: string[];
  toolProfileAliases: string[];
  datetimeOptions: string[];
  // Applies a partial update to the config with the given id.
  onUpdate: (id: string, patch: Partial<NodeConfig>) => void;
  // Forwarded to DialogContent as its `container`.
  container?: HTMLDivElement | null;
  // When true, shows a lock notice and disables pointer interaction with the form.
  readOnly?: boolean;
};

/**
 * Dialog for editing the configuration of a single recipe step.
 *
 * Shows the block's title and description, a validation banner, an optional
 * "Keep out of final dataset" toggle and the block-specific form produced by
 * `renderBlockDialog`. In read-only mode a lock notice is shown and the form
 * area is dimmed with pointer events disabled.
 */
export function ConfigDialog({
  open,
  onOpenChange,
  config,
  categoryOptions,
  modelConfigAliases,
  modelProviderOptions,
  toolProfileAliases,
  datetimeOptions,
  onUpdate,
  container,
  readOnly = false,
}: ConfigDialogProps): ReactElement {
  const blockDefinition = getBlockDefinitionForConfig(config);

  // The drop toggle is offered for these kinds unconditionally...
  const kind = config?.kind;
  const isAlwaysDroppable =
    kind === "sampler" ||
    kind === "llm" ||
    kind === "validator" ||
    kind === "expression";
  // ...and for seed steps only when the source type is "unstructured"
  // (a missing source type is treated as "hf").
  const isUnstructuredSeed =
    config?.kind === "seed" &&
    (config.seed_source_type ?? "hf") === "unstructured";
  const showDropToggle = isAlwaysDroppable || isUnstructuredSeed;

  const dialogDescription = blockDefinition
    ? blockDefinition.description
    : "Choose a step to edit.";
  const formAreaClassName = readOnly
    ? "pointer-events-none min-w-0 opacity-75"
    : "min-w-0";

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent
        container={container}
        position="absolute"
        overlayPosition="absolute"
        overlayClassName="bg-transparent"
        className="corner-squircle max-h-[650px] overflow-y-auto overflow-x-hidden sm:max-w-2xl shadow-border"
      >
        <DialogShell
          title={blockDefinition?.title}
          description={dialogDescription}
        />
        {!config && (
          <div className="text-sm text-muted-foreground">
            Select a step to edit.
          </div>
        )}
        {config && (
          <div className="min-w-0 space-y-4">
            {readOnly && (
              <div className="rounded-lg border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-700 dark:text-amber-300">
                This recipe is locked while a run is in progress.
              </div>
            )}
            <ValidationBanner config={config} />
            <div className={formAreaClassName}>
              {showDropToggle && (
                <div className="mb-2 flex items-center corner-squircle justify-between gap-3 rounded-2xl border border-border/60 px-3 pt-2 pb-4">
                  <div className="min-w-0">
                    <p className="text-sm font-semibold">
                      Keep out of final dataset
                    </p>
                    <p className="break-words text-xs text-muted-foreground">
                      Use this step while generating, but leave it out of
                      exported rows.
                    </p>
                  </div>
                  <Switch
                    checked={config.drop ?? false}
                    disabled={readOnly}
                    onCheckedChange={(value) =>
                      onUpdate(config.id, { drop: value })
                    }
                  />
                </div>
              )}
              {renderBlockDialog(
                config,
                open,
                categoryOptions,
                modelConfigAliases,
                modelProviderOptions,
                toolProfileAliases,
                datetimeOptions,
                onUpdate,
              )}
            </div>
          </div>
        )}
        <DialogFooter>
          <Button
            type="button"
            variant="outline"
            onClick={() => onOpenChange(false)}
          >
            Done
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/expression/expression-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Textarea } from "@/components/ui/textarea";
import type { ReactElement } from "react";
import { useMemo } from "react";
import { useRecipeStudioStore } from "../../stores/recipe-studio";
import type { ExpressionConfig, ExpressionDtype } from "../../types";
import { findInvalidJinjaReferences } from "../../utils/refs";
import { getAvailableVariables } from "../../utils/variables";
import { AvailableVariables } from "../shared/available-variables";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

// Output types offered in the "Output type" select, in display order.
const DTYPE_OPTIONS: ExpressionDtype[] = ["str", "int", "float", "bool"];

// Props for ExpressionDialog.
type ExpressionDialogProps = {
  // Expression step being edited.
  config: ExpressionConfig;
  // Applies a partial update to that step.
  onUpdate: (patch: Partial<ExpressionConfig>) => void;
};

/**
 * Form for editing an expression step: its name, output type and formula.
 *
 * The formula is checked against the variables available to this step; any
 * unknown references mark the textarea invalid and are listed below it (the
 * first three by name, the rest as a count).
 */
export function ExpressionDialog({
  config,
  onUpdate,
}: ExpressionDialogProps): ReactElement {
  const configs = useRecipeStudioStore((state) => state.configs);
  const dtypeId = `${config.id}-dtype`;
  const exprId = `${config.id}-expr`;

  const validReferences = useMemo(
    () => getAvailableVariables(configs, config.id),
    [configs, config.id],
  );
  const invalidExprRefs = useMemo(
    () => findInvalidJinjaReferences(config.expr, validReferences),
    [config.expr, validReferences],
  );

  const hasInvalidRefs = invalidExprRefs.length > 0;
  // Only the first three unknown references are spelled out.
  const listedRefs = invalidExprRefs.slice(0, 3);
  const invalidExprText = listedRefs.map((ref) => `{{ ${ref} }}`).join(", ");
  const overflowText =
    invalidExprRefs.length > 3 ? ` +${invalidExprRefs.length - 3} more` : "";

  return (
    <div className="space-y-4">
      <AvailableVariables configId={config.id} />
      <NameField
        value={config.name}
        onChange={(value) => onUpdate({ name: value })}
      />
      <div className="grid gap-2">
        <FieldLabel
          label="Output type"
          htmlFor={dtypeId}
          hint="Choose how this formula should be stored in the final dataset."
        />
        <Select
          value={config.dtype}
          onValueChange={(value) =>
            onUpdate({ dtype: value as ExpressionDtype })
          }
        >
          <SelectTrigger className="nodrag w-full" id={dtypeId}>
            <SelectValue placeholder="Select type" />
          </SelectTrigger>
          <SelectContent>
            {DTYPE_OPTIONS.map((dtype) => (
              <SelectItem key={dtype} value={dtype}>
                {dtype}
              </SelectItem>
            ))}
          </SelectContent>
        </Select>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Formula"
          htmlFor={exprId}
          hint="Build this field from other fields."
        />
        <Textarea
          id={exprId}
          className="corner-squircle nodrag"
          aria-invalid={hasInvalidRefs}
          placeholder="{{ category_1 }} - {{ subcategory_1 }}"
          value={config.expr}
          onChange={(event) => onUpdate({ expr: event.target.value })}
        />
        {hasInvalidRefs && (
          <p className="text-xs text-destructive">
            Unknown field: {invalidExprText}
            {overflowText}
          </p>
        )}
        <p className="text-xs text-muted-foreground">
          Insert other fields like {"{{ field_name }}"}.
        </p>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/import-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import {
  Dialog,
  DialogContent,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { Textarea } from "@/components/ui/textarea";
import { type ReactElement, useState } from "react";
import { FieldLabel } from "./shared/field-label";

// Props for ImportDialog.
type ImportDialogProps = {
  // Controlled open state of the dialog.
  open: boolean;
  // Called when the dialog requests an open-state change.
  onOpenChange: (open: boolean) => void;
  // Receives the pasted text; a non-empty string return is shown as an error,
  // a null (or empty) return closes the dialog.
  onImport: (value: string) => string | null;
  // Forwarded to DialogContent as its `container`.
  container?: HTMLDivElement | null;
};

/**
 * Dialog that accepts pasted recipe JSON and hands it to `onImport`.
 *
 * If `onImport` returns a message it is displayed as an error and the dialog
 * stays open; otherwise the dialog closes. Closing always clears the pasted
 * text and any error.
 */
export function ImportDialog({
  open,
  onOpenChange,
  onImport,
  container,
}: ImportDialogProps): ReactElement {
  const [value, setValue] = useState("");
  const [error, setError] = useState<string | null>(null);
  const payloadId = "recipe-import-payload";

  const resetForm = () => {
    setValue("");
    setError(null);
  };

  const handleOpenChange = (nextOpen: boolean) => {
    // Start from a clean form the next time the dialog is opened.
    if (!nextOpen) {
      resetForm();
    }
    onOpenChange(nextOpen);
  };

  const handleImport = () => {
    const message = onImport(value);
    if (!message) {
      handleOpenChange(false);
      return;
    }
    setError(message);
  };

  return (
    <Dialog open={open} onOpenChange={handleOpenChange}>
      <DialogContent
        container={container}
        position="absolute"
        overlayPosition="absolute"
        overlayClassName="bg-transparent"
        className="corner-squircle max-h-[650px] overflow-auto sm:max-w-2xl shadow-border"
      >
        <DialogHeader>
          <DialogTitle>Import recipe</DialogTitle>
        </DialogHeader>
        <div className="grid gap-2">
          <FieldLabel
            label="Recipe JSON"
            htmlFor={payloadId}
            hint="Paste JSON exported from Recipe Studio."
          />
          <Textarea
            id={payloadId}
            className="corner-squircle nodrag min-h-[220px] max-h-[450px]"
            placeholder='{"recipe": { "columns": [] }}'
            value={value}
            onChange={(event) => setValue(event.target.value)}
          />
          {error && (
            <p className="text-xs text-rose-600" role="alert">
              {error}
            </p>
          )}
        </div>
        <DialogFooter>
          <Button type="button" variant="outline" onClick={handleImport}>
            Import recipe
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/llm/general-tab.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { ArrowRight01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, type RefObject, useMemo, useRef } from "react";
import { useRecipeStudioStore } from "../../stores/recipe-studio";
import type { LlmConfig } from "../../types";
import { isLikelyImageValue } from "../../utils/image-preview";
import { findInvalidJinjaReferences } from "../../utils/refs";
import { getAvailableVariables } from "../../utils/variables";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import { AvailableVariables } from "../shared/available-variables";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

// Language identifiers offered in the "Code language" select for code steps.
// SQL dialects are written as `sql:<dialect>`.
const CODE_LANG_OPTIONS = [
  "python",
  "javascript",
  "typescript",
  "java",
  "kotlin",
  "go",
  "rust",
  "ruby",
  "scala",
  "swift",
  "sql:sqlite",
  "sql:postgres",
  "sql:mysql",
  "sql:tsql",
  "sql:bigquery",
  "sql:ansi",
];

// Values offered in the "Save trace details" select; "none" is the fallback
// used when a config has no trace mode set.
const TRACE_MODE_OPTIONS = ["none", "last_message", "all_messages"] as const;

/**
 * Coerce a raw select value into a supported trace mode.
 *
 * @param value - String emitted by the trace-mode select.
 * @returns The value itself when it is "last_message" or "all_messages";
 *   "none" for anything else.
 */
function normalizeTraceMode(value: string): LlmConfig["with_trace"] {
  switch (value) {
    case "last_message":
    case "all_messages":
      return value;
    default:
      return "none";
  }
}

/** Props accepted by LlmGeneralTab. */
type LlmGeneralTabProps = {
  /** LLM step being edited; field values and element ids are derived from it. */
  config: LlmConfig;
  /** Aliases listed in the "Model preset" combobox. */
  modelConfigAliases: string[];
  /** Provider options; only checked for emptiness to decide whether setup help is shown. */
  modelProviderOptions: string[];
  /** Aliases listed in the "Tool access" combobox. */
  toolProfileAliases: string[];
  /** Ref placed on the wrapper around the model preset combobox and used as its popup anchor. */
  modelAliasAnchorRef: RefObject<HTMLDivElement | null>;
  /** Called with a partial config whenever a field changes. */
  onUpdate: (patch: Partial<LlmConfig>) => void;
};

/**
 * General settings form for an LLM step.
 *
 * Renders, top to bottom: the step name, a setup/model hint banner, the model
 * preset combobox, optional tool access, the code language select (code steps
 * only), the prompt, the available-variables helper, image context controls
 * (only when the seed source is "hf"), the response format (structured steps
 * only), and a collapsible section with instructions, trace mode and the
 * reasoning toggle. Every edit is reported through `onUpdate` as a partial
 * config patch.
 */
export function LlmGeneralTab({
  config,
  modelConfigAliases,
  modelProviderOptions,
  toolProfileAliases,
  modelAliasAnchorRef,
  onUpdate,
}: LlmGeneralTabProps): ReactElement {
  const configs = useRecipeStudioStore((state) => state.configs);
  // Element ids are namespaced by the step id so labels (htmlFor) match their inputs.
  const modelAliasId = `${config.id}-model-alias`;
  const toolAliasId = `${config.id}-tool-alias`;
  const codeLangId = `${config.id}-code-lang`;
  const promptId = `${config.id}-prompt`;
  const outputFormatId = `${config.id}-output-format`;
  const systemPromptId = `${config.id}-system-prompt`;
  const hasModelConfigs = modelConfigAliases.length > 0;
  const hasModelProviders = modelProviderOptions.length > 0;
  const hasToolProfiles = toolProfileAliases.length > 0;
  // NOTE(review): presumably the variable names this step may reference in
  // templates — confirm against getAvailableVariables.
  const validReferences = useMemo(
    () => getAvailableVariables(configs, config.id),
    [configs, config.id],
  );
  // References in the prompt / system prompt that findInvalidJinjaReferences
  // reports as not matching validReferences.
  const invalidPromptRefs = useMemo(
    () => findInvalidJinjaReferences(config.prompt, validReferences),
    [config.prompt, validReferences],
  );
  const invalidSystemRefs = useMemo(
    () => findInvalidJinjaReferences(config.system_prompt, validReferences),
    [config.system_prompt, validReferences],
  );
  // Only the first three offenders are spelled out; the JSX appends a
  // "+N more" suffix for the rest.
  const invalidPromptText = invalidPromptRefs
    .slice(0, 3)
    .map((ref) => `{{ ${ref} }}`)
    .join(", ");
  const invalidSystemText = invalidSystemRefs
    .slice(0, 3)
    .map((ref) => `{{ ${ref} }}`)
    .join(", ");
  // First config of kind "seed", if any.
  const seedConfig = useMemo(
    () => Object.values(configs).find((item) => item.kind === "seed"),
    [configs],
  );
  // A seed with no explicit source type is treated as "hf".
  const hasHfSeed = Boolean(
    seedConfig && (seedConfig.seed_source_type ?? "hf") === "hf",
  );
  const seedColumns = useMemo(
    () => seedConfig?.seed_columns ?? [],
    [seedConfig],
  );
  const seedPreviewRows = useMemo(
    () => seedConfig?.seed_preview_rows ?? [],
    [seedConfig],
  );
  // Heuristic list of image-bearing columns: match on the column name first,
  // then on preview values. If nothing is detected, offer every seed column.
  const imageColumnOptions = useMemo(() => {
    if (seedColumns.length === 0) {
      return [];
    }
    const detected = seedColumns.filter((columnName) => {
      const lower = columnName.toLowerCase();
      if (
        lower.includes("image") ||
        lower.includes("img") ||
        lower.includes("photo") ||
        lower.includes("picture") ||
        lower.includes("base64") ||
        lower.includes("url")
      ) {
        return true;
      }
      return seedPreviewRows.some((row) => isLikelyImageValue(row[columnName]));
    });
    return detected.length > 0 ? detected : seedColumns;
  }, [seedColumns, seedPreviewRows]);
  // Default used when the step has no image context yet: disabled, no column.
  const imageContext = config.image_context ?? {
    enabled: false,
    // biome-ignore lint/style/useNamingConvention: api schema
    column_name: "",
  };
  const imageContextToggleId = `${config.id}-image-context-enabled`;
  const imageContextColumnId = `${config.id}-image-context-column`;
  // Trimmed, de-duplicated column names for the image field select. The
  // currently selected column is put first when it is not in the list, so the
  // select can still display it.
  const imageContextColumnOptions = useMemo(() => {
    const preferred =
      imageColumnOptions.length > 0 ? imageColumnOptions : seedColumns;
    const deduped = Array.from(
      new Set(preferred.map((value) => value.trim()).filter(Boolean)),
    );
    const selected = imageContext.column_name.trim();
    if (selected && !deduped.includes(selected)) {
      deduped.unshift(selected);
    }
    return deduped;
  }, [imageColumnOptions, imageContext.column_name, seedColumns]);
  const traceModeId = `${config.id}-trace-mode`;
  const reasoningToggleId = `${config.id}-reasoning-content`;
  const advancedOpen = config.advancedOpen === true;
  const toolAliasAnchorRef = useRef<HTMLDivElement>(null);
  // Setup help takes priority over the "choose a model preset" hint below.
  const needsSetupHelp = !hasModelConfigs || !hasModelProviders;
  const needsModelChoice = !config.model_alias?.trim();

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(value) => onUpdate({ name: value })}
      />
      {needsSetupHelp ? (
        <div className="rounded-2xl border border-border/60 bg-muted/10 px-4 py-3 text-xs text-muted-foreground">
          <p className="text-sm font-semibold text-foreground">
            Set up the model once, then come back here
          </p>
          <div className="mt-2 space-y-1.5">
            {!hasModelProviders && (
              <p className="flex items-start gap-2">
                <HugeiconsIcon
                  icon={ArrowRight01Icon}
                  className="mt-0.5 size-3.5 shrink-0 text-primary"
                />
                <span>Add a Provider connection step in AI generation → Setup.</span>
              </p>
            )}
            {!hasModelConfigs && (
              <p className="flex items-start gap-2">
                <HugeiconsIcon
                  icon={ArrowRight01Icon}
                  className="mt-0.5 size-3.5 shrink-0 text-primary"
                />
                <span>Add a Model preset step, connect it, then choose it below.</span>
              </p>
            )}
          </div>
        </div>
      ) : needsModelChoice ? (
        <div className="rounded-2xl border border-border/60 bg-muted/10 px-4 py-3 text-xs text-muted-foreground">
          <p className="text-sm font-semibold text-foreground">
            Start by choosing a model preset
          </p>
          <p className="mt-1">
            Once that is in place, write the prompt and add optional tool access
            if this step needs tools.
          </p>
        </div>
      ) : null}
      <div className="grid gap-2">
        <FieldLabel
          label="Model preset"
          htmlFor={modelAliasId}
          hint="Choose the reusable model setup for this step."
        />
        <div ref={modelAliasAnchorRef}>
          <Combobox
            items={modelConfigAliases}
            filteredItems={modelConfigAliases}
            filter={null}
            value={config.model_alias || null}
            onValueChange={(value) => onUpdate({ model_alias: value ?? "" })}
            itemToStringValue={(value) => value}
            autoHighlight={true}
          >
            <ComboboxInput
              id={modelAliasId}
              className="nodrag w-full"
              placeholder="Choose a model preset"
              onBlur={(event) => {
                const inputValue = event.target.value;
                if (inputValue !== config.model_alias) {
                  onUpdate({ model_alias: inputValue });
                }
              }}
            />
            <ComboboxContent anchor={modelAliasAnchorRef}>
              <ComboboxEmpty>No model configs found</ComboboxEmpty>
              <ComboboxList>
                {(alias: string) => (
                  <ComboboxItem key={alias} value={alias}>
                    {alias}
                  </ComboboxItem>
                )}
              </ComboboxList>
            </ComboboxContent>
          </Combobox>
        </div>
      </div>
      {!hasToolProfiles && (
        <p className="text-xs text-muted-foreground">
          Need tools for this step? Add a Tool access step in AI generation →
          Setup.
        </p>
      )}
      {(hasToolProfiles || Boolean(config.tool_alias?.trim())) && (
        <div className="grid gap-2">
          <FieldLabel
            label="Tool access (optional)"
            htmlFor={toolAliasId}
            hint="Choose saved tool access for this step. Leave empty if this step should not use tools."
          />
          <div ref={toolAliasAnchorRef}>
            <Combobox
              items={toolProfileAliases}
              filteredItems={toolProfileAliases}
              filter={null}
              value={config.tool_alias || null}
              onValueChange={(value) => onUpdate({ tool_alias: value ?? "" })}
              itemToStringValue={(value) => value}
              autoHighlight={true}
            >
              <ComboboxInput
                id={toolAliasId}
                className="nodrag w-full"
                placeholder="Choose tool access"
                onBlur={(event) => {
                  const inputValue = event.target.value;
                  if (inputValue !== (config.tool_alias ?? "")) {
                    onUpdate({ tool_alias: inputValue });
                  }
                }}
              />
              <ComboboxContent anchor={toolAliasAnchorRef}>
                <ComboboxEmpty>No tool access found</ComboboxEmpty>
                <ComboboxList>
                  {(alias: string) => (
                    <ComboboxItem key={alias} value={alias}>
                      {alias}
                    </ComboboxItem>
                  )}
                </ComboboxList>
              </ComboboxContent>
            </Combobox>
          </div>
        </div>
      )}
      {config.llm_type === "code" && (
        <div className="grid gap-2">
          <FieldLabel
            label="Code language"
            htmlFor={codeLangId}
            hint="Choose the language this AI step should generate."
          />
          <Select
            value={config.code_lang ?? "python"}
            onValueChange={(value) => onUpdate({ code_lang: value })}
          >
            <SelectTrigger className="nodrag w-full" id={codeLangId}>
              <SelectValue placeholder="Select language" />
            </SelectTrigger>
            <SelectContent>
              {CODE_LANG_OPTIONS.map((lang) => (
                <SelectItem key={lang} value={lang}>
                  {lang}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </div>
      )}
      <div className="grid gap-2">
        <FieldLabel
          label="Prompt"
          htmlFor={promptId}
          hint="Write the prompt for this step. Insert other fields with {{ field_name }}."
        />
        <Textarea
          id={promptId}
          className="corner-squircle nodrag max-h-[450px] overflow-auto"
          aria-invalid={invalidPromptRefs.length > 0}
          value={config.prompt}
          onChange={(event) => onUpdate({ prompt: event.target.value })}
        />
        {invalidPromptRefs.length > 0 && (
          <p className="text-xs text-destructive">
            Unknown field: {invalidPromptText}
            {invalidPromptRefs.length > 3
              ? ` +${invalidPromptRefs.length - 3} more`
              : ""}
          </p>
        )}
      </div>
      <AvailableVariables configId={config.id} />
      {hasHfSeed && (
        <div className="space-y-2">
          <div className="flex items-center justify-between gap-3">
            <FieldLabel
              label="Use image context"
              htmlFor={imageContextToggleId}
              hint="Attach one image field from your source data to this AI step."
            />
            <Switch
              id={imageContextToggleId}
              checked={imageContext.enabled}
              onCheckedChange={(checked) => {
                onUpdate({
                  image_context: {
                    ...imageContext,
                    enabled: checked,
                    // biome-ignore lint/style/useNamingConvention: api schema
                    column_name:
                      checked && !imageContext.column_name
                        ? (imageContextColumnOptions[0] ?? "")
                        : imageContext.column_name,
                  },
                });
              }}
            />
          </div>
          {imageContext.enabled && (
            <div className="grid gap-2">
              <FieldLabel
                label="Image field"
                htmlFor={imageContextColumnId}
                hint="Choose the source-data field that contains the image."
              />
              <Select
                value={imageContext.column_name || undefined}
                onValueChange={(value) =>
                  onUpdate({
                    image_context: {
                      ...imageContext,
                      // biome-ignore lint/style/useNamingConvention: api schema
                      column_name: value,
                    },
                  })
                }
              >
                <SelectTrigger
                  className="nodrag w-full"
                  id={imageContextColumnId}
                >
                  <SelectValue placeholder="Select image column" />
                </SelectTrigger>
                <SelectContent>
                  {imageContextColumnOptions.map((columnName) => (
                    <SelectItem key={columnName} value={columnName}>
                      {columnName}
                    </SelectItem>
                  ))}
                </SelectContent>
              </Select>
            </div>
          )}
        </div>
      )}
      {config.llm_type === "structured" && (
        <div className="grid gap-2">
          <FieldLabel
            label="Response format"
            htmlFor={outputFormatId}
            hint="Describe the JSON shape you want back."
          />
          <Textarea
            id={outputFormatId}
            className="corner-squircle nodrag"
            value={config.output_format ?? ""}
            onChange={(event) =>
              onUpdate({ output_format: event.target.value })
            }
          />
        </div>
      )}
      <Collapsible
        open={advancedOpen}
        onOpenChange={(open) => onUpdate({ advancedOpen: open })}
      >
        <CollapsibleTrigger asChild={true}>
          <CollapsibleSectionTriggerButton
            label="Trace and extra controls"
            open={advancedOpen}
          />
        </CollapsibleTrigger>
        <CollapsibleContent className="mt-3 space-y-4">
          <div className="grid gap-2">
            <FieldLabel
              label="Instructions (optional)"
              htmlFor={systemPromptId}
              hint="Add extra guidance that should apply before the prompt."
            />
            <Textarea
              id={systemPromptId}
              className="corner-squircle nodrag max-h-[450px] overflow-auto"
              aria-invalid={invalidSystemRefs.length > 0}
              value={config.system_prompt}
              onChange={(event) =>
                onUpdate({ system_prompt: event.target.value })
              }
            />
            {invalidSystemRefs.length > 0 && (
              <p className="text-xs text-destructive">
                Unknown field: {invalidSystemText}
                {invalidSystemRefs.length > 3
                  ? ` +${invalidSystemRefs.length - 3} more`
                  : ""}
              </p>
            )}
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Save trace details"
              htmlFor={traceModeId}
              hint="Adds a trace field you can inspect later."
            />
            <Select
              value={config.with_trace ?? "none"}
              onValueChange={(value) =>
                onUpdate({
                  // biome-ignore lint/style/useNamingConvention: api schema
                  with_trace: normalizeTraceMode(value),
                })
              }
            >
              <SelectTrigger className="nodrag w-full" id={traceModeId}>
                <SelectValue placeholder="Select trace mode" />
              </SelectTrigger>
              <SelectContent>
                {TRACE_MODE_OPTIONS.map((traceMode) => (
                  <SelectItem key={traceMode} value={traceMode}>
                    {traceMode}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
          </div>
          <div className="flex items-center justify-between gap-3">
            <FieldLabel
              label="Save reasoning text"
              htmlFor={reasoningToggleId}
              hint="Adds a reasoning field when the model returns one."
            />
            <Switch
              id={reasoningToggleId}
              checked={config.extract_reasoning_content === true}
              onCheckedChange={(checked) =>
                onUpdate({
                  // biome-ignore lint/style/useNamingConvention: api schema
                  extract_reasoning_content: checked,
                })
              }
            />
          </div>
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/llm/llm-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type ReactElement, useRef } from "react";
import type { LlmConfig } from "../../types";
import { LlmGeneralTab } from "./general-tab";
import { LlmScoresTab } from "./scores-tab";
import {
  Tabs,
  TabsContent,
  TabsList,
  TabsTrigger,
} from "@/components/ui/tabs";

/** Props accepted by LlmDialog; all but `config.llm_type` are passed through to the tabs it renders. */
type LlmDialogProps = {
  /** LLM step being edited; its `llm_type` decides whether the Scores tab is shown. */
  config: LlmConfig;
  /** Forwarded to LlmGeneralTab. */
  modelConfigAliases: string[];
  /** Forwarded to LlmGeneralTab. */
  modelProviderOptions: string[];
  /** Forwarded to LlmGeneralTab. */
  toolProfileAliases: string[];
  /** Called with a partial config whenever a field changes. */
  onUpdate: (patch: Partial<LlmConfig>) => void;
};

/**
 * Editor body for an LLM step.
 *
 * Non-judge steps render the general form on its own. Judge steps wrap the
 * same form in a tab strip and add a "Scores" tab for the scoring rubrics.
 */
export function LlmDialog({
  config,
  modelConfigAliases,
  modelProviderOptions,
  toolProfileAliases,
  onUpdate,
}: LlmDialogProps): ReactElement {
  const modelAliasAnchorRef = useRef<HTMLDivElement>(null);

  // Both layouts show the exact same general form, so describe it once.
  const generalTab = (
    <LlmGeneralTab
      config={config}
      modelConfigAliases={modelConfigAliases}
      modelProviderOptions={modelProviderOptions}
      toolProfileAliases={toolProfileAliases}
      modelAliasAnchorRef={modelAliasAnchorRef}
      onUpdate={onUpdate}
    />
  );

  if (config.llm_type !== "judge") {
    return generalTab;
  }

  // Past the early return the step is always a judge, so the Scores tab is
  // rendered unconditionally.
  return (
    <Tabs defaultValue="general" className="w-full">
      <TabsList className="w-full">
        <TabsTrigger value="general">General</TabsTrigger>
        <TabsTrigger value="scores">Scores</TabsTrigger>
      </TabsList>
      <TabsContent value="general" className="pt-3">
        {generalTab}
      </TabsContent>
      <TabsContent value="scores" className="pt-3">
        <LlmScoresTab config={config} onUpdate={onUpdate} />
      </TabsContent>
    </Tabs>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/llm/scores-tab.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import {
  Empty,
  EmptyContent,
  EmptyDescription,
  EmptyHeader,
  EmptyTitle,
} from "@/components/ui/empty";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";
import { type ReactElement } from "react";
import type { LlmConfig, Score } from "../../types";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by LlmScoresTab. */
type LlmScoresTabProps = {
  /** LLM step whose `scores` list is edited; `config.id` namespaces the React keys. */
  config: LlmConfig;
  /** Called with `{ scores }` whenever the list of scorers changes. */
  onUpdate: (patch: Partial<LlmConfig>) => void;
};

/**
 * Editor for the scoring rubrics ("scorers") of an LLM step.
 *
 * Each scorer has a name, a description and a list of value/description
 * options. Every edit builds a new `scores` array and reports it through
 * `onUpdate`; nothing is mutated in place.
 */
export function LlmScoresTab({
  config,
  onUpdate,
}: LlmScoresTabProps): ReactElement {
  const scores = config.scores ?? [];

  /** Publish a replacement scores array. */
  function updateScores(nextScores: Score[]): void {
    onUpdate({ scores: nextScores });
  }

  /** Drop the scorer at `index`. */
  function removeScore(index: number): void {
    updateScores(scores.filter((_, currentIndex) => currentIndex !== index));
  }

  /** Append a blank scorer pre-filled with two options, "1" and "5". */
  function addScore(): void {
    updateScores([
      ...scores,
      {
        name: "",
        description: "",
        options: [
          { value: "1", description: "" },
          { value: "5", description: "" },
        ],
      },
    ]);
  }

  /** Merge `patch` into the scorer at `index`. */
  function updateScore(index: number, patch: Partial<Score>): void {
    updateScores(
      scores.map((score, currentIndex) =>
        currentIndex === index ? { ...score, ...patch } : score,
      ),
    );
  }

  /** Append a blank option to the scorer at `scoreIndex`; no-op if it does not exist. */
  function addOption(scoreIndex: number): void {
    const score = scores[scoreIndex];
    if (!score) {
      return;
    }
    updateScore(scoreIndex, {
      options: [...(score.options ?? []), { value: "", description: "" }],
    });
  }

  /** Remove one option from the scorer at `scoreIndex`; no-op if the scorer does not exist. */
  function removeOption(scoreIndex: number, optionIndex: number): void {
    const score = scores[scoreIndex];
    if (!score) {
      return;
    }
    updateScore(scoreIndex, {
      options: (score.options ?? []).filter(
        (_option, currentIndex) => currentIndex !== optionIndex,
      ),
    });
  }

  /** Merge `patch` into one option of the scorer at `scoreIndex`; no-op if the scorer does not exist. */
  function updateOption(
    scoreIndex: number,
    optionIndex: number,
    patch: { value?: string; description?: string },
  ): void {
    const score = scores[scoreIndex];
    if (!score) {
      return;
    }
    updateScore(scoreIndex, {
      options: (score.options ?? []).map((option, currentIndex) =>
        currentIndex === optionIndex ? { ...option, ...patch } : option,
      ),
    });
  }

  return (
    <div className="space-y-3">
      <div className="flex items-center justify-between">
        <FieldLabel
          label="Scorers"
          hint="Rubrics used by LLM Judge to score each generated row."
        />
        {scores.length > 0 && (
          <Button type="button" size="xs" variant="outline" onClick={addScore}>
            Add scorer
          </Button>
        )}
      </div>
      {scores.length === 0 && (
        <Empty className="rounded-xl border border-dashed border-border/70 p-5">
          <EmptyHeader>
            <EmptyTitle className="text-sm">No scorers yet</EmptyTitle>
            <EmptyDescription className="text-xs">
              Add a scorer rubric before running judge generation.
            </EmptyDescription>
          </EmptyHeader>
          <EmptyContent className="max-w-none">
            <Button type="button" size="sm" onClick={addScore}>
              Add first scorer
            </Button>
          </EmptyContent>
        </Empty>
      )}
      {scores.map((score, index) => (
        <div
          key={`${config.id}-score-${index}`}
          className="space-y-2 rounded-xl corner-squircle border border-border/60 px-3 py-2"
        >
          <div className="flex items-center justify-between gap-2">
            <p className="text-xs font-semibold text-foreground">
              {score.name.trim() || `Scorer ${index + 1}`}
            </p>
            <Button
              type="button"
              size="xs"
              variant="ghost"
              onClick={() => removeScore(index)}
            >
              Remove
            </Button>
          </div>
          <Input
            className="nodrag h-8 text-xs"
            placeholder="Score name"
            value={score.name}
            onChange={(event) =>
              updateScore(index, { name: event.target.value })
            }
          />
          <Textarea
            className="corner-squircle nodrag min-h-[56px] text-xs"
            placeholder="Score description"
            value={score.description}
            onChange={(event) =>
              updateScore(index, { description: event.target.value })
            }
          />
          <div className="space-y-1">
            {(score.options ?? []).map((option, optionIndex) => (
              <div
                key={`${config.id}-score-${index}-option-${optionIndex}`}
                className="grid grid-cols-[74px_1fr_auto] gap-1"
              >
                <Input
                  className="nodrag h-7 text-xs"
                  placeholder="Value"
                  value={option.value}
                  onChange={(event) =>
                    updateOption(index, optionIndex, {
                      value: event.target.value,
                    })
                  }
                />
                <Input
                  className="nodrag h-7 text-xs"
                  placeholder="Description"
                  value={option.description}
                  onChange={(event) =>
                    updateOption(index, optionIndex, {
                      description: event.target.value,
                    })
                  }
                />
                {/* The visible label is just "x"; give assistive tech a real name. */}
                <Button
                  type="button"
                  size="xs"
                  variant="ghost"
                  aria-label={`Remove option ${optionIndex + 1}`}
                  onClick={() => removeOption(index, optionIndex)}
                >
                  x
                </Button>
              </div>
            ))}
            <Button
              type="button"
              size="xs"
              variant="outline"
              onClick={() => addOption(index)}
            >
              Add option
            </Button>
          </div>
        </div>
      ))}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/markdown-note/markdown-note-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Slider } from "@/components/ui/slider";
import { Textarea } from "@/components/ui/textarea";
import type { ReactElement } from "react";
import type { MarkdownNoteConfig } from "../../types";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

/** Props accepted by MarkdownNoteDialog. */
type MarkdownNoteDialogProps = {
  /** Note being edited: name, markdown text, color and opacity are read from it. */
  config: MarkdownNoteConfig;
  /** Called with a partial config whenever a field changes. */
  onUpdate: (patch: Partial<MarkdownNoteConfig>) => void;
};

/**
 * Editor for a markdown note block: name, color, opacity and markdown text.
 *
 * Opacity is stored on the config as a string. It is shown as a whole
 * percentage capped at 100, and falls back to 35 when the stored value is
 * missing or does not parse to a positive integer.
 */
export function MarkdownNoteDialog({
  config,
  onUpdate,
}: MarkdownNoteDialogProps): ReactElement {
  const markdownId = `${config.id}-markdown`;
  const colorId = `${config.id}-note-color`;
  // Parse once. NaN fails the `> 0` test, so unparsable input also falls back
  // to 35. No lower clamp is needed inside the positive branch.
  const parsedOpacity = Number.parseInt(config.note_opacity ?? "35", 10);
  const opacity = parsedOpacity > 0 ? Math.min(100, parsedOpacity) : 35;

  return (
    <div className="space-y-4">
      <NameField value={config.name} onChange={(value) => onUpdate({ name: value })} />
      <div className="grid gap-3">
        <FieldLabel
          label="Note style"
          htmlFor={colorId}
          hint="Pick a color and opacity for this note block."
        />
        <div className="flex items-center gap-3">
          <input
            id={colorId}
            type="color"
            className="nodrag h-9 w-14 cursor-pointer rounded-md border border-border/60 bg-transparent p-1"
            value={config.note_color ?? "#FDE68A"}
            onChange={(event) => onUpdate({ note_color: event.target.value })}
          />
          <div className="flex-1 space-y-1">
            <div className="flex items-center justify-between">
              <span className="text-xs text-muted-foreground">Opacity</span>
              <span className="text-xs tabular-nums text-muted-foreground">{opacity}%</span>
            </div>
            <Slider
              min={5}
              max={100}
              step={1}
              value={[opacity]}
              onValueChange={([value]) =>
                onUpdate({ note_opacity: String(Math.round(value)) })
              }
            />
          </div>
        </div>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Markdown"
          htmlFor={markdownId}
          hint="UI-only note. Not sent to backend payload recipe."
        />
        <Textarea
          id={markdownId}
          className="corner-squircle nodrag min-h-[180px]"
          placeholder="## Note"
          value={config.markdown}
          onChange={(event) => onUpdate({ markdown: event.target.value })}
        />
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/models/model-config-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Checkbox } from "@/components/ui/checkbox";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";
import { type ReactElement, useRef, useState } from "react";
import type { ModelConfig } from "../../types";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

/** Props accepted by ModelConfigDialog. */
type ModelConfigDialogProps = {
  /** Model preset being edited; element ids are derived from `config.id`. */
  config: ModelConfig;
  /** Provider names listed in the "Provider connection" combobox. */
  providerOptions: string[];
  /** Called with a partial config whenever a field changes. */
  onUpdate: (patch: Partial<ModelConfig>) => void;
};

/**
 * Form for editing a model preset: preset name, provider connection, model
 * ID, default generation settings and advanced request fields.
 *
 * Every edit is forwarded to `onUpdate` as a single-key patch. The only React
 * state held here is whether the advanced section is expanded; refs are used
 * to track the text typed into the provider combobox between renders.
 */
export function ModelConfigDialog({
  config,
  providerOptions,
  onUpdate,
}: ModelConfigDialogProps): ReactElement {
  const [optionalOpen, setOptionalOpen] = useState(false);
  // DOM ids derived from config.id so each FieldLabel can point at its input
  // through htmlFor.
  const modelId = `${config.id}-model`;
  const providerId = `${config.id}-provider`;
  const tempId = `${config.id}-temperature`;
  const topPId = `${config.id}-top-p`;
  const maxTokensId = `${config.id}-max-tokens`;
  const timeoutId = `${config.id}-timeout`;
  const extraBodyId = `${config.id}-inference-extra-body`;
  // Wrapper element handed to ComboboxContent as its anchor.
  const providerAnchorRef = useRef<HTMLDivElement>(null);
  // Latest text reported by the combobox input; read when the input blurs.
  const providerInputRef = useRef(config.provider);
  // Provider value seen on the previous render. When config.provider changes,
  // the tracked input text is reset to the new provider so a later blur does
  // not compare against stale text.
  const lastProviderRef = useRef(config.provider);
  if (lastProviderRef.current !== config.provider) {
    lastProviderRef.current = config.provider;
    providerInputRef.current = config.provider;
  }
  // Wraps one key/value pair into the patch shape expected by onUpdate.
  const updateField = <K extends keyof ModelConfig>(
    key: K,
    value: ModelConfig[K],
  ) => {
    onUpdate({ [key]: value } as Partial<ModelConfig>);
  };

  return (
    <div className="space-y-4">
      <NameField
        label="Model preset name"
        value={config.name}
        onChange={(value) => onUpdate({ name: value })}
      />
      <div className="rounded-2xl border border-border/60 bg-muted/10 px-4 py-3">
        <p className="text-sm font-semibold text-foreground">
          Set up one reusable model choice for your AI steps
        </p>
        <p className="mt-1 text-xs text-muted-foreground">
          Choose the provider connection, enter the exact model ID, then save any
          generation defaults you want to reuse.
        </p>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Provider connection"
          htmlFor={providerId}
          hint="Choose where this model should run."
        />
        <div ref={providerAnchorRef}>
          <Combobox
            items={providerOptions}
            filteredItems={providerOptions}
            filter={null}
            value={config.provider || null}
            onValueChange={(value) => updateField("provider", value ?? "")}
            onInputValueChange={(value) => {
              // Only remembered here; the value is committed on blur.
              providerInputRef.current = value;
            }}
            itemToStringValue={(value) => value}
            autoHighlight={true}
          >
            <ComboboxInput
              id={providerId}
              className="nodrag w-full"
              placeholder="Choose a provider connection"
              onBlur={() => {
                // Commit whatever text is in the input if it differs from the
                // stored provider. NOTE(review): presumably this allows a
                // provider name that is not in the list - confirm intent.
                const next = providerInputRef.current;
                if (next !== config.provider) {
                  updateField("provider", next);
                }
              }}
            />
            <ComboboxContent anchor={providerAnchorRef}>
              <ComboboxEmpty>No providers found</ComboboxEmpty>
              <ComboboxList>
                {(provider: string) => (
                  <ComboboxItem key={provider} value={provider}>
                    {provider}
                  </ComboboxItem>
                )}
              </ComboboxList>
            </ComboboxContent>
          </Combobox>
        </div>
        <p className="text-xs text-muted-foreground">
          {providerOptions.length === 0
            ? "Add a Provider connection step first, then come back here."
            : "Matching blocks are linked automatically on the canvas."}
        </p>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Model ID"
          htmlFor={modelId}
          hint="The exact model name sent to the connection."
        />
        <Input
          id={modelId}
          className="nodrag"
          placeholder="gpt-4o-mini"
          value={config.model}
          onChange={(event) => updateField("model", event.target.value)}
        />
      </div>
      <div className="grid gap-3">
        <div className="space-y-1">
          <p className="text-sm font-semibold text-foreground">
            Default generation settings
          </p>
          <p className="text-xs text-muted-foreground">
            These defaults are reused anywhere you choose this model preset.
          </p>
        </div>
        <div className="grid gap-3 sm:grid-cols-2">
          <div className="grid gap-2">
            <FieldLabel
              label="Temperature"
              htmlFor={tempId}
              hint="Higher values make responses more varied."
            />
            <Input
              id={tempId}
              className="nodrag"
              value={config.inference_temperature ?? ""}
              onChange={(event) =>
                updateField("inference_temperature", event.target.value)
              }
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Top-p"
              htmlFor={topPId}
              hint="Use this to limit how broad token selection can be."
            />
            <Input
              id={topPId}
              className="nodrag"
              value={config.inference_top_p ?? ""}
              onChange={(event) =>
                updateField("inference_top_p", event.target.value)
              }
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Max tokens"
              htmlFor={maxTokensId}
              hint="Maximum length of the model response."
            />
            <Input
              id={maxTokensId}
              className="nodrag"
              value={config.inference_max_tokens ?? ""}
              onChange={(event) =>
                updateField("inference_max_tokens", event.target.value)
              }
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Timeout (seconds)"
              htmlFor={timeoutId}
              hint="How long to wait before a request is treated as failed."
            />
            <Input
              id={timeoutId}
              className="nodrag"
              value={config.inference_timeout ?? ""}
              onChange={(event) =>
                updateField("inference_timeout", event.target.value)
              }
            />
          </div>
        </div>
      </div>
      <Collapsible open={optionalOpen} onOpenChange={setOptionalOpen}>
        <CollapsibleTrigger asChild={true}>
          <CollapsibleSectionTriggerButton
            label="Advanced request fields"
            open={optionalOpen}
          />
        </CollapsibleTrigger>
        <CollapsibleContent className="mt-3 space-y-4">
          <div className="grid gap-2">
            <FieldLabel
              label="Advanced request fields (JSON)"
              htmlFor={extraBodyId}
              hint="Extra request fields to send with every call."
            />
            <Textarea
              id={extraBodyId}
              className="corner-squircle nodrag"
              placeholder='{"top_k": 20, "min_p": 0.0}'
              value={config.inference_extra_body ?? ""}
              onChange={(event) =>
                updateField("inference_extra_body", event.target.value)
              }
            />
          </div>
          <label className="flex items-center gap-2 text-xs font-semibold uppercase text-muted-foreground">
            <Checkbox
              checked={config.skip_health_check ?? false}
              onCheckedChange={(value) =>
                updateField("skip_health_check", Boolean(value))
              }
            />
            Skip connection check
          </label>
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/models/model-provider-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Input } from "@/components/ui/input";
import { Textarea } from "@/components/ui/textarea";
import { type ReactElement, useState } from "react";
import type { ModelProviderConfig } from "../../types";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

// Props accepted by ModelProviderDialog.
type ModelProviderDialogProps = {
  // Provider connection being edited; its fields populate the form controls.
  config: ModelProviderConfig;
  // Receives a partial patch each time a form field changes.
  onUpdate: (patch: Partial<ModelProviderConfig>) => void;
};

/**
 * Form for editing a provider connection: connection name, endpoint, optional
 * API key, and a collapsible group of advanced overrides (API key environment
 * variable, extra headers, extra body).
 *
 * Each edit is sent to `onUpdate` as a single-key patch; the component itself
 * only remembers whether the advanced group is expanded.
 */
export function ModelProviderDialog({
  config,
  onUpdate,
}: ModelProviderDialogProps): ReactElement {
  const [advancedOpen, setAdvancedOpen] = useState(false);

  // DOM ids derived from config.id so each label can target its input.
  const fieldIds = {
    endpoint: `${config.id}-endpoint`,
    apiKey: `${config.id}-api-key`,
    apiKeyEnv: `${config.id}-api-key-env`,
    extraHeaders: `${config.id}-extra-headers`,
    extraBody: `${config.id}-extra-body`,
  };

  // Wraps one field/value pair into the patch shape expected by onUpdate.
  function patchField<K extends keyof ModelProviderConfig>(
    field: K,
    next: ModelProviderConfig[K],
  ): void {
    onUpdate({ [field]: next } as Partial<ModelProviderConfig>);
  }

  return (
    <div className="space-y-4">
      <NameField
        label="Connection name"
        value={config.name}
        onChange={(name) => onUpdate({ name })}
      />
      <div className="rounded-2xl border border-border/60 bg-muted/10 px-4 py-3">
        <p className="text-sm font-semibold text-foreground">
          Start with the endpoint you want this model to use
        </p>
        <p className="mt-1 text-xs text-muted-foreground">
          Most connections only need an endpoint. Add an API key if that
          service requires one.
        </p>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Endpoint"
          htmlFor={fieldIds.endpoint}
          hint="Base URL for the model service or gateway."
        />
        <Input
          id={fieldIds.endpoint}
          className="nodrag"
          placeholder="https://..."
          value={config.endpoint}
          onChange={({ target }) => patchField("endpoint", target.value)}
        />
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="API key (optional)"
          htmlFor={fieldIds.apiKey}
          hint="Paste a key here, or use an environment variable below."
        />
        <Input
          id={fieldIds.apiKey}
          className="nodrag"
          value={config.api_key ?? ""}
          onChange={({ target }) => patchField("api_key", target.value)}
        />
      </div>
      <Collapsible open={advancedOpen} onOpenChange={setAdvancedOpen}>
        <CollapsibleTrigger asChild={true}>
          <CollapsibleSectionTriggerButton
            label="Advanced request overrides"
            open={advancedOpen}
          />
        </CollapsibleTrigger>
        <CollapsibleContent className="mt-3 space-y-4">
          <div className="grid gap-2">
            <FieldLabel
              label="API key environment variable"
              htmlFor={fieldIds.apiKeyEnv}
              hint="Name of the environment variable that stores the key."
            />
            <Input
              id={fieldIds.apiKeyEnv}
              className="nodrag"
              placeholder="OPENAI_API_KEY"
              value={config.api_key_env ?? ""}
              onChange={({ target }) => patchField("api_key_env", target.value)}
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Extra headers (JSON)"
              htmlFor={fieldIds.extraHeaders}
              hint="Optional headers to send with every request."
            />
            <Textarea
              id={fieldIds.extraHeaders}
              className="corner-squircle nodrag"
              placeholder='{"X-Header": "value"}'
              value={config.extra_headers ?? ""}
              onChange={({ target }) =>
                patchField("extra_headers", target.value)
              }
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Extra body (JSON)"
              htmlFor={fieldIds.extraBody}
              hint="Optional request fields to send every time."
            />
            <Textarea
              id={fieldIds.extraBody}
              className="corner-squircle nodrag"
              placeholder='{"key": "value"}'
              value={config.extra_body ?? ""}
              onChange={({ target }) => patchField("extra_body", target.value)}
            />
          </div>
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/preview-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import {
  Dialog,
  DialogContent,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Switch } from "@/components/ui/switch";
import { cn } from "@/lib/utils";
import {
  AlertCircleIcon,
  ArrowDown01Icon,
  CheckmarkCircle02Icon,
  CookBookIcon,
  TestTube01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, type ReactNode, useState } from "react";
import type { RecipeExecutionKind } from "../execution-types";
import type { RecipeRunSettings } from "../stores/recipe-executions";
import { FieldLabel } from "./shared/field-label";

// Props accepted by RunDialog. Everything except open, onOpenChange and
// container is forwarded unchanged to RunDialogBody.
type RunDialogProps = {
  // Controlled open state, passed straight to the underlying Dialog.
  open: boolean;
  onOpenChange: (open: boolean) => void;
  // Currently selected run type ("preview" is shown as "Test run",
  // "full" as "Full run").
  kind: RecipeExecutionKind;
  onKindChange: (kind: RecipeExecutionKind) => void;
  // Committed record count; seeds the editable "Records" draft.
  rows: number;
  // Run name; only shown and required when kind is "full".
  fullRunName: string;
  onFullRunNameChange: (name: string) => void;
  onRowsChange: (rows: number) => void;
  // Advanced run settings and the callback that receives partial updates.
  settings: RecipeRunSettings;
  onSettingsChange: (patch: Partial<RecipeRunSettings>) => void;
  // True while a run is being started; disables the footer buttons.
  loading: boolean;
  // True while a recipe check is in flight; disables "Check recipe".
  validateLoading: boolean;
  // Outcome of the last recipe check, or null when there is nothing to show.
  validateResult: {
    valid: boolean;
    errors: string[];
    rawDetail: string | null;
  } | null;
  // Messages listed in the "Before you run" panel when non-empty.
  errors: string[];
  onRun: () => void;
  onValidate: () => void;
  // Optional element passed to DialogContent as its container.
  container?: HTMLDivElement | null;
};

// Shape of the validation outcome (or null) as declared on RunDialogProps.
type ValidationResult = RunDialogProps["validateResult"];

// Upper bound applied to the "Records" and "Batch size" inputs.
const MAX_RECORDS = 200_000;
// Upper bound applied to "AI requests at once" and "CPU workers".
const MAX_WORKERS = 2_048;
// Upper bound applied to the "Failure check window" input.
const MAX_SHUTDOWN_WINDOW = 10_000;
// Upper bound applied to "Full retries" and "Correction attempts".
const MAX_RETRY_STEPS = 100;

/**
 * Rounds `value` down to an integer and limits it to the range [min, max].
 *
 * @param value - Number to clamp; NaN and +/-Infinity yield `min`.
 * @param min - Lowest value that can be returned.
 * @param max - Highest value that can be returned.
 * @returns The floored value, or the bound it violated.
 */
function clampInt(value: number, min: number, max: number): number {
  if (!Number.isFinite(value)) {
    return min;
  }
  const floored = Math.floor(value);
  // The lower bound is checked first, matching the order callers rely on.
  if (floored < min) {
    return min;
  }
  return floored > max ? max : floored;
}

/**
 * Limits `value` to the range [min, max] without rounding.
 *
 * @param value - Number to clamp; NaN and +/-Infinity yield `min`.
 * @param min - Lowest value that can be returned.
 * @param max - Highest value that can be returned.
 * @returns `value` itself when in range, otherwise the bound it violated.
 */
function clampFloat(value: number, min: number, max: number): number {
  // Non-finite input and values below the range both resolve to `min`.
  if (!Number.isFinite(value) || value < min) {
    return min;
  }
  return value > max ? max : value;
}

/**
 * Commits a text draft as an integer setting.
 *
 * Blank or non-numeric text is rejected: the draft is reset to `current` and
 * `apply` is not called. Otherwise the parsed number is floored and clamped
 * to [min, max], passed to `apply`, and written back to the draft.
 *
 * @param raw - Text currently in the input.
 * @param current - Last committed value, used to restore the draft.
 * @param min - Lowest accepted value.
 * @param max - Highest accepted value.
 * @param apply - Receives the committed value.
 * @param setDraft - Updates the input text.
 */
function commitInt(
  raw: string,
  current: number,
  min: number,
  max: number,
  apply: (value: number) => void,
  setDraft: (value: string) => void,
): void {
  const text = raw.trim();
  // Number("") is 0, so blank input is mapped to NaN explicitly to reject it.
  const parsed = text === "" ? Number.NaN : Number(text);
  if (!Number.isFinite(parsed)) {
    setDraft(String(current));
    return;
  }
  const committed = clampInt(parsed, min, max);
  apply(committed);
  setDraft(String(committed));
}

/**
 * Commits a text draft as a floating-point setting.
 *
 * Blank or non-numeric text is rejected: the draft is reset to `current` and
 * `apply` is not called. Otherwise the parsed number is clamped to
 * [min, max], passed to `apply`, and written back to the draft.
 *
 * @param raw - Text currently in the input.
 * @param current - Last committed value, used to restore the draft.
 * @param min - Lowest accepted value.
 * @param max - Highest accepted value.
 * @param apply - Receives the committed value.
 * @param setDraft - Updates the input text.
 */
function commitFloat(
  raw: string,
  current: number,
  min: number,
  max: number,
  apply: (value: number) => void,
  setDraft: (value: string) => void,
): void {
  const text = raw.trim();
  // Number("") is 0, so blank input is mapped to NaN explicitly to reject it.
  const parsed = text === "" ? Number.NaN : Number(text);
  if (!Number.isFinite(parsed)) {
    setDraft(String(current));
    return;
  }
  const committed = clampFloat(parsed, min, max);
  apply(committed);
  setDraft(String(committed));
}

// Props accepted by DraftInputField.
type DraftInputFieldProps = {
  // DOM id of the input; also used as the label's htmlFor target.
  id: string;
  // Label text and hint passed to FieldLabel.
  label: string;
  hint: string;
  // Value for the input's inputMode attribute.
  inputMode: "numeric" | "decimal";
  // Current draft text shown in the input.
  value: string;
  // Called with the new text on every change event.
  onChange: (value: string) => void;
  // Called when the input loses focus.
  onBlur: () => void;
  placeholder?: string;
};

/**
 * Labelled text input for a draft value.
 *
 * Reports the raw text on every change and forwards blur events to the
 * caller. No parsing or validation happens in this component.
 */
function DraftInputField(props: DraftInputFieldProps): ReactElement {
  const { id, label, hint, inputMode, value, placeholder } = props;
  const { onChange: emitChange, onBlur: handleBlur } = props;

  return (
    <div className="grid gap-2">
      <FieldLabel htmlFor={id} label={label} hint={hint} />
      <Input
        id={id}
        type="text"
        inputMode={inputMode}
        placeholder={placeholder}
        value={value}
        onBlur={handleBlur}
        onChange={({ target }) => emitChange(target.value)}
      />
    </div>
  );
}

/**
 * Bordered card that groups related settings under a title and a one-line
 * description; `children` are rendered below the heading.
 */
function AdvancedSettingsSection(props: {
  title: string;
  description: string;
  children: ReactNode;
}): ReactElement {
  const { title, description, children } = props;

  const heading = (
    <div className="space-y-0.5">
      <p className="text-sm font-semibold text-foreground">{title}</p>
      <p className="text-xs text-muted-foreground">{description}</p>
    </div>
  );

  return (
    <div className="space-y-3 rounded-2xl border border-border/70 bg-card/60 p-4">
      {heading}
      {children}
    </div>
  );
}

/**
 * Shows the outcome of the last recipe check.
 *
 * Renders nothing when `validateResult` is null. A valid result shows a green
 * "Ready to run" card. An invalid result shows a destructive-styled card
 * followed by each error message and, when present, the raw detail text.
 */
function ValidationResultPanel({
  validateResult,
}: {
  validateResult: ValidationResult;
}): ReactElement | null {
  if (!validateResult) {
    return null;
  }

  const { valid, errors, rawDetail } = validateResult;

  return (
    <div
      className={cn(
        "space-y-3 rounded-2xl border p-4 shadow-border backdrop-blur-sm",
        valid
          ? "border-emerald-300/70 bg-emerald-50/80 dark:border-emerald-900/60 dark:bg-emerald-950/30"
          : "border-destructive/30 bg-destructive/5",
      )}
    >
      <div className="flex items-start gap-3">
        <div
          className={cn(
            "mt-0.5 flex size-8 shrink-0 items-center justify-center rounded-full border",
            valid
              ? "border-emerald-300/70 bg-emerald-500/10 text-emerald-700 dark:border-emerald-900/60 dark:text-emerald-300"
              : "border-destructive/30 bg-destructive/10 text-destructive",
          )}
        >
          <HugeiconsIcon
            icon={valid ? CheckmarkCircle02Icon : AlertCircleIcon}
            className="size-4"
          />
        </div>
        <div className="min-w-0 flex-1 space-y-1">
          <p
            className={cn(
              "text-sm font-semibold",
              valid
                ? "text-emerald-700 dark:text-emerald-300"
                : "text-destructive",
            )}
          >
            {valid ? "Ready to run" : "Fix these issues first"}
          </p>
          <p className="text-xs text-muted-foreground">
            {valid
              ? "Everything checks out. Start the run when you're ready."
              : "Update the recipe, then check it again."}
          </p>
        </div>
      </div>
      {!valid && errors.length > 0 && (
        <div className="space-y-1">
          {errors.map((error, index) => (
            // The same message can appear more than once, so the index is
            // part of the key to keep sibling keys unique.
            <p
              key={`${index}-${error}`}
              className="break-words text-xs text-destructive"
            >
              {error}
            </p>
          ))}
        </div>
      )}
      {!valid && rawDetail && (
        <p className="break-words text-xs text-destructive">{rawDetail}</p>
      )}
    </div>
  );
}

// Props for RunDialogBody: the RunDialog props minus the dialog-shell props
// (open, onOpenChange, container), plus a callback for the Cancel button.
type RunDialogBodyProps = Omit<
  RunDialogProps,
  "open" | "onOpenChange" | "container"
> & {
  // Invoked when the user clicks "Cancel".
  onClose: () => void;
};

/**
 * Content of the run dialog: run-type toggle, run name (full runs only),
 * record count, collapsible advanced settings, the "Before you run" error
 * list, the validation result panel and the footer actions.
 *
 * Each numeric input keeps a local string draft so the user can type freely;
 * the draft is parsed, clamped and committed through the matching callback
 * only when the input loses focus. Drafts are seeded from props once on
 * mount, so the parent must remount this component (for example by changing
 * its `key`) to re-seed them.
 */
function RunDialogBody({
  kind,
  onKindChange,
  rows,
  fullRunName,
  onFullRunNameChange,
  onRowsChange,
  settings,
  onSettingsChange,
  loading,
  validateLoading,
  validateResult,
  errors,
  onRun,
  onValidate,
  onClose,
}: RunDialogBodyProps): ReactElement {
  const [advancedOpen, setAdvancedOpen] = useState(false);
  const kindLabel = kind === "preview" ? "Test run" : "Full run";
  const normalizedFullRunName = fullRunName.trim();
  // A full run cannot start without a non-blank name.
  const isFullRunNameMissing =
    kind === "full" && normalizedFullRunName.length === 0;
  const rowHint =
    kind === "preview"
      ? "How many sample rows to generate for a quick check."
      : "How many rows to generate in total.";

  // String drafts for the numeric inputs, committed on blur.
  const [rowsDraft, setRowsDraft] = useState(String(rows));
  const [batchSizeDraft, setBatchSizeDraft] = useState(
    String(settings.batchSize),
  );
  // A null llmParallelRequests is shown as an empty input.
  const [llmParallelDraft, setLlmParallelDraft] = useState(
    settings.llmParallelRequests === null
      ? ""
      : String(settings.llmParallelRequests),
  );
  const [workersDraft, setWorkersDraft] = useState(
    String(settings.nonInferenceWorkers),
  );
  const [windowDraft, setWindowDraft] = useState(
    String(settings.shutdownErrorWindow),
  );
  const [restartsDraft, setRestartsDraft] = useState(
    String(settings.maxConversationRestarts),
  );
  const [correctionsDraft, setCorrectionsDraft] = useState(
    String(settings.maxConversationCorrectionSteps),
  );
  const [shutdownRateDraft, setShutdownRateDraft] = useState(
    String(settings.shutdownErrorRate),
  );

  return (
    <>
      <DialogHeader className="space-y-2">
        <DialogTitle>{kindLabel}</DialogTitle>
        <p className="text-sm text-muted-foreground">
          Choose a quick test or a full run. Advanced settings are optional.
        </p>
      </DialogHeader>

      <div className="grid gap-2">
        <FieldLabel
          label="Run type"
          hint="Start with a quick check or generate the full dataset."
        />
        <div className="grid grid-cols-2 gap-2">
          <Button
            type="button"
            variant={kind === "preview" ? "default" : "outline"}
            className="corner-squircle min-h-10 justify-center whitespace-normal px-3 text-center"
            aria-pressed={kind === "preview"}
            onClick={() => onKindChange("preview")}
          >
            Test run
          </Button>
          <Button
            type="button"
            variant={kind === "full" ? "default" : "outline"}
            className="corner-squircle min-h-10 justify-center whitespace-normal px-3 text-center"
            aria-pressed={kind === "full"}
            onClick={() => onKindChange("full")}
          >
            Full run
          </Button>
        </div>
      </div>

      {kind === "full" && (
        <div className="grid gap-2">
          <FieldLabel
            label="Run name"
            htmlFor="run-name"
            hint="Name shown in your run history."
          />
          <Input
            id="run-name"
            type="text"
            value={fullRunName}
            onChange={(event) => onFullRunNameChange(event.target.value)}
            placeholder="Sprint dataset v2"
            aria-invalid={isFullRunNameMissing}
          />
          {isFullRunNameMissing ? (
            <p className="text-xs text-destructive">
              Give this full run a name before you start.
            </p>
          ) : null}
        </div>
      )}

      <div className="grid gap-2">
        <FieldLabel label="Records" htmlFor="run-rows" hint={rowHint} />
        <Input
          id="run-rows"
          type="text"
          inputMode="numeric"
          value={rowsDraft}
          onChange={(event) => setRowsDraft(event.target.value)}
          onBlur={() =>
            commitInt(
              rowsDraft,
              rows,
              1,
              MAX_RECORDS,
              onRowsChange,
              setRowsDraft,
            )
          }
        />
      </div>

      <Collapsible open={advancedOpen} onOpenChange={setAdvancedOpen}>
        <CollapsibleTrigger asChild={true}>
          <button
            type="button"
            className="flex items-center gap-2 text-xs font-semibold uppercase tracking-wide text-muted-foreground hover:text-foreground"
          >
            <HugeiconsIcon
              icon={ArrowDown01Icon}
              className={cn(
                "size-3.5 transition-transform",
                advancedOpen && "rotate-180",
              )}
            />
            {advancedOpen
              ? "Hide advanced run settings"
              : "Show advanced run settings"}
          </button>
        </CollapsibleTrigger>
        <CollapsibleContent className="mt-3 space-y-4">
          {kind === "full" && (
            <AdvancedSettingsSection
              title="Batching"
              description="Use batches when you want to split a larger run into smaller pieces."
            >
              <div className="flex items-center justify-between gap-3 text-sm">
                <div className="space-y-0.5">
                  <span className="font-medium">Enable batching</span>
                  <p className="text-xs text-muted-foreground">
                    Split a larger run into smaller chunks.
                  </p>
                </div>
                <Switch
                  checked={settings.batchEnabled}
                  onCheckedChange={(checked) =>
                    onSettingsChange({ batchEnabled: Boolean(checked) })
                  }
                />
              </div>
              {rows >= 1000 && !settings.batchEnabled ? (
                <p className="text-xs text-muted-foreground">
                  Larger runs are usually easier to manage in batches.
                </p>
              ) : null}
            </AdvancedSettingsSection>
          )}
          <AdvancedSettingsSection
            title="Throughput"
            description="Control how much work runs at the same time."
          >
            <div className="grid gap-4 md:grid-cols-2">
              <DraftInputField
                id="run-llm-parallel"
                label="AI requests at once"
                hint="Leave empty to use each saved model's own setting."
                inputMode="numeric"
                value={llmParallelDraft}
                onChange={setLlmParallelDraft}
                onBlur={() => {
                  const trimmed = llmParallelDraft.trim();
                  // Unlike the other fields, blank is a valid value here: it
                  // is committed as null rather than reverted.
                  if (!trimmed) {
                    onSettingsChange({ llmParallelRequests: null });
                    setLlmParallelDraft("");
                    return;
                  }
                  const parsed = Number(trimmed);
                  if (!Number.isFinite(parsed)) {
                    // Unparseable text: restore the last committed value.
                    setLlmParallelDraft(
                      settings.llmParallelRequests === null
                        ? ""
                        : String(settings.llmParallelRequests),
                    );
                    return;
                  }
                  const next = clampInt(parsed, 1, MAX_WORKERS);
                  onSettingsChange({ llmParallelRequests: next });
                  setLlmParallelDraft(String(next));
                }}
                placeholder="Use saved model setting"
              />
              <DraftInputField
                id="run-non-inference-workers"
                label="CPU workers"
                hint="Used for steps like source data, generated fields, and formulas."
                inputMode="numeric"
                value={workersDraft}
                onChange={setWorkersDraft}
                onBlur={() =>
                  commitInt(
                    workersDraft,
                    settings.nonInferenceWorkers,
                    1,
                    MAX_WORKERS,
                    (value) => onSettingsChange({ nonInferenceWorkers: value }),
                    setWorkersDraft,
                  )
                }
              />
              {kind === "full" && settings.batchEnabled && (
                <>
                  <DraftInputField
                    id="run-batch-size"
                    label="Batch size"
                    hint="How many rows to generate in each batch."
                    inputMode="numeric"
                    value={batchSizeDraft}
                    onChange={setBatchSizeDraft}
                    onBlur={() =>
                      commitInt(
                        batchSizeDraft,
                        settings.batchSize,
                        1,
                        MAX_RECORDS,
                        (value) => onSettingsChange({ batchSize: value }),
                        setBatchSizeDraft,
                      )
                    }
                  />
                  <div className="flex items-center justify-between gap-3 rounded-xl border border-border/60 bg-background/60 px-3 py-2 text-sm text-foreground">
                    <div className="space-y-0.5">
                      <p className="font-medium">Merge batches into one file</p>
                      <p className="text-xs text-muted-foreground">
                        Combine every batch output into one final file.
                      </p>
                    </div>
                    <Switch
                      checked={settings.mergeBatches}
                      onCheckedChange={(checked) =>
                        onSettingsChange({ mergeBatches: Boolean(checked) })
                      }
                    />
                  </div>
                </>
              )}
            </div>
          </AdvancedSettingsSection>
          <AdvancedSettingsSection
            title="Retries and recovery"
            description="Choose how hard the run should try before it gives up."
          >
            <div className="grid gap-4 md:grid-cols-2">
              <DraftInputField
                id="run-shutdown-window"
                label="Failure check window"
                hint="How many recent attempts to inspect before stopping early."
                inputMode="numeric"
                value={windowDraft}
                onChange={setWindowDraft}
                onBlur={() =>
                  commitInt(
                    windowDraft,
                    settings.shutdownErrorWindow,
                    1,
                    MAX_SHUTDOWN_WINDOW,
                    (value) => onSettingsChange({ shutdownErrorWindow: value }),
                    setWindowDraft,
                  )
                }
              />
              <DraftInputField
                id="run-shutdown-rate"
                label="Stop after too many failures"
                hint="Example: 0.5 stops when about half of recent attempts fail."
                inputMode="decimal"
                value={shutdownRateDraft}
                onChange={setShutdownRateDraft}
                onBlur={() =>
                  commitFloat(
                    shutdownRateDraft,
                    settings.shutdownErrorRate,
                    0,
                    1,
                    (value) => onSettingsChange({ shutdownErrorRate: value }),
                    setShutdownRateDraft,
                  )
                }
              />
              <DraftInputField
                id="run-max-restarts"
                label="Full retries"
                hint="How many times to retry when a model answer fails checks."
                inputMode="numeric"
                value={restartsDraft}
                onChange={setRestartsDraft}
                onBlur={() =>
                  commitInt(
                    restartsDraft,
                    settings.maxConversationRestarts,
                    0,
                    MAX_RETRY_STEPS,
                    (value) =>
                      onSettingsChange({ maxConversationRestarts: value }),
                    setRestartsDraft,
                  )
                }
              />
              <DraftInputField
                id="run-correction-steps"
                label="Correction attempts"
                hint="How many follow-up fixes to try before starting over."
                inputMode="numeric"
                value={correctionsDraft}
                onChange={setCorrectionsDraft}
                onBlur={() =>
                  commitInt(
                    correctionsDraft,
                    settings.maxConversationCorrectionSteps,
                    0,
                    MAX_RETRY_STEPS,
                    (value) =>
                      onSettingsChange({ maxConversationCorrectionSteps: value }),
                    setCorrectionsDraft,
                  )
                }
              />
              <div className="flex items-center justify-between gap-3 rounded-xl border border-border/60 bg-background/60 px-3 py-2 text-sm text-foreground md:col-span-2">
                <div className="space-y-0.5">
                  <p className="font-medium">Keep running through failures</p>
                  <p className="text-xs text-muted-foreground">
                    Useful for longer runs when you want as many rows as possible.
                  </p>
                </div>
                <Switch
                  checked={settings.disableEarlyShutdown}
                  onCheckedChange={(checked) =>
                    onSettingsChange({
                      disableEarlyShutdown: Boolean(checked),
                    })
                  }
                />
              </div>
            </div>
          </AdvancedSettingsSection>
        </CollapsibleContent>
      </Collapsible>

      {errors.length > 0 && (
        <div className="max-h-44 space-y-2 overflow-y-auto rounded-2xl border border-destructive/30 bg-destructive/5 p-4 shadow-border">
          <div className="flex items-center gap-2">
            <HugeiconsIcon
              icon={AlertCircleIcon}
              className="size-4 text-destructive"
            />
            <Badge
              variant="outline"
              className="rounded-full text-[10px] text-destructive"
            >
              Before you run
            </Badge>
          </div>
          {errors.map((error, index) => (
            // The same message can appear more than once, so the index is
            // part of the key to keep sibling keys unique.
            <p
              key={`${index}-${error}`}
              className="break-words text-xs text-destructive"
            >
              {error}
            </p>
          ))}
        </div>
      )}

      <ValidationResultPanel validateResult={validateResult} />

      <DialogFooter>
        <Button
          type="button"
          variant="outline"
          onClick={onClose}
          disabled={loading}
          className="corner-squircle border-border/70 bg-card/70"
        >
          Cancel
        </Button>
        <Button
          type="button"
          variant="outline"
          onClick={onValidate}
          disabled={loading || validateLoading}
          className="corner-squircle border-border/70 bg-card/70"
        >
          <HugeiconsIcon icon={TestTube01Icon} className="size-3.5" />
          {validateLoading ? "Checking..." : "Check recipe"}
        </Button>
        <Button
          type="button"
          onClick={onRun}
          disabled={loading || isFullRunNameMissing}
          className="corner-squircle"
        >
          <HugeiconsIcon icon={CookBookIcon} className="size-3.5" />
          {loading ? "Starting..." : `Start ${kindLabel.toLowerCase()}`}
        </Button>
      </DialogFooter>
    </>
  );
}

/**
 * Dialog shell around `RunDialogBody`.
 *
 * `open`, `onOpenChange` and `container` are consumed here; every other prop
 * is forwarded to the body untouched. The body receives an `onClose` callback
 * that asks the owner to close the dialog.
 */
export function RunDialog(props: RunDialogProps): ReactElement {
  const { open, onOpenChange, container, ...bodyProps } = props;

  // The key changes whenever the dialog opens/closes or the run kind changes,
  // so React mounts a fresh body instead of reusing the previous one.
  const visibility = open ? "open" : "closed";
  const draftKey = [visibility, bodyProps.kind].join("|");

  const requestClose = () => onOpenChange(false);

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent
        container={container}
        position="absolute"
        overlayPosition="absolute"
        overlayClassName="bg-transparent"
        className="corner-squircle max-h-[650px] overflow-y-auto overflow-x-hidden border-border/70 bg-background/95 sm:max-w-2xl shadow-border backdrop-blur-xl"
      >
        <RunDialogBody key={draftKey} {...bodyProps} onClose={requestClose} />
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/processors-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Dialog, DialogContent, DialogFooter, DialogTitle } from "@/components/ui/dialog";
import { Input } from "@/components/ui/input";
import { Switch } from "@/components/ui/switch";
import { Textarea } from "@/components/ui/textarea";
import { VisuallyHidden } from "radix-ui";
import { type ReactElement, useMemo } from "react";
import type { RecipeProcessorConfig } from "../types";
import { buildDefaultSchemaTransform } from "../utils/processors";
import { AvailableVariables } from "./shared/available-variables";
import { FieldLabel } from "./shared/field-label";
/** Props accepted by `ProcessorsDialog`. */
type ProcessorsDialogProps = {
  /** Whether the dialog is open; forwarded to the underlying `Dialog`. */
  open: boolean;
  /** Open-state callback; forwarded to `Dialog` and called with `false` by the "Done" button. */
  onOpenChange: (open: boolean) => void;
  /** Current processor list; the dialog edits the first `schema_transform` entry in it. */
  processors: RecipeProcessorConfig[];
  /** Receives the full replacement list whenever a processor is added, removed or edited. */
  onProcessorsChange: (processors: RecipeProcessorConfig[]) => void;
  /** Optional element forwarded to `DialogContent` as its `container`. */
  container?: HTMLDivElement | null;
};

/**
 * Dialog for toggling and editing the recipe's schema-transform processor.
 *
 * The switch adds a default schema transform (from
 * `buildDefaultSchemaTransform`) when enabled and removes every
 * `schema_transform` entry when disabled. While one exists, its name and
 * template can be edited; each edit emits a new processor list through
 * `onProcessorsChange`.
 */
export function ProcessorsDialog(props: ProcessorsDialogProps): ReactElement {
  const { open, onOpenChange, processors, onProcessorsChange, container } =
    props;

  // Position of the first schema transform in the list, or -1 when absent.
  const schemaIndex = useMemo(
    () =>
      processors.findIndex(
        (entry) => entry.processor_type === "schema_transform",
      ),
    [processors],
  );
  const schemaProcessor = schemaIndex < 0 ? null : processors[schemaIndex];
  const schemaEnabled = Boolean(schemaProcessor);

  // Field ids are derived from the processor id when one exists so labels and
  // inputs stay linked; otherwise a fixed fallback prefix is used.
  const fieldIdPrefix = schemaProcessor
    ? schemaProcessor.id
    : "schema-transform";
  const nameId = `${fieldIdPrefix}-name`;
  const templateId = `${fieldIdPrefix}-template`;

  const handleSchemaToggle = (enabled: boolean) => {
    if (!enabled) {
      onProcessorsChange(
        processors.filter(
          (entry) => entry.processor_type !== "schema_transform",
        ),
      );
      return;
    }
    // Already enabled: nothing to add.
    if (!schemaProcessor) {
      onProcessorsChange([...processors, buildDefaultSchemaTransform()]);
    }
  };

  const patchSchemaProcessor = (patch: Partial<RecipeProcessorConfig>) => {
    if (!schemaProcessor) {
      return;
    }
    const merged = { ...schemaProcessor, ...patch };
    onProcessorsChange(
      processors.map((entry, position) =>
        position === schemaIndex ? merged : entry,
      ),
    );
  };

  const closeDialog = () => onOpenChange(false);

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent
        container={container}
        position="absolute"
        overlayPosition="absolute"
        overlayClassName="bg-transparent"
        className="corner-squircle max-h-[650px] overflow-auto sm:max-w-2xl shadow-border"
      >
        <VisuallyHidden.Root>
          <DialogTitle>Processors</DialogTitle>
        </VisuallyHidden.Root>
        <div className="space-y-4">
          <div className="flex items-center justify-between gap-3 corner-squircle rounded-2xl border border-border/60 px-3 py-2">
            <div>
              <p className="text-sm font-semibold">Schema transform</p>
              <p className="text-xs text-muted-foreground">
                Transform final rows to target schema (post-batch).
              </p>
            </div>
            <Switch
              checked={schemaEnabled}
              onCheckedChange={handleSchemaToggle}
            />
          </div>

          {schemaProcessor ? (
            <div className="space-y-3">
              <AvailableVariables configId="" />
              <div className="grid gap-2">
                <FieldLabel
                  label="Name"
                  htmlFor={nameId}
                  hint="Processor name shown in graph and payload."
                />
                <Input
                  id={nameId}
                  className="nodrag"
                  value={schemaProcessor.name}
                  onChange={(event) =>
                    patchSchemaProcessor({ name: event.target.value })
                  }
                />
              </div>
              <div className="grid gap-2">
                <FieldLabel
                  label="Template (JSON)"
                  htmlFor={templateId}
                  hint="Target output schema template using Jinja references."
                />
                <Textarea
                  id={templateId}
                  className="corner-squircle nodrag min-h-[220px]"
                  value={schemaProcessor.template}
                  onChange={(event) =>
                    patchSchemaProcessor({ template: event.target.value })
                  }
                />
                <p className="text-xs text-muted-foreground">
                  Use Jinja refs like {"{{ customer_review }}"} in values.
                </p>
              </div>
            </div>
          ) : null}
        </div>
        <DialogFooter>
          <Button type="button" variant="outline" onClick={closeDialog}>
            Done
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/bernoulli-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by `BernoulliDialog`. */
type BernoulliDialogProps = {
  /** Sampler being edited; the dialog reads its `id`, `name` and `p`. */
  config: SamplerConfig;
  /** Called with a partial patch (`name` or `p`) whenever a field changes. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form body for a Bernoulli sampler: a name field plus a numeric
 * probability input. Each edit is reported through `onUpdate` as a partial
 * patch; the probability is forwarded as the raw input string.
 */
export function BernoulliDialog(props: BernoulliDialogProps): ReactElement {
  const { config, onUpdate } = props;
  const probabilityInputId = `${config.id}-bernoulli-p`;
  const probabilityValue = config.p ?? "";

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(name) => onUpdate({ name })}
      />
      <div className="grid gap-2">
        <FieldLabel
          label="Probability (p)"
          htmlFor={probabilityInputId}
          hint="Success probability in [0, 1]."
        />
        <Input
          id={probabilityInputId}
          type="number"
          min="0"
          max="1"
          step="0.01"
          className="nodrag"
          value={probabilityValue}
          onChange={(event) => {
            const p = event.target.value;
            return onUpdate({ p });
          }}
        />
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/category-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Input } from "@/components/ui/input";
import { type ReactElement, useState } from "react";
import type { SamplerConfig } from "../../types";
import { ChipInput } from "../../components/chip-input";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

/** Props accepted by `CategoryDialog`. */
type CategoryDialogProps = {
  /** Sampler being edited; values, weights and conditional params are read from it. */
  config: SamplerConfig;
  /** Called with a partial patch whenever any field of the sampler changes. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Append `value` to `values` together with an empty (`null`) weight slot.
 *
 * Weights are matched to values by index, so the existing weights are first
 * normalised to exactly one slot per existing value: missing slots become
 * `null` and surplus slots are dropped. Without this, a missing or short
 * `weights` array would put the new value's slot at the wrong index.
 *
 * @param values  Current values, or `undefined` when none exist yet.
 * @param weights Current weights, or `undefined` when none exist yet.
 * @param value   Value to append.
 * @returns New `values` and `weights` arrays of equal length; inputs are not mutated.
 */
function addChipWithWeight(
  values: string[] | undefined,
  weights: Array<number | null> | undefined,
  value: string,
): { values: string[]; weights: Array<number | null> } {
  const currentValues = values ?? [];
  const alignedWeights: Array<number | null> = Array.from(
    { length: currentValues.length },
    (_, index) => weights?.[index] ?? null,
  );
  return {
    values: [...currentValues, value],
    weights: [...alignedWeights, null],
  };
}

/**
 * Remove the entry at `index` from both `values` and `weights`.
 *
 * Each array is copied and then spliced independently, so the inputs are not
 * mutated; `undefined` inputs are treated as empty arrays.
 *
 * @param values  Current values, or `undefined`.
 * @param weights Current weights, or `undefined`.
 * @param index   Position passed to `splice` on each copy.
 * @returns The two copies after removal.
 */
function removeChipWithWeight(
  values: string[] | undefined,
  weights: Array<number | null> | undefined,
  index: number,
): { values: string[]; weights: Array<number | null> } {
  function withoutIndex<T>(items: T[] | undefined): T[] {
    const copy = Array.from(items ?? []);
    copy.splice(index, 1);
    return copy;
  }

  return { values: withoutIndex(values), weights: withoutIndex(weights) };
}

/**
 * Return a fresh weights array with exactly one slot per entry of `values`.
 * Slots without a stored weight are `null`; surplus weights are dropped.
 * Used before writing a weight by index so the array never gains holes.
 */
function padWeightsToValues(
  values: string[] | undefined,
  weights: Array<number | null> | undefined,
): Array<number | null> {
  return Array.from(
    { length: (values ?? []).length },
    (_, index) => weights?.[index] ?? null,
  );
}

/**
 * Form body for a category sampler.
 *
 * Shows the sampler name and its list of values, plus a collapsible
 * "advanced" section holding optional per-value weights and conditional
 * rules. Each conditional rule is keyed by its condition string and carries
 * its own values and weights. Every edit is reported through `onUpdate` as a
 * partial patch; only the text of the not-yet-added condition is kept in
 * local state.
 */
export function CategoryDialog({
  config,
  onUpdate,
}: CategoryDialogProps): ReactElement {
  const [conditionDraft, setConditionDraft] = useState("");
  const advancedOpen = config.advancedOpen === true;
  const conditionInputId = `${config.id}-conditional-rule`;
  const conditional = config.conditional_params ?? {};
  const conditionalCount = Object.keys(conditional).length;

  const handleAddCondition = () => {
    const condition = conditionDraft.trim();
    // Compare against own keys only: a plain `conditional[condition]` lookup
    // is also truthy for inherited names such as "toString", which would make
    // those conditions impossible to add.
    if (!condition || Object.keys(conditional).includes(condition)) {
      return;
    }
    onUpdate({
      // biome-ignore lint/style/useNamingConvention: api schema
      conditional_params: {
        ...conditional,
        [condition]: {
          // biome-ignore lint/style/useNamingConvention: api schema
          sampler_type: "category",
          values: [],
          weights: [],
        },
      },
    });
    setConditionDraft("");
  };

  const removeCondition = (condition: string) => {
    const next = { ...conditional };
    delete next[condition];
    onUpdate({
      // Clear the field entirely once the last rule is removed.
      // biome-ignore lint/style/useNamingConvention: api schema
      conditional_params: Object.keys(next).length > 0 ? next : undefined,
    });
  };

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(value) => onUpdate({ name: value })}
      />
      <div className="space-y-3">
        <div className="grid gap-2">
          <FieldLabel
            label="Values"
            hint="Define allowed categorical values for this column."
          />
          <ChipInput
            values={config.values ?? []}
            onAdd={(value) => {
              const { values, weights } = addChipWithWeight(
                config.values,
                config.weights,
                value,
              );
              onUpdate({ values, weights });
            }}
            onRemove={(index) => {
              const { values, weights } = removeChipWithWeight(
                config.values,
                config.weights,
                index,
              );
              onUpdate({ values, weights });
            }}
            placeholder="Type a value and press Enter"
          />
        </div>
      </div>
      <Collapsible
        open={advancedOpen}
        onOpenChange={(open) => onUpdate({ advancedOpen: open })}
      >
        <CollapsibleTrigger asChild={true}>
          <CollapsibleSectionTriggerButton
            label="Advanced list settings"
            open={advancedOpen}
          />
        </CollapsibleTrigger>
        <CollapsibleContent className="mt-2 space-y-3">
          <div className="grid gap-2">
            <FieldLabel
              label="Weights (optional)"
              hint="Set selection probability per value."
            />
            {(config.values ?? []).length === 0 ? (
              <p className="text-xs text-muted-foreground">
                Add values first, then set optional weights.
              </p>
            ) : (
              <div className="grid gap-2 sm:grid-cols-2 lg:grid-cols-3">
                {(config.values ?? []).map((value, index) => (
                  // The index is part of the key so repeated values cannot
                  // produce colliding React keys.
                  <div key={`${value}-${index}-weight`} className="space-y-1">
                    <p
                      className="truncate text-xs text-muted-foreground"
                      title={value}
                    >
                      {value}
                    </p>
                    <Input
                      type="number"
                      className="nodrag w-full"
                      placeholder="Weight"
                      value={config.weights?.[index] ?? ""}
                      onChange={(event) => {
                        // Pad first so writing by index never leaves holes.
                        const weights = padWeightsToValues(
                          config.values,
                          config.weights,
                        );
                        // An emptied input clears the weight.
                        weights[index] = event.target.value
                          ? Number(event.target.value)
                          : null;
                        onUpdate({ weights });
                      }}
                    />
                  </div>
                ))}
              </div>
            )}
          </div>
          <div className="flex items-center justify-between gap-2">
            <FieldLabel
              label="Conditional params (category)"
              hint="Override category values/weights when condition matches."
            />
            <span className="text-xs text-muted-foreground">
              {conditionalCount} rules
            </span>
          </div>
          <div className="flex gap-2">
            <Input
              id={conditionInputId}
              className="nodrag"
              placeholder="Condition (e.g., {{ region }} == 'US')"
              value={conditionDraft}
              onChange={(event) => setConditionDraft(event.target.value)}
              onKeyDown={(event) => {
                if (event.key === "Enter") {
                  event.preventDefault();
                  handleAddCondition();
                }
              }}
            />
            <Button type="button" size="sm" onClick={handleAddCondition}>
              Add rule
            </Button>
          </div>
          {Object.entries(conditional).map(([condition, params]) => (
            <div
              key={condition}
              className="space-y-3 rounded-2xl border border-border/60 p-3"
            >
              <div className="flex items-center justify-between gap-2">
                <p className="text-xs font-semibold text-foreground">{condition}</p>
                <Button
                  type="button"
                  size="xs"
                  variant="ghost"
                  onClick={() => removeCondition(condition)}
                >
                  Remove
                </Button>
              </div>
              <ChipInput
                values={params.values ?? []}
                onAdd={(value) => {
                  const { values, weights } = addChipWithWeight(
                    params.values,
                    params.weights,
                    value,
                  );
                  onUpdate({
                    // biome-ignore lint/style/useNamingConvention: api schema
                    conditional_params: {
                      ...conditional,
                      [condition]: { ...params, values, weights },
                    },
                  });
                }}
                onRemove={(index) => {
                  const { values, weights } = removeChipWithWeight(
                    params.values,
                    params.weights,
                    index,
                  );
                  onUpdate({
                    // biome-ignore lint/style/useNamingConvention: api schema
                    conditional_params: {
                      ...conditional,
                      [condition]: { ...params, values, weights },
                    },
                  });
                }}
                placeholder="Type a conditional value and press Enter"
              />
              <div className="grid gap-2">
                <p className="text-xs font-semibold uppercase text-muted-foreground">
                  Rule weights (optional)
                </p>
                <div className="grid gap-2 sm:grid-cols-2 lg:grid-cols-3">
                  {(params.values ?? []).map((value, index) => (
                    <div
                      key={`${condition}-${value}-${index}-weight`}
                      className="space-y-1"
                    >
                      <p
                        className="truncate text-xs text-muted-foreground"
                        title={value}
                      >
                        {value}
                      </p>
                      <Input
                        type="number"
                        className="nodrag"
                        placeholder="Weight"
                        value={params.weights?.[index] ?? ""}
                        onChange={(event) => {
                          const weights = padWeightsToValues(
                            params.values,
                            params.weights,
                          );
                          weights[index] = event.target.value
                            ? Number(event.target.value)
                            : null;
                          onUpdate({
                            // biome-ignore lint/style/useNamingConvention: api schema
                            conditional_params: {
                              ...conditional,
                              [condition]: { ...params, weights },
                            },
                          });
                        }}
                      />
                    </div>
                  ))}
                </div>
              </div>
            </div>
          ))}
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/datetime-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

// Unit names offered in the datetime sampler's "Unit" select. Each string is
// used as both the option value and its visible label, in this order.
const DATETIME_UNITS = [
  "second",
  "minute",
  "hour",
  "day",
  "week",
  "month",
  "year",
];

/** Props accepted by `DatetimeDialog`. */
type DatetimeDialogProps = {
  /** Sampler being edited; the dialog reads its name and `datetime_*` fields. */
  config: SamplerConfig;
  /** Called with a single-field partial patch whenever a field changes. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form body for a datetime sampler: name, start/end `datetime-local` inputs
 * and a unit select populated from `DATETIME_UNITS`. Each edit is reported
 * through `onUpdate` as a one-field patch carrying the raw input string.
 */
export function DatetimeDialog(props: DatetimeDialogProps): ReactElement {
  const { config, onUpdate } = props;
  const idPrefix = `${config.id}-datetime`;
  const startId = `${idPrefix}-start`;
  const endId = `${idPrefix}-end`;
  const unitId = `${idPrefix}-unit`;

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(name) => onUpdate({ name })}
      />
      <div className="grid gap-3">
        <div className="grid gap-2 sm:grid-cols-2">
          <div className="grid gap-2">
            <FieldLabel
              label="Start"
              htmlFor={startId}
              hint="Earliest datetime allowed."
            />
            <Input
              id={startId}
              type="datetime-local"
              className="nodrag"
              value={config.datetime_start ?? ""}
              onChange={(event) => {
                // biome-ignore lint/style/useNamingConvention: api schema
                onUpdate({ datetime_start: event.target.value });
              }}
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="End"
              htmlFor={endId}
              hint="Latest datetime allowed."
            />
            <Input
              id={endId}
              type="datetime-local"
              className="nodrag"
              value={config.datetime_end ?? ""}
              onChange={(event) => {
                // biome-ignore lint/style/useNamingConvention: api schema
                onUpdate({ datetime_end: event.target.value });
              }}
            />
          </div>
        </div>
        <div className="grid gap-2">
          <FieldLabel
            label="Unit"
            htmlFor={unitId}
            hint="Sampling granularity for generated timestamps."
          />
          <Select
            value={config.datetime_unit ?? ""}
            onValueChange={(unit) => {
              // biome-ignore lint/style/useNamingConvention: api schema
              onUpdate({ datetime_unit: unit });
            }}
          >
            <SelectTrigger className="nodrag w-full" id={unitId}>
              <SelectValue placeholder="Select unit" />
            </SelectTrigger>
            <SelectContent>
              {DATETIME_UNITS.map((unitName) => (
                <SelectItem key={unitName} value={unitName}>
                  {unitName}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/gaussian-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by `GaussianDialog`. */
type GaussianDialogProps = {
  /** Sampler being edited; the dialog reads its name, `mean`, `std` and `convert_to`. */
  config: SamplerConfig;
  /** Called with a partial patch whenever a field changes. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form body for a Gaussian sampler: name, mean, standard deviation and an
 * optional output conversion. Mean and std are forwarded to `onUpdate` as raw
 * input strings; choosing "None" for the conversion clears `convert_to`.
 */
export function GaussianDialog(props: GaussianDialogProps): ReactElement {
  const { config, onUpdate } = props;
  const idPrefix = `${config.id}-gaussian`;
  const meanId = `${idPrefix}-mean`;
  const stdId = `${idPrefix}-std`;
  const convertId = `${idPrefix}-convert`;

  // The select uses the sentinel "none" because the stored field is simply
  // left undefined when no conversion is requested.
  const handleConvertChange = (choice: string) => {
    if (choice === "none") {
      // biome-ignore lint/style/useNamingConvention: api schema
      onUpdate({ convert_to: undefined });
      return;
    }
    // biome-ignore lint/style/useNamingConvention: api schema
    onUpdate({ convert_to: choice as "int" | "float" | "str" });
  };

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(name) => onUpdate({ name })}
      />
      <div className="grid gap-3 sm:grid-cols-2">
        <div className="grid gap-2">
          <FieldLabel
            label="Mean"
            htmlFor={meanId}
            hint="Center of the normal distribution."
          />
          <Input
            id={meanId}
            type="number"
            className="nodrag"
            value={config.mean ?? ""}
            onChange={(event) => {
              const mean = event.target.value;
              onUpdate({ mean });
            }}
          />
        </div>
        <div className="grid gap-2">
          <FieldLabel
            label="Std"
            htmlFor={stdId}
            hint="Standard deviation. must be > 0."
          />
          <Input
            id={stdId}
            type="number"
            className="nodrag"
            value={config.std ?? ""}
            onChange={(event) => {
              const std = event.target.value;
              onUpdate({ std });
            }}
          />
        </div>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Convert to"
          htmlFor={convertId}
          hint="Optionally cast sampled values before output."
        />
        <Select
          value={config.convert_to ?? "none"}
          onValueChange={handleConvertChange}
        >
          <SelectTrigger className="nodrag w-full" id={convertId}>
            <SelectValue placeholder="No conversion" />
          </SelectTrigger>
          <SelectContent>
            <SelectItem value="none">None</SelectItem>
            <SelectItem value="int">int</SelectItem>
            <SelectItem value="float">float</SelectItem>
            <SelectItem value="str">str</SelectItem>
          </SelectContent>
        </Select>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/person-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { type ReactElement, useEffect } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by `PersonDialog`. */
type PersonDialogProps = {
  /** Sampler being edited; the dialog reads its name, `sampler_type` and `person_*` fields. */
  config: SamplerConfig;
  /** Called with a partial patch whenever a field changes; also listed as an effect dependency. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form body for a person sampler backed by Faker: name, locale, sex, age
 * range and city. Each edit is reported through `onUpdate` as a one-field
 * patch.
 *
 * An effect forces `sampler_type` to "person_from_faker" (clearing
 * `person_with_synthetic_personas`) whenever the config carries any other
 * sampler type.
 */
export function PersonDialog(props: PersonDialogProps): ReactElement {
  const { config, onUpdate } = props;
  const idPrefix = `${config.id}-person`;
  const localeId = `${idPrefix}-locale`;
  const sexId = `${idPrefix}-sex`;
  const ageRangeId = `${idPrefix}-age-range`;
  const cityId = `${idPrefix}-city`;

  useEffect(() => {
    if (config.sampler_type === "person_from_faker") {
      return;
    }
    onUpdate({
      sampler_type: "person_from_faker",
      person_with_synthetic_personas: undefined,
    });
  }, [config.sampler_type, onUpdate]);

  // The select shows "any" for an empty or whitespace-only stored value, and
  // picking "any" is stored back as an empty string.
  const selectedSex = config.person_sex?.trim() ? config.person_sex : "any";

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(name) => onUpdate({ name })}
      />
      <div className="grid gap-3">
        <div className="rounded-2xl border border-border/60 px-3 py-2">
          <p className="text-xs font-semibold uppercase text-muted-foreground">
            Source
          </p>
          <p className="text-sm text-foreground">Faker</p>
        </div>
        <div className="grid gap-3 sm:grid-cols-2">
          <div className="grid gap-2">
            <FieldLabel
              label="Locale"
              htmlFor={localeId}
              hint="Faker locale e.g. en_US."
            />
            <Input
              id={localeId}
              className="nodrag"
              value={config.person_locale ?? ""}
              onChange={(event) => {
                // biome-ignore lint/style/useNamingConvention: api schema
                onUpdate({ person_locale: event.target.value });
              }}
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Sex"
              htmlFor={sexId}
              hint="Optional demographic filter."
            />
            <Select
              value={selectedSex}
              onValueChange={(choice) => {
                const nextSex = choice === "any" ? "" : choice;
                // biome-ignore lint/style/useNamingConvention: api schema
                onUpdate({ person_sex: nextSex });
              }}
            >
              <SelectTrigger className="nodrag w-full" id={sexId}>
                <SelectValue placeholder="Any" />
              </SelectTrigger>
              <SelectContent>
                <SelectItem value="any">Any</SelectItem>
                <SelectItem value="Male">Male</SelectItem>
                <SelectItem value="Female">Female</SelectItem>
              </SelectContent>
            </Select>
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Age range"
              htmlFor={ageRangeId}
              hint="Range format: min-max, e.g. 18-70."
            />
            <Input
              id={ageRangeId}
              className="nodrag"
              value={config.person_age_range ?? ""}
              onChange={(event) => {
                // biome-ignore lint/style/useNamingConvention: api schema
                onUpdate({ person_age_range: event.target.value });
              }}
              placeholder="18-70"
            />
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="City"
              htmlFor={cityId}
              hint="Optional city bias for faker generation."
            />
            <Input
              id={cityId}
              className="nodrag"
              value={config.person_city ?? ""}
              onChange={(event) => {
                // biome-ignore lint/style/useNamingConvention: api schema
                onUpdate({ person_city: event.target.value });
              }}
            />
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/subcategory-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { type ReactElement, useCallback, useEffect, useMemo } from "react";
import type { SamplerConfig } from "../../types";
import { ChipInput } from "../../components/chip-input";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by `SubcategoryDialog`. */
type SubcategoryDialogProps = {
  /** Sampler being edited; its `subcategory_parent` and `subcategory_mapping` are read. */
  config: SamplerConfig;
  /** Candidate parent samplers; the parent is looked up by matching `name` to `subcategory_parent`. */
  categoryOptions: SamplerConfig[];
  /** Called with a partial patch whenever a field changes. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Editor for a subcategory sampler block.
 *
 * Lets the user pick a parent category column (one of `categoryOptions`) and,
 * for every value of that parent, maintain a list of subcategory values in
 * `config.subcategory_mapping`. All edits are reported through `onUpdate` as
 * partial patches; the component keeps no local state of its own.
 */
export function SubcategoryDialog({
  config,
  categoryOptions,
  onUpdate,
}: SubcategoryDialogProps): ReactElement {
  const parentSelectId = `${config.id}-parent-category`;
  // Typed single-field patch helper; memoized so dependants only change
  // when `onUpdate` itself changes.
  const updateField = useCallback(
    <K extends keyof SamplerConfig>(key: K, value: SamplerConfig[K]) => {
      onUpdate({ [key]: value } as Partial<SamplerConfig>);
    },
    [onUpdate],
  );
  // Resolve the currently selected parent sampler by name (null if none matches).
  const parent = useMemo(
    () =>
      categoryOptions.find(
        (option) => option.name === config.subcategory_parent,
      ) ?? null,
    [categoryOptions, config.subcategory_parent],
  );
  const categoryValues = parent?.values ?? [];
  const mapping = config.subcategory_mapping ?? {};

  // Rebuilds the mapping so it has exactly one key per value of `nextParent`,
  // keeping the existing list for keys that are already present and starting
  // new keys with an empty list. An update is only emitted when the key set
  // actually differs from the current mapping.
  const ensureMapping = useCallback(
    (nextParent?: SamplerConfig | null) => {
      const values = nextParent?.values ?? [];
      const nextMapping: Record<string, string[]> = {};
      for (const value of values) {
        nextMapping[value] = config.subcategory_mapping?.[value] ?? [];
      }
      const currentKeys = Object.keys(config.subcategory_mapping ?? {});
      const nextKeys = Object.keys(nextMapping);
      // Same length + every current key present in next => identical key sets.
      const changed =
        currentKeys.length !== nextKeys.length ||
        currentKeys.some((key) => !nextKeys.includes(key));
      if (changed) {
        updateField("subcategory_mapping", nextMapping);
      }
    },
    [config.subcategory_mapping, updateField],
  );

  // Re-sync mapping keys whenever the resolved parent (or the mapping itself,
  // via `ensureMapping`'s dependencies) changes. Nothing is done when no
  // parent is resolved, so an existing mapping is left untouched in that case.
  useEffect(() => {
    if (parent) {
      ensureMapping(parent);
    }
  }, [ensureMapping, parent]);

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(value) => onUpdate({ name: value })}
      />
      <div className="space-y-3">
        <div className="grid gap-2">
          <FieldLabel
            label="Parent category column"
            htmlFor={parentSelectId}
            hint="Category column this block maps from."
          />
          <Select
            value={config.subcategory_parent ?? ""}
            onValueChange={(value) => {
              // Look the new parent up directly: `parent` above still refers
              // to the previous selection during this handler.
              const nextParent =
                categoryOptions.find((option) => option.name === value) ?? null;
              // NOTE(review): two separate patches are emitted here (parent,
              // then mapping); this assumes `onUpdate` merges consecutive
              // patches rather than overwriting — confirm against the caller.
              updateField("subcategory_parent", value);
              ensureMapping(nextParent);
            }}
          >
            <SelectTrigger className="nodrag w-full" id={parentSelectId}>
              <SelectValue placeholder="Select category column" />
            </SelectTrigger>
            <SelectContent>
              {categoryOptions.map((option) => (
                <SelectItem key={option.id} value={option.name}>
                  {option.name}
                </SelectItem>
              ))}
            </SelectContent>
          </Select>
          <p className="text-xs text-muted-foreground">
            Map each parent category value to its subcategory options below.
          </p>
        </div>
        {categoryValues.length > 0 && (
          <div className="grid gap-4">
            {categoryValues.map((value) => (
              <div key={value}>
                <div className="mb-2 flex items-center justify-between gap-2">
                  <p className="text-sm font-semibold text-foreground">
                    {value}
                  </p>
                  <span className="text-xs text-muted-foreground">
                    {mapping[value]?.length ?? 0} subvalues
                  </span>
                </div>
                <ChipInput
                  values={mapping[value] ?? []}
                  onAdd={(item) => {
                    // Copy-on-write: clone the mapping and the affected list
                    // so the existing config object is never mutated.
                    const next = { ...mapping };
                    const list = next[value] ? [...next[value]] : [];
                    list.push(item);
                    next[value] = list;
                    updateField("subcategory_mapping", next);
                  }}
                  onRemove={(index) => {
                    // Same cloning approach as onAdd, removing one entry by index.
                    const next = { ...mapping };
                    const list = [...(next[value] ?? [])];
                    list.splice(index, 1);
                    next[value] = list;
                    updateField("subcategory_mapping", next);
                  }}
                  placeholder="Type subcategory and press Enter"
                />
                {(mapping[value] ?? []).length === 0 && (
                  <p className="mt-2 text-xs text-rose-500">
                    Add at least 1 subcategory.
                  </p>
                )}
              </div>
            ))}
          </div>
        )}
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/timedelta-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

// Units offered in the "Unit" select, in display order.
const TIMEDELTA_UNITS: Array<"D" | "h" | "m" | "s"> = ["D", "h", "m", "s"];
// Sentinel select value for the "None" reference option; it is translated to
// an empty string before being written to `reference_column_name`.
const NONE_VALUE = "__none";

/** Props accepted by `TimedeltaDialog`. */
type TimedeltaDialogProps = {
  /** Sampler block being edited. */
  config: SamplerConfig;
  /** Column names listed as choices in the reference-datetime select. */
  datetimeOptions: string[];
  /** Called with a partial `SamplerConfig` patch whenever a field is edited. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form for a timedelta sampler block.
 *
 * Exposes the block name, the `dt_min` / `dt_max` offsets, the offset unit and
 * the reference datetime column. Every edit is forwarded to `onUpdate` as a
 * single-field patch; the "None" reference choice is stored as an empty string.
 */
export function TimedeltaDialog({
  config,
  datetimeOptions,
  onUpdate,
}: TimedeltaDialogProps): ReactElement {
  const idPrefix = `${config.id}-timedelta`;
  const dtMinId = `${idPrefix}-min`;
  const dtMaxId = `${idPrefix}-max`;
  const unitId = `${idPrefix}-unit`;
  const referenceId = `${idPrefix}-reference`;

  // Emits a patch containing exactly one typed field.
  function patchField<K extends keyof SamplerConfig>(
    field: K,
    next: SamplerConfig[K],
  ): void {
    onUpdate({ [field]: next } as Partial<SamplerConfig>);
  }

  const selectedUnit = config.timedelta_unit ?? "D";
  // A blank / whitespace-only reference is shown as the "None" entry.
  const selectedReference = config.reference_column_name?.trim() || NONE_VALUE;

  const handleUnitChange = (picked: string): void => {
    patchField("timedelta_unit", picked as (typeof TIMEDELTA_UNITS)[number]);
  };
  const handleReferenceChange = (picked: string): void => {
    patchField("reference_column_name", picked === NONE_VALUE ? "" : picked);
  };

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(nextName) => onUpdate({ name: nextName })}
      />
      <div className="grid gap-3 sm:grid-cols-2">
        <div className="grid gap-2">
          <FieldLabel
            label="dt_min"
            htmlFor={dtMinId}
            hint="Minimum offset from reference datetime."
          />
          <Input
            id={dtMinId}
            type="number"
            className="nodrag"
            value={config.dt_min ?? ""}
            onChange={({ target }) => patchField("dt_min", target.value)}
          />
        </div>
        <div className="grid gap-2">
          <FieldLabel
            label="dt_max"
            htmlFor={dtMaxId}
            hint="Maximum offset from reference datetime."
          />
          <Input
            id={dtMaxId}
            type="number"
            className="nodrag"
            value={config.dt_max ?? ""}
            onChange={({ target }) => patchField("dt_max", target.value)}
          />
        </div>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Unit"
          htmlFor={unitId}
          hint="Offset unit. D/h/m/s."
        />
        <Select value={selectedUnit} onValueChange={handleUnitChange}>
          <SelectTrigger className="nodrag w-full" id={unitId}>
            <SelectValue placeholder="Select unit" />
          </SelectTrigger>
          <SelectContent>
            {TIMEDELTA_UNITS.map((unitOption) => (
              <SelectItem key={unitOption} value={unitOption}>
                {unitOption}
              </SelectItem>
            ))}
          </SelectContent>
        </Select>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Reference datetime column"
          htmlFor={referenceId}
          hint="Datetime column used as anchor before offset."
        />
        <Select value={selectedReference} onValueChange={handleReferenceChange}>
          <SelectTrigger className="nodrag w-full" id={referenceId}>
            <SelectValue placeholder="Select datetime column" />
          </SelectTrigger>
          <SelectContent>
            <SelectItem value={NONE_VALUE}>None</SelectItem>
            {datetimeOptions.map((columnName) => (
              <SelectItem key={columnName} value={columnName}>
                {columnName}
              </SelectItem>
            ))}
          </SelectContent>
        </Select>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/uniform-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by `UniformDialog`. */
type UniformDialogProps = {
  /** Sampler block being edited. */
  config: SamplerConfig;
  /** Called with a partial `SamplerConfig` patch whenever a field is edited. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form for a uniform sampler block.
 *
 * Edits the block name, the `low` / `high` bounds (kept as the raw input
 * strings) and the optional `convert_to` cast. Choosing "None" for the cast
 * patches `convert_to` with `undefined`.
 */
export function UniformDialog({
  config,
  onUpdate,
}: UniformDialogProps): ReactElement {
  const idPrefix = `${config.id}-uniform`;
  const lowId = `${idPrefix}-low`;
  const highId = `${idPrefix}-high`;
  const convertId = `${idPrefix}-convert`;

  const handleConvertChange = (choice: string): void => {
    const target =
      choice === "none" ? undefined : (choice as "int" | "float" | "str");
    onUpdate({
      // biome-ignore lint/style/useNamingConvention: api schema
      convert_to: target,
    });
  };

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(nextName) => onUpdate({ name: nextName })}
      />
      <div className="grid gap-3 sm:grid-cols-2">
        <div className="grid gap-2">
          <FieldLabel
            label="Low"
            htmlFor={lowId}
            hint="Minimum sampled value."
          />
          <Input
            id={lowId}
            type="number"
            className="nodrag"
            value={config.low ?? ""}
            onChange={({ target }) => onUpdate({ low: target.value })}
          />
        </div>
        <div className="grid gap-2">
          <FieldLabel
            label="High"
            htmlFor={highId}
            hint="Maximum sampled value."
          />
          <Input
            id={highId}
            type="number"
            className="nodrag"
            value={config.high ?? ""}
            onChange={({ target }) => onUpdate({ high: target.value })}
          />
        </div>
      </div>
      <div className="grid gap-2">
        <FieldLabel
          label="Convert to"
          htmlFor={convertId}
          hint="Optionally cast sampled values before output."
        />
        <Select
          value={config.convert_to ?? "none"}
          onValueChange={handleConvertChange}
        >
          <SelectTrigger className="nodrag w-full" id={convertId}>
            <SelectValue placeholder="No conversion" />
          </SelectTrigger>
          <SelectContent>
            <SelectItem value="none">None</SelectItem>
            <SelectItem value="int">int</SelectItem>
            <SelectItem value="float">float</SelectItem>
            <SelectItem value="str">str</SelectItem>
          </SelectContent>
        </Select>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/samplers/uuid-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import type { ReactElement } from "react";
import type { SamplerConfig } from "../../types";
import { NameField } from "../shared/name-field";
import { FieldLabel } from "../shared/field-label";

/** Props accepted by `UuidDialog`. */
type UuidDialogProps = {
  /** Sampler block being edited. */
  config: SamplerConfig;
  /** Called with a partial `SamplerConfig` patch whenever a field is edited. */
  onUpdate: (patch: Partial<SamplerConfig>) => void;
};

/**
 * Form for a UUID sampler block: edits the block name and the optional
 * `uuid_format` string, forwarding each change to `onUpdate` as a patch.
 */
export function UuidDialog({
  config,
  onUpdate,
}: UuidDialogProps): ReactElement {
  const formatInputId = `${config.id}-uuid-format`;

  // Emits a patch containing exactly one typed field.
  function patchField<K extends keyof SamplerConfig>(
    field: K,
    next: SamplerConfig[K],
  ): void {
    onUpdate({ [field]: next } as Partial<SamplerConfig>);
  }

  return (
    <div className="space-y-4">
      <NameField
        value={config.name}
        onChange={(nextName) => onUpdate({ name: nextName })}
      />
      <div className="grid gap-2">
        <FieldLabel
          label="UUID format (optional)"
          htmlFor={formatInputId}
          hint="Optional formatter e.g. prefix:, short, uppercase."
        />
        <Input
          id={formatInputId}
          className="nodrag"
          value={config.uuid_format ?? ""}
          onChange={({ target }) => patchField("uuid_format", target.value)}
        />
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/seed/seed-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Checkbox } from "@/components/ui/checkbox";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import {
  Empty,
  EmptyContent,
  EmptyDescription,
  EmptyHeader,
  EmptyTitle,
} from "@/components/ui/empty";
import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import {
  Tabs,
  TabsContent,
  TabsList,
  TabsTrigger,
} from "@/components/ui/tabs";
import mammoth from "mammoth";
import { type ReactElement, useCallback, useEffect, useMemo, useRef, useState } from "react";
import { extractText, getDocumentProxy } from "unpdf";
import { cn } from "@/lib/utils";
import { inspectSeedDataset, inspectSeedUpload } from "../../api";
import { resolveImagePreview } from "../../utils/image-preview";
import type {
  SeedConfig,
  SeedSamplingStrategy,
  SeedSelectionType,
} from "../../types";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import { HfDatasetCombobox } from "../../components/shared/hf-dataset-combobox";
import { FieldLabel } from "../shared/field-label";

// Value/label pairs for the seed sampling strategy.
// NOTE(review): presumably rendered as select options further down — confirm.
const SAMPLING_OPTIONS: Array<{ value: SeedSamplingStrategy; label: string }> = [
  { value: "ordered", label: "Ordered" },
  { value: "shuffle", label: "Shuffle" },
];

// Value/label pairs for the seed row-selection type.
// NOTE(review): presumably rendered as select options further down — confirm.
const SELECTION_OPTIONS: Array<{ value: SeedSelectionType; label: string }> = [
  { value: "none", label: "None" },
  { value: "index_range", label: "Index range" },
  { value: "partition_block", label: "Partition block" },
];

// `accept` filter for the structured (local) file input.
const LOCAL_ACCEPT = ".csv,.json,.jsonl";
// `accept` filter for the unstructured file input.
const UNSTRUCTURED_ACCEPT = ".txt,.pdf,.docx";
// Upload size cap (50 MiB), checked before a file is sent for inspection.
const MAX_UPLOAD_BYTES = 50 * 1024 * 1024;
// Fallbacks used when the configured chunk size / overlap is blank or not a
// finite number.
// NOTE(review): units look like characters — confirm against the backend chunker.
const DEFAULT_CHUNK_SIZE = 1200;
const DEFAULT_CHUNK_OVERLAP = 200;
// Upper clamp applied to the configured chunk size.
const MAX_CHUNK_SIZE = 20000;
// Preview cell text longer than this (string length) is treated as expandable
// and is cut to this length when truncated.
const PREVIEW_TRUNCATE_AT = 320;

/** Props accepted by `SeedDialog`. */
type SeedDialogProps = {
  /** Seed block being edited. */
  config: SeedConfig;
  /** Called with a partial `SeedConfig` patch whenever seed settings change. */
  onUpdate: (patch: Partial<SeedConfig>) => void;
  /** Open flag; the dialog watches it to react to the open -> closed transition. */
  open: boolean;
};

/**
 * Returns the message of `error` when it is an `Error` with a non-empty
 * message; otherwise returns `fallback`.
 */
function getErrorMessage(error: unknown, fallback: string): string {
  const message = error instanceof Error ? error.message : "";
  return message ? message : fallback;
}

/**
 * Converts an arbitrary preview cell value into display text.
 *
 * - `null` / `undefined` become an empty string.
 * - Strings are returned unchanged; numbers and booleans go through `String`.
 * - Everything else is JSON-serialized. If serialization throws (for example
 *   on circular structures or BigInt) or yields no output (functions and
 *   symbols make `JSON.stringify` return `undefined`), `String(value)` is
 *   used instead so the result is always a string.
 */
function stringifyCell(value: unknown): string {
  if (value === null || value === undefined) return "";
  if (typeof value === "string") return value;
  if (typeof value === "number" || typeof value === "boolean") return String(value);
  try {
    // Typed as `string` by lib.d.ts, but the runtime result may be undefined.
    const serialized: string | undefined = JSON.stringify(value);
    return serialized ?? String(value);
  } catch {
    return String(value);
  }
}

/** True when `value` is longer than the preview truncation threshold. */
function isExpandablePreviewValue(value: string): boolean {
  const overflow = value.length - PREVIEW_TRUNCATE_AT;
  return overflow > 0;
}

/**
 * Shortens over-long preview text to the truncation threshold and appends an
 * ellipsis; text at or below the threshold is returned unchanged.
 */
function truncatePreviewValue(value: string): string {
  return isExpandablePreviewValue(value)
    ? `${value.slice(0, PREVIEW_TRUNCATE_AT)}…`
    : value;
}

/**
 * Picks the title/description shown when no preview rows are loaded.
 *
 * `"local"` and `"unstructured"` have dedicated copy; every other mode falls
 * through to the Hugging Face dataset copy.
 */
function getPreviewEmptyStateCopy(mode: SeedConfig["seed_source_type"]): {
  title: string;
  description: string;
} {
  switch (mode) {
    case "local":
      return {
        title: "No local preview yet",
        description: "Choose a CSV/JSON/JSONL file, then click Load to fetch 10 rows.",
      };
    case "unstructured":
      return {
        title: "No chunk preview yet",
        description:
          "Choose a TXT/PDF/DOCX file, then click Load to extract + preview chunk_text rows.",
      };
    default:
      return {
        title: "No dataset preview yet",
        description: "Pick a Hugging Face dataset and click Load to fetch 10 sample rows.",
      };
  }
}

/**
 * Parses a user-entered numeric string into an integer within `[min, max]`.
 *
 * Returns `fallback` when the input is missing, blank, or not a finite
 * number. Otherwise the value is floored, then raised to `min` or lowered to
 * `max` if it falls outside the range (the `min` check is applied first).
 */
function parseChunkNumber(
  value: string | undefined,
  fallback: number,
  min: number,
  max: number,
): number {
  const text = (value ?? "").trim();
  if (text === "") {
    return fallback;
  }

  const numeric = Number(text);
  if (!Number.isFinite(numeric)) {
    return fallback;
  }

  const floored = Math.floor(numeric);
  if (floored < min) {
    return min;
  }
  return floored > max ? max : floored;
}

/**
 * Derives the effective chunk size and overlap from the seed config.
 *
 * The size is clamped to `[1, MAX_CHUNK_SIZE]` (default `DEFAULT_CHUNK_SIZE`).
 * The overlap is clamped to `[0, chunkSize - 1]` (default
 * `DEFAULT_CHUNK_OVERLAP`), so it is always smaller than the chunk size.
 */
function resolveChunking(config: SeedConfig): {
  chunkSize: number;
  chunkOverlap: number;
} {
  const sizeInput = config.unstructured_chunk_size;
  const chunkSize = parseChunkNumber(sizeInput, DEFAULT_CHUNK_SIZE, 1, MAX_CHUNK_SIZE);

  // The overlap ceiling depends on the resolved size, so compute it second.
  const overlapCeiling = Math.max(0, chunkSize - 1);
  const overlapInput = config.unstructured_chunk_overlap;
  const chunkOverlap = parseChunkNumber(overlapInput, DEFAULT_CHUNK_OVERLAP, 0, overlapCeiling);

  return { chunkSize, chunkOverlap };
}

/**
 * Reads `file` and resolves with its contents as a bare base64 string (the
 * part of the data URL after the comma).
 *
 * If the data URL carries no payload separator at all (a bare `data:...`
 * prefix), an empty payload is resolved rather than passing the URL prefix
 * along as if it were base64.
 * NOTE(review): some engines reportedly return a bare "data:" for zero-byte
 * files — confirm; the guard is harmless either way.
 *
 * Rejects with `Error("Failed to read file")` when the reader reports an error.
 */
async function fileToBase64Payload(file: File): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const dataUrl = String(reader.result ?? "");
      const separator = dataUrl.indexOf(",");
      if (separator >= 0) {
        // Base64 never contains a comma, so everything after the first one
        // is the payload.
        resolve(dataUrl.slice(separator + 1));
        return;
      }
      // No separator: a "data:" prefix means "no payload", not base64 text.
      resolve(dataUrl.startsWith("data:") ? "" : dataUrl);
    };
    reader.onerror = () => reject(new Error("Failed to read file"));
    reader.readAsDataURL(file);
  });
}

/**
 * Extracts plain text from a TXT, PDF or DOCX file, chosen by the
 * case-insensitive file extension.
 *
 * - `.txt`: the file's text content.
 * - `.pdf`: text extracted via `unpdf` with pages merged.
 * - `.docx`: raw text extracted via `mammoth`.
 *
 * Throws `Error("Unsupported unstructured file type")` for anything else.
 */
async function extractUnstructuredText(file: File): Promise<string> {
  const lowered = file.name.toLowerCase();
  // Text after (and including) the last dot; never matches when there is no dot.
  const extension = lowered.slice(lowered.lastIndexOf("."));

  switch (extension) {
    case ".txt":
      return file.text();
    case ".pdf": {
      const bytes = new Uint8Array(await file.arrayBuffer());
      const pdfDocument = await getDocumentProxy(bytes);
      const extracted = await extractText(pdfDocument, { mergePages: true });
      return extracted.text;
    }
    case ".docx": {
      const arrayBuffer = await file.arrayBuffer();
      const result = await mammoth.extractRawText({ arrayBuffer });
      return result.value;
    }
    default:
      throw new Error("Unsupported unstructured file type");
  }
}

/**
 * Produces the file that is actually uploaded for an unstructured seed.
 *
 * `.txt` and `.md` files are passed through untouched. Any other file has its
 * text extracted, trimmed and newline-normalized to `\n`, and is re-wrapped
 * as a `text/plain` file named `<stem>.txt`.
 *
 * Throws `Error("No text found in file.")` when extraction yields only
 * whitespace, and propagates errors from `extractUnstructuredText`.
 */
async function toUnstructuredUploadFile(file: File): Promise<File> {
  const lowered = file.name.toLowerCase();
  const passThrough = [".txt", ".md"].some((suffix) => lowered.endsWith(suffix));
  if (passThrough) {
    return file;
  }

  const extracted = await extractUnstructuredText(file);
  const text = extracted.trim();
  if (text.length === 0) {
    throw new Error("No text found in file.");
  }

  // Collapse Windows (\r\n) and bare carriage-return line endings to "\n".
  const withoutCrLf = text.replace(/\r\n/g, "\n");
  const normalized = withoutCrLf.replace(/\r/g, "\n");

  // Drop the original extension; fall back to a fixed stem if nothing is left.
  const strippedName = file.name.replace(/\.(pdf|docx)$/i, "");
  const stem = strippedName || "unstructured_seed";

  return new File([normalized], `${stem}.txt`, { type: "text/plain" });
}

export function SeedDialog({ config, onUpdate, open }: SeedDialogProps): ReactElement {
  const [inspectError, setInspectError] = useState<string | null>(null);
  const [isInspecting, setIsInspecting] = useState(false);
  const advancedOpen = config.advancedOpen === true;
  const [previewRows, setPreviewRows] = useState<Record<string, unknown>[]>([]);
  const [expandedPreviewRows, setExpandedPreviewRows] = useState<Record<number, boolean>>({});
  const [localFile, setLocalFile] = useState<File | null>(null);
  const [unstructuredFile, setUnstructuredFile] = useState<File | null>(null);

  const mode = config.seed_source_type ?? "hf";
  const previewEmpty = getPreviewEmptyStateCopy(mode);

  useEffect(() => {
    setInspectError(null);
    setLocalFile(null);
    setUnstructuredFile(null);
  }, [mode]);

  useEffect(() => {
    setPreviewRows(config.seed_preview_rows ?? []);
    setExpandedPreviewRows({});
  }, [config.seed_preview_rows]);

  const samplingId = `${config.id}-sampling`;
  const selectionId = `${config.id}-selection`;
  const tokenId = `${config.id}-hf-token`;
  const datasetId = `${config.id}-hf-dataset`;
  const chunkSizeId = `${config.id}-chunk-size`;
  const chunkOverlapId = `${config.id}-chunk-overlap`;
  const [lastLoadedKey, setLastLoadedKey] = useState<string | null>(null);
  const wasOpenRef = useRef(open);

  const getCurrentLoadKey = useCallback((): string | null => {
    if (mode === "hf") {
      const dataset = config.hf_repo_id.trim();
      if (!dataset) return null;
      const token = config.hf_token?.trim() ?? "";
      return `hf:${dataset}|${token}`;
    }
    if (mode === "local") {
      if (!localFile) return null;
      return `local:${localFile.name}|${localFile.size}|${localFile.lastModified}`;
    }
    if (!unstructuredFile) return null;
    const { chunkSize, chunkOverlap } = resolveChunking(config);
    return `unstructured:${unstructuredFile.name}|${unstructuredFile.size}|${unstructuredFile.lastModified}|${chunkSize}|${chunkOverlap}`;
  }, [
    config,
    localFile,
    mode,
    unstructuredFile,
  ]);

  const loadSeedMetadata = useCallback(async (opts?: { silent?: boolean }): Promise<boolean> => {
    const loadKey = getCurrentLoadKey();
    if (!opts?.silent) {
      setInspectError(null);
    }
    setIsInspecting(true);
    try {
      if (mode === "hf") {
        const datasetName = config.hf_repo_id.trim();
        if (!datasetName) {
          throw new Error("Dataset repo is required.");
        }
        const response = await inspectSeedDataset({
          dataset_name: datasetName,
          hf_token: config.hf_token?.trim() || undefined,
          split: config.hf_split?.trim() || undefined,
          subset: config.hf_subset?.trim() || undefined,
          preview_size: 10,
        });
        onUpdate({
          hf_path: response.resolved_path,
          seed_columns: response.columns,
          seed_drop_columns: (config.seed_drop_columns ?? []).filter((name) =>
            response.columns.includes(name),
          ),
          seed_preview_rows: response.preview_rows ?? [],
          hf_split: response.split ?? "",
          hf_subset: response.subset ?? "",
          local_file_name: "",
          unstructured_file_name: "",
        });
        setPreviewRows(response.preview_rows ?? []);
        setLastLoadedKey(loadKey);
        return true;
      }

      if (mode === "local") {
        if (!localFile) {
          throw new Error("Select a local CSV/JSON/JSONL file first.");
        }
        if (localFile.size > MAX_UPLOAD_BYTES) {
          throw new Error("File too large (max 50MB).");
        }
        const payload = await fileToBase64Payload(localFile);
        const response = await inspectSeedUpload({
          filename: localFile.name,
          content_base64: payload,
          preview_size: 10,
        });
        onUpdate({
          hf_path: response.resolved_path,
          seed_columns: response.columns,
          seed_drop_columns: (config.seed_drop_columns ?? []).filter((name) =>
            response.columns.includes(name),
          ),
          seed_preview_rows: response.preview_rows ?? [],
          hf_repo_id: "",
          hf_subset: "",
          hf_split: "",
          local_file_name: localFile.name,
          unstructured_file_name: "",
        });
        setPreviewRows(response.preview_rows ?? []);
        setLastLoadedKey(loadKey);
        return true;
      }

      if (!unstructuredFile) {
        throw new Error("Select a PDF/DOCX/TXT file first.");
      }
      if (unstructuredFile.size > MAX_UPLOAD_BYTES) {
        throw new Error("File too large (max 50MB).");
      }

      const { chunkSize, chunkOverlap } = resolveChunking(config);
      const uploadFile = await toUnstructuredUploadFile(unstructuredFile);
      if (uploadFile.size > MAX_UPLOAD_BYTES) {
        throw new Error("Processed text is too large (max 50MB).");
      }
      const payload = await fileToBase64Payload(uploadFile);
      const response = await inspectSeedUpload({
        filename: uploadFile.name,
        content_base64: payload,
        preview_size: 10,
        seed_source_type: "unstructured",
        unstructured_chunk_size: chunkSize,
        unstructured_chunk_overlap: chunkOverlap,
      });
      onUpdate({
        hf_path: response.resolved_path,
        seed_columns: response.columns,
        seed_drop_columns: (config.seed_drop_columns ?? []).filter((name) =>
          response.columns.includes(name),
        ),
        seed_preview_rows: response.preview_rows ?? [],
        hf_repo_id: "",
        hf_subset: "",
        hf_split: "",
        local_file_name: "",
        unstructured_file_name: unstructuredFile.name,
      });
      setPreviewRows(response.preview_rows ?? []);
      setLastLoadedKey(loadKey);
      return true;
    } catch (error) {
      if (!opts?.silent) {
        setInspectError(getErrorMessage(error, "Failed to load seed metadata."));
      }
      setPreviewRows([]);
      return false;
    } finally {
      setIsInspecting(false);
    }
  }, [
    config,
    getCurrentLoadKey,
    localFile,
    mode,
    onUpdate,
    unstructuredFile,
  ]);

  useEffect(() => {
    const wasOpen = wasOpenRef.current;
    wasOpenRef.current = open;
    if (!wasOpen || open || isInspecting) {
      return;
    }
    const key = getCurrentLoadKey();
    if (!key || key === lastLoadedKey) {
      return;
    }
    void loadSeedMetadata({ silent: true });
  }, [getCurrentLoadKey, isInspecting, lastLoadedKey, loadSeedMetadata, open]);

  const previewColumns = useMemo(() => {
    const loadedColumns = config.seed_columns ?? [];
    if (loadedColumns.length > 0) return loadedColumns;
    if (previewRows[0]) return Object.keys(previewRows[0]);
    return [];
  }, [config.seed_columns, previewRows]);
  const selectedSeedDropColumns = useMemo(
    () => (config.seed_drop_columns ?? []).filter((name) => name.trim().length > 0),
    [config.seed_drop_columns],
  );
  const selectedSeedDropSet = useMemo(
    () => new Set(selectedSeedDropColumns),
    [selectedSeedDropColumns],
  );
  const rowHasExpandableText = useCallback(
    (row: Record<string, unknown>): boolean =>
      previewColumns.some((columnName) => {
        if (resolveImagePreview(row[columnName])) {
          return false;
        }
        return isExpandablePreviewValue(stringifyCell(row[columnName]));
      }),
    [previewColumns],
  );

  return (
    <Tabs defaultValue="config" className="w-full min-w-0">
      <TabsList className="w-full">
        <TabsTrigger value="config">Config</TabsTrigger>
        <TabsTrigger value="preview">Preview</TabsTrigger>
      </TabsList>

      <TabsContent value="config" className="min-w-0 pt-3">
        <div className="space-y-4">
          {mode === "hf" && (
            <>
              <div className="grid gap-2">
                <FieldLabel
                  label="Dataset"
                  htmlFor={datasetId}
                  hint="Hugging Face dataset repo id (org/repo)."
                />
                <div className="flex items-center gap-2">
                  <HfDatasetCombobox
                    inputId={datasetId}
                    className="flex-1"
                    value={config.hf_repo_id}
                    accessToken={config.hf_token?.trim() || undefined}
                    placeholder="org/repo"
                    onValueChange={(nextValue) =>
                      onUpdate({
                        hf_repo_id: nextValue,
                        hf_subset: "",
                        hf_split: "",
                        hf_path: "",
                        seed_columns: [],
                        seed_drop_columns: [],
                        seed_preview_rows: [],
                      })
                    }
                  />
                  <Button
                    type="button"
                    variant="outline"
                    className="nodrag shrink-0"
                    onClick={() => void loadSeedMetadata()}
                    disabled={isInspecting || !config.hf_repo_id.trim()}
                  >
                    {isInspecting ? "Loading..." : "Load"}
                  </Button>
                </div>
              </div>

              <div className="grid gap-2">
                <FieldLabel
                  label="HF token (optional)"
                  htmlFor={tokenId}
                  hint="Only needed for private/gated datasets."
                />
                <Input
                  id={tokenId}
                  className="nodrag"
                  placeholder="hf_..."
                  value={config.hf_token ?? ""}
                  onChange={(event) => onUpdate({ hf_token: event.target.value })}
                />
              </div>

            </>
          )}

          {mode === "local" && (
            <div className="grid gap-2">
              <FieldLabel
                label="Structured file"
                hint="Upload CSV, JSON, or JSONL seed file."
              />
              <div className="flex items-center gap-2">
                <Input
                  className="nodrag flex-1"
                  type="file"
                  accept={LOCAL_ACCEPT}
                  onChange={(event) => {
                    const file = event.target.files?.[0] ?? null;
                    setLocalFile(file);
                    onUpdate({
                      hf_path: "",
                      seed_columns: [],
                      seed_drop_columns: [],
                      seed_preview_rows: [],
                      local_file_name: file?.name ?? "",
                    });
                  }}
                />
                <Button
                  type="button"
                  variant="outline"
                  className="nodrag shrink-0"
                  onClick={() => void loadSeedMetadata()}
                  disabled={isInspecting || !localFile}
                >
                  {isInspecting ? "Loading..." : "Load"}
                </Button>
              </div>
              <p className="text-xs text-muted-foreground">
                Upload-only. Max 50MB.
              </p>
              {(localFile?.name || config.local_file_name?.trim()) && (
                <p className="text-xs text-muted-foreground">
                  Selected: {localFile?.name ?? config.local_file_name?.trim()}
                </p>
              )}
            </div>
          )}

          {mode === "unstructured" && (
            <div className="grid gap-2">
              <FieldLabel
                label="Unstructured file"
                hint="Upload PDF, DOCX, or TXT. We chunk text into seed rows."
              />
              <div className="flex items-center gap-2">
                <Input
                  className="nodrag flex-1"
                  type="file"
                  accept={UNSTRUCTURED_ACCEPT}
                  onChange={(event) => {
                    const file = event.target.files?.[0] ?? null;
                    setUnstructuredFile(file);
                    onUpdate({
                      hf_path: "",
                      seed_columns: [],
                      seed_drop_columns: [],
                      seed_preview_rows: [],
                      unstructured_file_name: file?.name ?? "",
                    });
                  }}
                />
                <Button
                  type="button"
                  variant="outline"
                  className="nodrag shrink-0"
                  onClick={() => void loadSeedMetadata()}
                  disabled={isInspecting || !unstructuredFile}
                >
                  {isInspecting ? "Loading..." : "Load"}
                </Button>
              </div>
              <p className="text-xs text-muted-foreground">
                File is converted to text, then chunked server-side into chunk_text rows. Max 50MB.
              </p>
              {(unstructuredFile?.name ||
                config.unstructured_file_name?.trim()) && (
                <p className="text-xs text-muted-foreground">
                  Selected:{" "}
                  {unstructuredFile?.name ?? config.unstructured_file_name?.trim()}
                </p>
              )}
            </div>
          )}

          {inspectError && <p className="text-xs text-red-600">{inspectError}</p>}

          {mode !== "unstructured" && (
            <div className="space-y-2 rounded-xl corner-squircle border border-border/60 p-3">
              <FieldLabel
                label="Drop specific seed columns"
                hint="Dropped columns stay usable in prompts/expressions but are omitted from final dataset."
              />
              {previewColumns.length === 0 ? (
                <p className="text-xs text-muted-foreground">
                  Load columns to select which seed fields to drop.
                </p>
              ) : (
                <div className="grid gap-2 sm:grid-cols-2">
                  {previewColumns.map((columnName) => {
                    const checked = selectedSeedDropSet.has(columnName);
                    return (
                      <label
                        key={columnName}
                        className="flex cursor-pointer items-center gap-2 rounded-md border border-border/60 px-2 py-1.5 text-xs"
                      >
                        <Checkbox
                          checked={checked}
                          onCheckedChange={(value) => {
                            const isChecked = value === true;
                            const next = isChecked
                              ? Array.from(new Set([...selectedSeedDropColumns, columnName]))
                              : selectedSeedDropColumns.filter((name) => name !== columnName);
                            onUpdate({ seed_drop_columns: next });
                          }}
                        />
                        <span className="truncate">{columnName}</span>
                      </label>
                    );
                  })}
                </div>
              )}
            </div>
          )}

          <Collapsible
            open={advancedOpen}
            onOpenChange={(openState) => onUpdate({ advancedOpen: openState })}
          >
            <CollapsibleTrigger asChild={true}>
              <CollapsibleSectionTriggerButton
                label="Advanced source options"
                open={advancedOpen}
              />
            </CollapsibleTrigger>
            <CollapsibleContent className="mt-2 space-y-3">
              <div className="grid gap-2">
                <FieldLabel
                  label="Sampling strategy"
                  htmlFor={samplingId}
                  hint="Ordered keeps row order. Shuffle randomizes sampled rows."
                />
                <Select
                  value={config.sampling_strategy}
                  onValueChange={(value) =>
                    onUpdate({ sampling_strategy: value as SeedSamplingStrategy })
                  }
                >
                  <SelectTrigger className="nodrag w-full" id={samplingId}>
                    <SelectValue placeholder="Select sampling" />
                  </SelectTrigger>
                  <SelectContent>
                    {SAMPLING_OPTIONS.map((option) => (
                      <SelectItem key={option.value} value={option.value}>
                        {option.label}
                      </SelectItem>
                    ))}
                  </SelectContent>
                </Select>
              </div>

              <div className="grid gap-2">
                <FieldLabel
                  label="Selection strategy"
                  htmlFor={selectionId}
                  hint="Select all, a row range, or partition block."
                />
                <Select
                  value={config.selection_type}
                  onValueChange={(value) =>
                    onUpdate({ selection_type: value as SeedSelectionType })
                  }
                >
                  <SelectTrigger className="nodrag w-full" id={selectionId}>
                    <SelectValue placeholder="Select selection" />
                  </SelectTrigger>
                  <SelectContent>
                    {SELECTION_OPTIONS.map((option) => (
                      <SelectItem key={option.value} value={option.value}>
                        {option.label}
                      </SelectItem>
                    ))}
                  </SelectContent>
                </Select>
              </div>

              {mode === "unstructured" && (
                <div className="grid grid-cols-2 gap-3">
                  <div className="grid gap-2">
                    <FieldLabel
                      label="Chunk size"
                      htmlFor={chunkSizeId}
                      hint="Characters per chunk."
                    />
                    <Input
                      id={chunkSizeId}
                      className="nodrag"
                      inputMode="numeric"
                      value={config.unstructured_chunk_size ?? String(DEFAULT_CHUNK_SIZE)}
                      onChange={(event) =>
                        onUpdate({ unstructured_chunk_size: event.target.value })
                      }
                    />
                  </div>
                  <div className="grid gap-2">
                    <FieldLabel
                      label="Chunk overlap"
                      htmlFor={chunkOverlapId}
                      hint="Shared chars between adjacent chunks."
                    />
                    <Input
                      id={chunkOverlapId}
                      className="nodrag"
                      inputMode="numeric"
                      value={
                        config.unstructured_chunk_overlap ??
                        String(DEFAULT_CHUNK_OVERLAP)
                      }
                      onChange={(event) =>
                        onUpdate({ unstructured_chunk_overlap: event.target.value })
                      }
                    />
                  </div>
                </div>
              )}

              {config.selection_type === "index_range" && (
                <div className="grid grid-cols-2 gap-3">
                  <div className="grid gap-2">
                    <FieldLabel label="Start" hint="Inclusive start row index for index_range." />
                    <Input
                      className="nodrag"
                      inputMode="numeric"
                      value={config.selection_start ?? ""}
                      onChange={(event) => onUpdate({ selection_start: event.target.value })}
                    />
                  </div>
                  <div className="grid gap-2">
                    <FieldLabel label="End" hint="Inclusive end row index for index_range." />
                    <Input
                      className="nodrag"
                      inputMode="numeric"
                      value={config.selection_end ?? ""}
                      onChange={(event) => onUpdate({ selection_end: event.target.value })}
                    />
                  </div>
                </div>
              )}

              {config.selection_type === "partition_block" && (
                <div className="grid grid-cols-2 gap-3">
                  <div className="grid gap-2">
                    <FieldLabel label="Index" hint="Partition index to load." />
                    <Input
                      className="nodrag"
                      inputMode="numeric"
                      value={config.selection_index ?? ""}
                      onChange={(event) => onUpdate({ selection_index: event.target.value })}
                    />
                  </div>
                  <div className="grid gap-2">
                    <FieldLabel label="Partitions" hint="Total number of partitions." />
                    <Input
                      className="nodrag"
                      inputMode="numeric"
                      value={config.selection_num_partitions ?? ""}
                      onChange={(event) =>
                        onUpdate({ selection_num_partitions: event.target.value })
                      }
                    />
                  </div>
                </div>
              )}
            </CollapsibleContent>
          </Collapsible>
        </div>
      </TabsContent>

      <TabsContent value="preview" className="min-w-0 pt-3">
        <div className="space-y-4">
          {previewRows.length === 0 ? (
            <div className="flex w-full items-center justify-center">
              <Empty className="max-w-lg">
                <EmptyHeader>
                  <EmptyTitle>{previewEmpty.title}</EmptyTitle>
                  <EmptyDescription>
                    {previewEmpty.description}
                  </EmptyDescription>
                </EmptyHeader>
                <EmptyContent className="text-xs text-muted-foreground">
                  Preview appears here after loading source metadata.
                </EmptyContent>
              </Empty>
            </div>
          ) : (
            <div className="space-y-2">
              <div className="text-xs text-muted-foreground">
                Loaded columns: {previewColumns.join(", ") || "None"}
              </div>
              <div className="max-h-[360px] overflow-y-auto overflow-x-hidden rounded-xl corner-squircle border border-border/60">
                <Table className="corner-squircle min-w-max">
                  <TableHeader>
                    <TableRow>
                      {previewColumns.map((col) => (
                        <TableHead key={col} className="whitespace-nowrap">
                          {col}
                        </TableHead>
                      ))}
                    </TableRow>
                  </TableHeader>
                  <TableBody>
                    {previewRows.map((row, rowIdx) => (
                      <TableRow
                        key={`row-${rowIdx}`}
                        className={cn(
                          rowHasExpandableText(row) && "cursor-pointer hover:bg-primary/[0.06]",
                          expandedPreviewRows[rowIdx] && "bg-primary/[0.05]",
                        )}
                        onClick={() => {
                          const canExpand = rowHasExpandableText(row);
                          if (!canExpand) {
                            return;
                          }
                          setExpandedPreviewRows((current) => ({
                            ...current,
                            [rowIdx]: !current[rowIdx],
                          }));
                        }}
                      >
                        {previewColumns.map((col) => (
                          <TableCell
                            key={`${rowIdx}-${col}`}
                            className="max-w-[260px] whitespace-pre-wrap break-words text-xs"
                          >
                            {(() => {
                              const imagePreview = resolveImagePreview(row[col]);
                              if (imagePreview?.kind === "ready") {
                                return (
                                  <img
                                    src={imagePreview.src}
                                    alt={`${col} preview`}
                                    loading="lazy"
                                    className="h-20 w-auto max-w-[220px] rounded-md border border-border/60 bg-muted/20 object-contain"
                                  />
                                );
                              }
                              if (imagePreview?.kind === "too_large") {
                                return "Image too large to preview";
                              }
                              const value = stringifyCell(row[col]);
                              const rowHasExpandableCell = rowHasExpandableText(row);
                              const rowExpanded = Boolean(expandedPreviewRows[rowIdx]);
                              return rowHasExpandableCell && !rowExpanded
                                ? truncatePreviewValue(value)
                                : value;
                            })()}
                          </TableCell>
                        ))}
                      </TableRow>
                    ))}
                  </TableBody>
                </Table>
              </div>
            </div>
          )}
        </div>
      </TabsContent>
    </Tabs>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/shared/available-variables.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { ArrowDown01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, useMemo, useState } from "react";
import { useRecipeStudioStore } from "../../stores/recipe-studio";
import { getAvailableVariableEntries } from "../../utils/variables";
import { RECIPE_STUDIO_REFERENCE_BADGE_TONES } from "../../utils/ui-tones";

/** Props accepted by `AvailableVariables`. */
type AvailableVariablesProps = {
  // Forwarded, together with the store's configs, to
  // `getAvailableVariableEntries` to decide which references are listed.
  configId: string;
};

// Sub-field names that are prefixed with `user.` to build the extra reference
// badges shown while the `user` badge is expanded.
const USER_EXPANDED_FIELDS = [
  "first_name",
  "last_name",
  "sex",
  "city",
  "state",
  "age",
] as const;
/**
 * Renders the "Available references" panel for one recipe step.
 *
 * Every entry returned by `getAvailableVariableEntries` becomes a
 * `{{ name }}` badge. The `user` entry is rendered as a toggle button instead:
 * while it is expanded, one extra `user.<field>` badge is appended for each
 * `USER_EXPANDED_FIELDS` name that is not already among the entries.
 *
 * Returns `null` when there are no entries to show.
 */
export function AvailableVariables({
  configId,
}: AvailableVariablesProps): ReactElement | null {
  const [showUserFields, setShowUserFields] = useState(false);
  const configs = useRecipeStudioStore((state) => state.configs);
  const vars = getAvailableVariableEntries(configs, configId);

  // Names already present, used to avoid duplicating `user.*` badges.
  const knownNames = useMemo(() => {
    const names = new Set<string>();
    for (const entry of vars) {
      names.add(entry.name);
    }
    return names;
  }, [vars]);
  const hasUserRoot = knownNames.has("user");

  // Expanded `user.<field>` entries that are not already listed.
  const extraUserEntries = useMemo(
    () =>
      USER_EXPANDED_FIELDS.map((field) => `user.${field}`)
        .filter((name) => !knownNames.has(name))
        .map((name) => ({ source: "column" as const, name })),
    [knownNames],
  );

  // Hooks above must run on every render, so bail out only after them.
  if (vars.length === 0) {
    return null;
  }

  const toggleUserFields = (): void => {
    setShowUserFields((prev) => !prev);
  };

  const expandedBadges =
    hasUserRoot && showUserFields
      ? extraUserEntries.map((entry) => (
          <Badge
            key={`user-expanded:${entry.name}`}
            variant="secondary"
            className={RECIPE_STUDIO_REFERENCE_BADGE_TONES.user}
          >
            {`{{ ${entry.name} }}`}
          </Badge>
        ))
      : null;

  return (
    <div className="corner-squircle rounded-2xl border border-border/60 px-3 py-2">
      <p className="mb-2 text-xs font-semibold uppercase text-muted-foreground">
        Available references
      </p>
      <div className="flex flex-wrap gap-1.5">
        {vars.map((entry) => {
          const entryKey = `${entry.source}:${entry.name}`;
          const reference = `{{ ${entry.name} }}`;

          // `user` and `user.*` win over the seed tone; everything else is default.
          let tone: string = RECIPE_STUDIO_REFERENCE_BADGE_TONES.default;
          if (entry.name === "user" || entry.name.startsWith("user.")) {
            tone = RECIPE_STUDIO_REFERENCE_BADGE_TONES.user;
          } else if (entry.source === "seed") {
            tone = RECIPE_STUDIO_REFERENCE_BADGE_TONES.seed;
          }

          // Only the bare `user` entry is interactive.
          if (entry.name === "user") {
            return (
              <button
                key={entryKey}
                type="button"
                onClick={toggleUserFields}
                className="cursor-pointer"
                aria-expanded={showUserFields}
                aria-label={showUserFields ? "Hide user fields" : "Show user fields"}
              >
                <Badge variant="secondary" className={tone}>
                  <span>{reference}</span>
                  <HugeiconsIcon
                    icon={ArrowDown01Icon}
                    className={`size-3 transition-transform ${showUserFields ? "rotate-180" : ""}`}
                  />
                </Badge>
              </button>
            );
          }

          return (
            <Badge key={entryKey} variant="secondary" className={tone}>
              {reference}
            </Badge>
          );
        })}
        {expandedBadges}
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/shared/collapsible-section-trigger.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import { ArrowDown01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import {
  forwardRef,
  type ButtonHTMLAttributes,
  type ReactElement,
} from "react";

/** Props for `CollapsibleSectionTriggerButton`, on top of native button attributes. */
type CollapsibleSectionTriggerProps = {
  // Section title rendered next to the arrow icon.
  label: string;
  // Whether the section is expanded; rotates the arrow and picks the
  // "Hide"/"Show" fallback text.
  open: boolean;
  // Optional trailing text shown instead of the "Hide"/"Show" fallback.
  summary?: string;
} & ButtonHTMLAttributes<HTMLButtonElement>;

/**
 * Full-width button used as the trigger of a collapsible section.
 *
 * Shows an arrow (rotated while `open`), the uppercase `label`, and on the
 * right either `summary` or a "Hide"/"Show" fallback derived from `open`.
 * Remaining button attributes and the forwarded ref go to the `<button>`;
 * `type` defaults to "button".
 */
export const CollapsibleSectionTriggerButton = forwardRef<
  HTMLButtonElement,
  CollapsibleSectionTriggerProps
>(function CollapsibleSectionTriggerButton(
  triggerProps: CollapsibleSectionTriggerProps,
  ref,
): ReactElement {
  const {
    label,
    open,
    summary,
    className,
    type = "button",
    ...buttonProps
  } = triggerProps;

  const buttonClassName = cn(
    "flex w-full items-center justify-between gap-3 text-left text-xs text-muted-foreground transition hover:text-foreground",
    className,
  );
  const arrowClassName = cn(
    "size-3.5 shrink-0 transition-transform",
    open && "rotate-180",
  );
  // An explicit summary wins; otherwise describe what clicking will do.
  const trailingText = summary ?? (open ? "Hide" : "Show");

  return (
    <button ref={ref} type={type} className={buttonClassName} {...buttonProps}>
      <span className="flex min-w-0 items-center gap-2">
        <HugeiconsIcon icon={ArrowDown01Icon} className={arrowClassName} />
        <span className="font-semibold uppercase">{label}</span>
      </span>
      <span className="shrink-0">{trailingText}</span>
    </button>
  );
});


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/shared/dialog-shell.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  DialogDescription,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import type { ReactElement } from "react";

/** Props for `DialogShell`; both fall back to defaults set by the component. */
type DialogShellProps = {
  // Text rendered inside `DialogTitle`.
  title?: string;
  // Text rendered inside `DialogDescription`.
  description?: string;
};

/**
 * Standard dialog header made of a title and a description.
 *
 * Falls back to a generic "Edit step" heading and description when the
 * corresponding prop is omitted.
 */
export function DialogShell(shellProps: DialogShellProps): ReactElement {
  const {
    title = "Edit step",
    description = "Update this step before you run the recipe.",
  } = shellProps;

  return (
    <DialogHeader>
      <DialogTitle>{title}</DialogTitle>
      <DialogDescription>{description}</DialogDescription>
    </DialogHeader>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/shared/field-label.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { InformationCircleIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import type { ReactElement } from "react";

/** Props for `FieldLabel`. */
type FieldLabelProps = {
  // Visible label text; also used in the info button's aria-label and title.
  label: string;
  // When set, the text is wrapped in a `<label>` bound to this element id.
  htmlFor?: string;
  // When set, an info button with this text as tooltip content is rendered.
  hint?: string;
};

/**
 * Small uppercase field caption with an optional info tooltip.
 *
 * The caption is a real `<label>` when `htmlFor` is provided and a plain
 * `<span>` otherwise. When `hint` is provided, an info button follows the
 * caption and reveals the hint in a tooltip.
 */
export function FieldLabel(labelProps: FieldLabelProps): ReactElement {
  const { label, htmlFor, hint } = labelProps;

  let caption: ReactElement;
  if (htmlFor) {
    caption = (
      <label className="min-w-0 cursor-pointer" htmlFor={htmlFor}>
        <span className="break-words">{label}</span>
      </label>
    );
  } else {
    caption = <span className="min-w-0 break-words">{label}</span>;
  }

  const hintTooltip = hint && (
    <Tooltip>
      <TooltipTrigger asChild={true}>
        <button
          type="button"
          className="inline-flex size-6 shrink-0 items-center justify-center rounded-full text-muted-foreground/80 transition hover:text-foreground"
          aria-label={`More info: ${label}`}
          title={`More info about ${label}`}
        >
          <HugeiconsIcon icon={InformationCircleIcon} className="size-4" />
        </button>
      </TooltipTrigger>
      <TooltipContent className="max-w-64 break-words text-xs leading-relaxed">
        {hint}
      </TooltipContent>
    </Tooltip>
  );

  return (
    <div className="flex min-w-0 items-start gap-1.5 text-xs font-semibold uppercase text-muted-foreground">
      {caption}
      {hintTooltip}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/shared/name-field.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Input } from "@/components/ui/input";
import { type ReactElement, useId } from "react";
import { FieldLabel } from "./field-label";

/** Props for `NameField`. */
type NameFieldProps = {
  // Optional input id; a `useId()` value is used when omitted.
  id?: string;
  // Current input value (controlled).
  value: string;
  // Called with the input's new string value on every change event.
  onChange: (value: string) => void;
  // Optional caption override; the component falls back to "Field name".
  label?: string;
  // Optional tooltip override; the component falls back to a default hint.
  hint?: string;
};

/**
 * Labelled, controlled text input for a step's name.
 *
 * The label is linked to the input through `id`, or through a generated id
 * when none is given. `label` and `hint` fall back to generic defaults.
 */
export function NameField(fieldProps: NameFieldProps): ReactElement {
  const { id, value, onChange, label, hint } = fieldProps;

  // Always call the hook; its value is only used when no id was supplied.
  const generatedId = useId();
  const inputId = id ?? generatedId;
  const captionText = label ?? "Field name";
  const hintText =
    hint ?? "This name is used in prompts and in the final dataset.";

  return (
    <div className="grid gap-2">
      <FieldLabel label={captionText} htmlFor={inputId} hint={hintText} />
      <Input
        id={inputId}
        className="nodrag"
        value={value}
        onChange={(event) => onChange(event.target.value)}
      />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/shared/validation-banner.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactElement } from "react";
import type { NodeConfig } from "../../types";
import { getConfigErrors } from "../../utils";

/**
 * Inline warning listing the errors `getConfigErrors` reports for `config`.
 *
 * The messages are joined with ". " and followed by a final period. Renders
 * nothing when there are no errors.
 */
export function ValidationBanner(bannerProps: {
  config: NodeConfig | null;
}): ReactElement | null {
  const messages = getConfigErrors(bannerProps.config);
  if (messages.length > 0) {
    const joinedMessages = messages.join(". ");
    return (
      <p className="text-xs text-amber-600">
        <span className="font-semibold">Needs attention: </span>
        {joinedMessages}.
      </p>
    );
  }
  return null;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/tool-profile/helpers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { LlmMcpProviderConfig } from "../../types";

/**
 * Builds an id of the form `<prefix>-mcp-<timestamp>-<index + 1>`.
 *
 * The timestamp is `Date.now()` at call time and the trailing number is the
 * one-based position derived from the zero-based `index`.
 */
export function createMcpProviderId(prefix: string, index: number): string {
  const createdAt = Date.now();
  const ordinal = index + 1;
  return `${prefix}-mcp-${createdAt}-${ordinal}`;
}

/**
 * Appends the trimmed `value` to `items` unless it is blank or already present.
 *
 * Returns a new array when the value is added; otherwise returns the original
 * `items` array untouched (same reference).
 */
export function addUnique(items: string[], value: string): string[] {
  const candidate = value.trim();
  const shouldAppend = candidate !== "" && !items.includes(candidate);
  return shouldAppend ? [...items, candidate] : items;
}

/**
 * Gathers the tools of the given providers into one de-duplicated list.
 *
 * Each provider name is trimmed before it is looked up in `toolsByProvider`;
 * names without an entry contribute nothing. Tools keep the order in which
 * they are first encountered.
 */
export function collectToolSuggestions(
  providerNames: string[],
  toolsByProvider: Record<string, string[]>,
): string[] {
  const uniqueTools = new Set<string>();
  for (const providerName of providerNames) {
    const providerTools = toolsByProvider[providerName.trim()] ?? [];
    for (const toolName of providerTools) {
      uniqueTools.add(toolName);
    }
  }
  return Array.from(uniqueTools);
}

/**
 * Reports whether a provider has the fields this check requires.
 *
 * A provider passes when its trimmed `name` is non-empty and, depending on
 * `provider_type`, its trimmed `command` ("stdio") or `endpoint` (any other
 * type) is non-empty.
 */
export function isProviderReadyForToolFetch(
  provider: LlmMcpProviderConfig,
): boolean {
  if (provider.name.trim().length === 0) {
    return false;
  }
  // The required connection field depends on the transport.
  const connectionTarget =
    provider.provider_type === "stdio" ? provider.command : provider.endpoint;
  return (connectionTarget?.trim().length ?? 0) > 0;
}

/**
 * Converts a provider config into the plain object sent to the API.
 *
 * - "stdio" providers yield `provider_type`, `name`, `command`, `args` and
 *   `env`. Args are trimmed and empty ones dropped; env pairs are trimmed and
 *   pairs with an empty key or value are dropped.
 * - Any other provider is emitted as "streamable_http" with `name`,
 *   `endpoint`, `api_key` and `api_key_env`; blank keys become `undefined`.
 *
 * All string fields are trimmed.
 */
export function toApiProvider(
  provider: LlmMcpProviderConfig,
): Record<string, unknown> {
  if (provider.provider_type !== "stdio") {
    return {
      // biome-ignore lint/style/useNamingConvention: api schema
      provider_type: "streamable_http",
      name: provider.name.trim(),
      endpoint: provider.endpoint?.trim() ?? "",
      // biome-ignore lint/style/useNamingConvention: api schema
      api_key: provider.api_key?.trim() || undefined,
      // biome-ignore lint/style/useNamingConvention: api schema
      api_key_env: provider.api_key_env?.trim() || undefined,
    };
  }

  // Keep only env pairs where both the key and the value survive trimming.
  const envPairs = (provider.env ?? [])
    .map((item) => [item.key.trim(), item.value.trim()] as const)
    .filter(([key, value]) => key.length > 0 && value.length > 0);
  const env = Object.fromEntries(envPairs);

  return {
    // biome-ignore lint/style/useNamingConvention: api schema
    provider_type: "stdio",
    name: provider.name.trim(),
    command: provider.command?.trim() ?? "",
    args: (provider.args ?? [])
      .map((arg) => arg.trim())
      .filter((arg) => arg.length > 0),
    env,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/tool-profile/tool-profile-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Input } from "@/components/ui/input";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import { toastError } from "@/shared/toast";
import {
  ArrowRight01Icon,
  Delete02Icon,
  PlusSignIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, useEffect, useMemo, useRef, useState } from "react";
import { listMcpTools } from "../../api";
import { ChipInput } from "../../components/chip-input";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import type { LlmMcpProviderConfig, McpEnvVar, ToolProfileConfig } from "../../types";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";
import {
  addUnique,
  collectToolSuggestions,
  createMcpProviderId,
  isProviderReadyForToolFetch,
  toApiProvider,
} from "./helpers";

/** Props for the tool profile dialog (the consuming component is outside this excerpt). */
type ToolProfileDialogProps = {
  // The tool profile config being edited.
  config: ToolProfileConfig;
  // Callback that receives a partial patch of the tool profile config.
  onUpdate: (patch: Partial<ToolProfileConfig>) => void;
};

/**
 * Dashed placeholder card with a bold title and a muted description line.
 */
function EmptyState(stateProps: {
  title: string;
  description: string;
}): ReactElement {
  const { title, description } = stateProps;
  return (
    <div className="rounded-2xl border border-dashed border-border/70 bg-muted/15 px-4 py-5 text-sm">
      <p className="font-semibold text-foreground">{title}</p>
      <p className="mt-1 text-xs text-muted-foreground">{description}</p>
    </div>
  );
}

/**
 * Reports whether a provider has a non-blank name plus the connection field
 * its transport needs: `command` for "stdio", `endpoint` for anything else.
 *
 * NOTE(review): this performs the same checks as `isProviderReadyForToolFetch`
 * from `./helpers`; consider sharing one implementation.
 */
function isProviderConfigured(provider: LlmMcpProviderConfig): boolean {
  const trimmedName = provider.name.trim();
  if (trimmedName.length === 0) {
    return false;
  }
  const requiredField =
    provider.provider_type === "stdio" ? provider.command : provider.endpoint;
  const requiredLength = requiredField?.trim().length ?? 0;
  return requiredLength > 0;
}

/**
 * Collapsible editor card for a single tool server.
 *
 * The header shows the server name (or a numbered fallback), its transport,
 * and an optional tool-count badge; the body holds the fields for the selected
 * transport ("stdio" → command / arguments / environment, otherwise → endpoint
 * and API key settings). The card owns no state: every edit is forwarded to
 * the callbacks together with `index`, the server's position in the parent
 * list.
 *
 * @param provider Server entry being edited.
 * @param index Position of `provider` in the parent list; passed back to every callback.
 * @param toolsCount Number of tools loaded for this server; the badge is hidden when omitted.
 * @param error Error message rendered at the top of the expanded body, if any.
 * @param open Whether the card body is expanded.
 * @param onOpenChange Called when the user expands or collapses the card.
 */
function McpServerCard({
  provider,
  index,
  toolsCount,
  error,
  open,
  onOpenChange,
  onUpdateProviderAt,
  onRemoveProvider,
  onAddProviderArg,
  onUpdateProviderArg,
  onRemoveProviderArg,
  onAddProviderEnv,
  onUpdateProviderEnv,
  onRemoveProviderEnv,
}: {
  provider: LlmMcpProviderConfig;
  index: number;
  toolsCount?: number;
  error?: string;
  open: boolean;
  onOpenChange: (open: boolean) => void;
  onUpdateProviderAt: (
    index: number,
    patch: Partial<LlmMcpProviderConfig>,
  ) => void;
  onRemoveProvider: (index: number) => void;
  onAddProviderArg: (index: number) => void;
  onUpdateProviderArg: (index: number, argIndex: number, value: string) => void;
  onRemoveProviderArg: (index: number, argIndex: number) => void;
  onAddProviderEnv: (index: number) => void;
  onUpdateProviderEnv: (
    index: number,
    envIndex: number,
    patch: Partial<McpEnvVar>,
  ) => void;
  onRemoveProviderEnv: (index: number, envIndex: number) => void;
}): ReactElement {
  // Always render at least one (blank) row so there is something to type into
  // even when the stored list is empty.
  const args = provider.args && provider.args.length > 0 ? provider.args : [""];
  const envVars =
    provider.env && provider.env.length > 0
      ? provider.env
      : [{ key: "", value: "" }];
  const summaryTitle = provider.name.trim() || `Tool server ${index + 1}`;
  const transportLabel =
    provider.provider_type === "stdio" ? "Local command" : "HTTP";
  const toolsLabel = typeof toolsCount === "number" ? `${toolsCount} tools` : null;
  const description =
    provider.provider_type === "stdio"
      ? "Runs a local tool server."
      : "Calls a remote tool server.";

  return (
    <Collapsible open={open} onOpenChange={onOpenChange}>
      <div className="rounded-2xl border border-border/60 bg-background/80">
        <div className="flex items-start gap-2 px-4 py-4">
          <CollapsibleTrigger asChild={true}>
            <button
              type="button"
              className="flex min-w-0 flex-1 items-start gap-3 text-left"
            >
              <HugeiconsIcon
                icon={ArrowRight01Icon}
                className={`mt-0.5 size-4 shrink-0 text-muted-foreground transition-transform ${
                  open ? "rotate-90" : ""
                }`}
              />
              <div className="min-w-0">
                <div className="flex flex-wrap items-center gap-2">
                  <p className="truncate text-sm font-semibold text-foreground">
                    {summaryTitle}
                  </p>
                  <Badge variant="outline" className="rounded-full text-[10px] uppercase">
                    {transportLabel}
                  </Badge>
                  {toolsLabel ? (
                    <Badge variant="secondary" className="rounded-full text-[10px]">
                      {toolsLabel}
                    </Badge>
                  ) : null}
                </div>
                <p className="mt-1 text-xs text-muted-foreground">{description}</p>
              </div>
            </button>
          </CollapsibleTrigger>
          {/* Icon-only button: needs an explicit accessible name. */}
          <Button
            type="button"
            size="icon-sm"
            variant="ghost"
            aria-label={`Remove ${summaryTitle}`}
            onClick={() => onRemoveProvider(index)}
          >
            <HugeiconsIcon icon={Delete02Icon} className="size-4" />
          </Button>
        </div>

        <CollapsibleContent className="space-y-4 border-t border-border/50 px-4 pt-4 pb-4">
          {error && (
            <div className="rounded-xl border border-destructive/30 bg-destructive/5 px-3 py-2 text-xs text-destructive">
              {error}
            </div>
          )}

          <div className="grid gap-2">
            <FieldLabel label="Server name" hint="Name shown in this tool access setup." />
            <Input
              className="nodrag"
              value={provider.name}
              placeholder="context7"
              onChange={(event) =>
                onUpdateProviderAt(index, { name: event.target.value })
              }
            />
          </div>

          {/* Transport switch: anything other than "stdio" is stored as "streamable_http". */}
          <Tabs
            value={provider.provider_type}
            onValueChange={(value) =>
              onUpdateProviderAt(index, {
                // biome-ignore lint/style/useNamingConvention: ui schema
                provider_type: value === "stdio" ? "stdio" : "streamable_http",
              })
            }
          >
            <TabsList className="w-full">
              <TabsTrigger value="stdio">Local command</TabsTrigger>
              <TabsTrigger value="streamable_http">HTTP endpoint</TabsTrigger>
            </TabsList>
          </Tabs>

          {provider.provider_type === "stdio" ? (
            <div className="space-y-4">
              <div className="grid gap-2">
                <FieldLabel label="Command" hint="Command used to start the tool server." />
                <Input
                  className="nodrag"
                  value={provider.command ?? ""}
                  placeholder="npx"
                  onChange={(event) =>
                    onUpdateProviderAt(index, { command: event.target.value })
                  }
                />
              </div>

              <div className="space-y-2">
                <div className="flex items-center justify-between gap-3">
                  <FieldLabel label="Arguments" hint="Optional command arguments." />
                  <Button
                    type="button"
                    size="xs"
                    variant="outline"
                    onClick={() => onAddProviderArg(index)}
                  >
                    <HugeiconsIcon icon={PlusSignIcon} className="size-3.5" />
                    Add arg
                  </Button>
                </div>
                {args.map((arg, argIndex) => (
                  <div key={`${provider.id}-arg-${argIndex}`} className="flex gap-2">
                    <Input
                      className="nodrag"
                      value={arg}
                      placeholder={argIndex === 0 ? "-y" : "argument"}
                      onChange={(event) =>
                        onUpdateProviderArg(index, argIndex, event.target.value)
                      }
                    />
                    <Button
                      type="button"
                      size="icon-sm"
                      variant="ghost"
                      aria-label={`Remove argument ${argIndex + 1}`}
                      onClick={() => onRemoveProviderArg(index, argIndex)}
                    >
                      <HugeiconsIcon icon={Delete02Icon} className="size-4" />
                    </Button>
                  </div>
                ))}
              </div>

              <div className="space-y-2">
                <div className="flex items-center justify-between gap-3">
                  <FieldLabel label="Environment variables" hint="Optional values passed to the tool server." />
                  <Button
                    type="button"
                    size="xs"
                    variant="outline"
                    onClick={() => onAddProviderEnv(index)}
                  >
                    <HugeiconsIcon icon={PlusSignIcon} className="size-3.5" />
                    Add env
                  </Button>
                </div>
                {envVars.map((item, envIndex) => (
                  <div
                    key={`${provider.id}-env-${envIndex}`}
                    className="grid grid-cols-[1fr_1fr_auto] gap-2"
                  >
                    <Input
                      className="nodrag"
                      value={item.key}
                      placeholder="KEY"
                      onChange={(event) =>
                        onUpdateProviderEnv(index, envIndex, {
                          key: event.target.value,
                        })
                      }
                    />
                    <Input
                      className="nodrag"
                      value={item.value}
                      placeholder="value"
                      onChange={(event) =>
                        onUpdateProviderEnv(index, envIndex, {
                          value: event.target.value,
                        })
                      }
                    />
                    <Button
                      type="button"
                      size="icon-sm"
                      variant="ghost"
                      aria-label={`Remove environment variable ${envIndex + 1}`}
                      onClick={() => onRemoveProviderEnv(index, envIndex)}
                    >
                      <HugeiconsIcon icon={Delete02Icon} className="size-4" />
                    </Button>
                  </div>
                ))}
              </div>
            </div>
          ) : (
            <div className="space-y-4">
              <div className="grid gap-2">
                <FieldLabel label="Endpoint" hint="URL for the tool server." />
                <Input
                  className="nodrag"
                  value={provider.endpoint ?? ""}
                  placeholder="https://example.com/mcp"
                  onChange={(event) =>
                    onUpdateProviderAt(index, { endpoint: event.target.value })
                  }
                />
              </div>
              <div className="grid gap-2 sm:grid-cols-2">
                <div className="grid gap-2">
                  <FieldLabel
                    label="API key environment variable"
                    hint="Optional environment variable that stores the API key."
                  />
                  <Input
                    className="nodrag"
                    value={provider.api_key_env ?? ""}
                    placeholder="TOOL_SERVER_API_KEY"
                    onChange={(event) =>
                      onUpdateProviderAt(index, {
                        // biome-ignore lint/style/useNamingConvention: api schema
                        api_key_env: event.target.value,
                      })
                    }
                  />
                </div>
                <div className="grid gap-2">
                  <FieldLabel
                    label="API key"
                    hint="Optional API key."
                  />
                  {/* Secret value: mask it on screen and keep browsers from autofilling it. */}
                  <Input
                    className="nodrag"
                    type="password"
                    autoComplete="off"
                    value={provider.api_key ?? ""}
                    placeholder="token"
                    onChange={(event) =>
                      onUpdateProviderAt(index, {
                        // biome-ignore lint/style/useNamingConvention: api schema
                        api_key: event.target.value,
                      })
                    }
                  />
                </div>
              </div>
            </div>
          )}
        </CollapsibleContent>
      </div>
    </Collapsible>
  );
}

/**
 * Two-tab editor for a tool profile.
 *
 * - "1. Add servers" lists the profile's tool servers as `McpServerCard`s and
 *   lets the user add, edit and remove them.
 * - "2. Choose tools" loads tool names from the ready servers via
 *   `listMcpTools`, shows them grouped by server, and edits the allow-list
 *   plus the tool-call limits.
 *
 * All persistent changes go through `onUpdate` as partial patches of
 * `config`; local state only holds UI concerns (active tab, loading flag,
 * per-server errors, duplicate-tool warnings, which cards are expanded) and a
 * mirror of the fetched tool names.
 *
 * @param config Tool profile being edited.
 * @param onUpdate Receives a partial patch of `config` for every change.
 */
export function ToolProfileDialog({
  config,
  onUpdate,
}: ToolProfileDialogProps): ReactElement {
  const providers = config.mcp_providers;
  // Start on the tools tab only when there is at least one server to load from.
  const [activeTab, setActiveTab] = useState<"profile" | "servers">(
    providers.length > 0 ? "profile" : "servers",
  );
  const [advancedOpen, setAdvancedOpen] = useState(false);
  const [loadingTools, setLoadingTools] = useState(false);
  const [toolsByProvider, setToolsByProvider] = useState<Record<string, string[]>>(
    config.fetched_tools_by_provider ?? {},
  );
  const [providerErrors, setProviderErrors] = useState<Record<string, string>>({});
  const [duplicateTools, setDuplicateTools] = useState<Record<string, string[]>>({});
  const [openProviders, setOpenProviders] = useState<Record<string, boolean>>({});
  const previousProviderSignatureRef = useRef<string | null>(null);

  // Serialised snapshot of every field that affects which tools a server
  // exposes. `id` is deliberately left out, so only connection-relevant edits
  // change the signature.
  const providerSignature = useMemo(
    () =>
      JSON.stringify(
        providers.map((provider) => ({
          name: provider.name,
          // biome-ignore lint/style/useNamingConvention: ui schema
          provider_type: provider.provider_type,
          command: provider.command,
          args: provider.args,
          env: provider.env,
          endpoint: provider.endpoint,
          // biome-ignore lint/style/useNamingConvention: api schema
          api_key: provider.api_key,
          // biome-ignore lint/style/useNamingConvention: api schema
          api_key_env: provider.api_key_env,
        })),
      ),
    [providers],
  );

  // When the server configuration changes, previously fetched tool names,
  // errors and duplicate warnings no longer describe it, so drop them (both
  // locally and in the stored config). The first run only seeds local state.
  useEffect(() => {
    const previousSignature = previousProviderSignatureRef.current;
    previousProviderSignatureRef.current = providerSignature;
    if (previousSignature === null) {
      setToolsByProvider(config.fetched_tools_by_provider ?? {});
      return;
    }
    if (previousSignature === providerSignature) {
      return;
    }
    setToolsByProvider({});
    setProviderErrors({});
    setDuplicateTools({});
    if (Object.keys(config.fetched_tools_by_provider ?? {}).length > 0) {
      onUpdate({
        // biome-ignore lint/style/useNamingConvention: ui schema
        fetched_tools_by_provider: {},
      });
    }
  }, [config.fetched_tools_by_provider, onUpdate, providerSignature]);

  // Keep the local mirror in step with the stored tool names.
  useEffect(() => {
    const tools = config.fetched_tools_by_provider ?? {};
    setToolsByProvider(tools);
  }, [config.fetched_tools_by_provider]);

  // Maintain one expanded/collapsed flag per current server: keep the user's
  // choice where one exists, otherwise expand servers that still need setup.
  // Entries for removed servers are dropped.
  useEffect(() => {
    setOpenProviders((current) => {
      const next: Record<string, boolean> = {};
      for (const provider of providers) {
        next[provider.id] =
          current[provider.id] ?? !isProviderConfigured(provider);
      }
      return next;
    });
  }, [providers]);

  /** Replaces the whole server list in the stored config. */
  function updateProviders(nextProviders: LlmMcpProviderConfig[]): void {
    onUpdate({
      // biome-ignore lint/style/useNamingConvention: ui schema
      mcp_providers: nextProviders,
    });
  }

  /** Shallow-merges `patch` into the server at `index`. */
  function updateProviderAt(
    index: number,
    patch: Partial<LlmMcpProviderConfig>,
  ): void {
    updateProviders(
      providers.map((provider, currentIndex) =>
        currentIndex === index ? { ...provider, ...patch } : provider,
      ),
    );
  }

  /** Like `updateProviderAt`, but derives the patch from the current server; no-op for a bad index. */
  function mutateProviderAt(
    index: number,
    mapProvider: (provider: LlmMcpProviderConfig) => Partial<LlmMcpProviderConfig>,
  ): void {
    const provider = providers[index];
    if (!provider) {
      return;
    }
    updateProviderAt(index, mapProvider(provider));
  }

  function removeProvider(index: number): void {
    updateProviders(providers.filter((_, currentIndex) => currentIndex !== index));
  }

  /** Appends a blank "stdio" server with an id not used by any current server. */
  function addProvider(): void {
    // Seeding the id from `providers.length` alone can repeat an id that is
    // still in use once an earlier server has been removed, which would give
    // two cards the same React key and shared open/closed state. Probe forward
    // until the id is free. The loop is bounded: with N servers, at most N
    // candidates can be taken.
    const usedIds = new Set(providers.map((provider) => provider.id));
    let nextId = createMcpProviderId(config.id, providers.length);
    for (
      let offset = 1;
      usedIds.has(nextId) && offset <= providers.length;
      offset += 1
    ) {
      nextId = createMcpProviderId(config.id, providers.length + offset);
    }

    updateProviders([
      ...providers,
      {
        id: nextId,
        name: "",
        // biome-ignore lint/style/useNamingConvention: ui schema
        provider_type: "stdio",
        command: "",
        args: [],
        env: [],
        endpoint: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        api_key: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        api_key_env: "",
      },
    ]);
  }

  function addProviderArg(providerIndex: number): void {
    mutateProviderAt(providerIndex, (provider) => ({
      args: [...(provider.args ?? []), ""],
    }));
  }

  function updateProviderArg(
    providerIndex: number,
    argIndex: number,
    value: string,
  ): void {
    mutateProviderAt(providerIndex, (provider) => {
      // The card renders a single blank row when the list is empty; mirror
      // that here so editing that row creates the first argument.
      const nextArgs =
        provider.args && provider.args.length > 0 ? [...provider.args] : [""];
      nextArgs[argIndex] = value;
      return { args: nextArgs };
    });
  }

  function removeProviderArg(providerIndex: number, argIndex: number): void {
    mutateProviderAt(providerIndex, (provider) => ({
      args: (provider.args ?? []).filter((_, currentIndex) => currentIndex !== argIndex),
    }));
  }

  function addProviderEnv(providerIndex: number): void {
    mutateProviderAt(providerIndex, (provider) => ({
      env: [...(provider.env ?? []), { key: "", value: "" }],
    }));
  }

  function updateProviderEnv(
    providerIndex: number,
    envIndex: number,
    patch: Partial<McpEnvVar>,
  ): void {
    mutateProviderAt(providerIndex, (provider) => ({
      // Same blank-row convention as for arguments.
      env: (
        provider.env && provider.env.length > 0
          ? provider.env
          : [{ key: "", value: "" }]
      ).map((item, currentIndex) =>
        currentIndex === envIndex ? { ...item, ...patch } : item,
      ),
    }));
  }

  function removeProviderEnv(providerIndex: number, envIndex: number): void {
    mutateProviderAt(providerIndex, (provider) => ({
      env: (provider.env ?? []).filter((_, currentIndex) => currentIndex !== envIndex),
    }));
  }

  /**
   * Fetches tool names from every server that is ready for it and stores the
   * result locally and in the config. Per-server errors and duplicate tool
   * names from the response are kept for display; a failed request is
   * reported through a toast.
   */
  async function loadTools(): Promise<void> {
    const readyProviders = providers.filter(isProviderReadyForToolFetch);
    if (readyProviders.length === 0) {
      toastError(
        "No tool servers are ready",
        "Add a server name plus a command or endpoint first.",
      );
      return;
    }

    setLoadingTools(true);
    try {
      // The timeout is stored as free text. Use it only when it parses to a
      // positive finite number; blank, non-numeric, zero or negative input
      // falls back to the default.
      const defaultTimeoutSec = 15;
      const parsedTimeout = Number(config.timeout_sec?.trim());
      const timeoutSec =
        Number.isFinite(parsedTimeout) && parsedTimeout > 0
          ? parsedTimeout
          : defaultTimeoutSec;
      const response = await listMcpTools({
        // biome-ignore lint/style/useNamingConvention: api schema
        mcp_providers: readyProviders.map(toApiProvider),
        // biome-ignore lint/style/useNamingConvention: api schema
        timeout_sec: timeoutSec,
      });
      const nextToolsByProvider = Object.fromEntries(
        response.providers
          .filter((provider) => provider.name.trim())
          .map((provider) => [provider.name.trim(), provider.tools]),
      );
      setToolsByProvider(nextToolsByProvider);
      onUpdate({
        // biome-ignore lint/style/useNamingConvention: ui schema
        fetched_tools_by_provider: nextToolsByProvider,
      });
      setProviderErrors(
        Object.fromEntries(
          response.providers
            .filter((provider) => provider.name.trim() && provider.error)
            .map((provider) => [provider.name.trim(), provider.error ?? "Failed to load tools."]),
        ),
      );
      setDuplicateTools(response.duplicate_tools ?? {});
    } catch (error) {
      toastError(
        "Couldn't load tools",
        error instanceof Error ? error.message : "We couldn't load the tools for these servers.",
      );
    } finally {
      setLoadingTools(false);
    }
  }

  // Distinct, non-blank server names in list order.
  const providerNames = useMemo(
    () =>
      Array.from(
        new Set(providers.map((provider) => provider.name.trim()).filter(Boolean)),
      ),
    [providers],
  );
  const availableTools = useMemo(
    () => collectToolSuggestions(providerNames, toolsByProvider),
    [providerNames, toolsByProvider],
  );
  const hasProviders = providers.length > 0;

  // Removing the last server while on the tools tab sends the user back to
  // the servers tab.
  useEffect(() => {
    if (!hasProviders && activeTab === "profile") {
      setActiveTab("servers");
    }
  }, [activeTab, hasProviders]);

  return (
    <Tabs
      value={activeTab}
      onValueChange={(value) =>
        setActiveTab(value === "servers" ? "servers" : "profile")
      }
      className="w-full"
    >
      <TabsList className="w-full">
        <TabsTrigger value="servers">1. Add servers</TabsTrigger>
        <TabsTrigger value="profile">2. Choose tools</TabsTrigger>
      </TabsList>

      <TabsContent value="profile" className="space-y-4 pt-3">
        <NameField
          label="Tool access name"
          value={config.name}
          onChange={(value) => onUpdate({ name: value })}
        />

        {!hasProviders ? (
          <div className="space-y-3">
            <EmptyState
              title="Add a server to start choosing tools"
              description="Set up a server first, then come back here to choose which tools this step can use."
            />
            <Button
              type="button"
              variant="outline"
              onClick={() => setActiveTab("servers")}
            >
              Add servers first
            </Button>
          </div>
        ) : (
          <>
            <div className="rounded-2xl border border-border/60 bg-muted/10 px-4 py-3">
              <p className="text-sm font-semibold text-foreground">
                Pick which tools this setup may use
              </p>
              <p className="mt-1 text-xs text-muted-foreground">
                1. Load tool names from your servers. 2. Leave the list empty to
                allow all tools, or add only the ones this step should use.
              </p>
            </div>
            <div className="space-y-3 rounded-2xl border border-border/60 bg-muted/10 p-4">
              <div className="flex items-start justify-between gap-3">
                <div>
                  <p className="text-sm font-semibold text-foreground">
                    Available tools
                  </p>
                  <p className="text-xs text-muted-foreground">
                    Load tool names so you can pick from a list instead of guessing.
                  </p>
                </div>
                <Button
                  type="button"
                  size="xs"
                  variant="outline"
                  disabled={loadingTools}
                  onClick={() => {
                    void loadTools();
                  }}
                >
                  {loadingTools ? "Loading..." : "Load tools"}
                </Button>
              </div>

              {Object.keys(toolsByProvider).length === 0 &&
                Object.keys(providerErrors).length === 0 && (
                  <p className="text-xs text-muted-foreground">
                    Load tools to browse what's available.
                  </p>
                )}

              {Object.entries(toolsByProvider).map(([providerName, toolNames]) => (
                <div key={providerName} className="space-y-2">
                  <div className="flex items-center gap-2">
                    <p className="text-xs font-semibold uppercase text-muted-foreground">
                      {providerName}
                    </p>
                    <Badge variant="outline" className="rounded-full text-[10px]">
                      {toolNames.length}
                    </Badge>
                  </div>
                  <div className="flex flex-wrap gap-2">
                    {toolNames.map((toolName) => (
                      <Badge key={`${providerName}-${toolName}`} variant="secondary">
                        {toolName}
                      </Badge>
                    ))}
                  </div>
                </div>
              ))}

              {Object.entries(duplicateTools).length > 0 && (
                <div className="rounded-xl border border-amber-500/30 bg-amber-500/5 px-3 py-2 text-xs text-amber-700 dark:text-amber-300">
                  Some tool names appear on more than one server:
                  {" "}
                  {Object.entries(duplicateTools)
                    .map(([toolName, providerList]) => `${toolName} (${providerList.join(", ")})`)
                    .join("; ")}
                </div>
              )}
            </div>

            <div className="grid gap-2">
              <FieldLabel
                label="Tools this setup may use"
                hint="Leave this empty to allow every tool from these servers."
              />
              <ChipInput
                values={config.allow_tools ?? []}
                suggestions={availableTools}
                onAdd={(value) =>
                  onUpdate({
                    // biome-ignore lint/style/useNamingConvention: api schema
                    allow_tools: addUnique(config.allow_tools ?? [], value),
                  })
                }
                onRemove={(toolIndex) =>
                  onUpdate({
                    // biome-ignore lint/style/useNamingConvention: api schema
                    allow_tools: (config.allow_tools ?? []).filter(
                      (_, currentIndex) => currentIndex !== toolIndex,
                    ),
                  })
                }
                placeholder="Type tool name and press Enter"
              />
            </div>

            <Collapsible open={advancedOpen} onOpenChange={setAdvancedOpen}>
              <CollapsibleTrigger asChild={true}>
                <CollapsibleSectionTriggerButton
                  label="Tool-call limits"
                  open={advancedOpen}
                />
              </CollapsibleTrigger>
              <CollapsibleContent className="mt-3">
                <div className="grid gap-3 sm:grid-cols-2">
                  <div className="grid gap-2">
                    <FieldLabel
                      label="Max tool-use turns"
                      hint="How many back-and-forth tool calls an AI step can make."
                    />
                    <Input
                      className="nodrag"
                      value={config.max_tool_call_turns ?? ""}
                      onChange={(event) =>
                        onUpdate({
                          // biome-ignore lint/style/useNamingConvention: api schema
                          max_tool_call_turns: event.target.value,
                        })
                      }
                    />
                  </div>
                  <div className="grid gap-2">
                    <FieldLabel
                      label="Timeout (seconds)"
                      hint="How long to wait when loading or calling tools."
                    />
                    <Input
                      className="nodrag"
                      value={config.timeout_sec ?? ""}
                      onChange={(event) =>
                        onUpdate({
                          // biome-ignore lint/style/useNamingConvention: api schema
                          timeout_sec: event.target.value,
                        })
                      }
                    />
                  </div>
                </div>
              </CollapsibleContent>
            </Collapsible>
          </>
        )}
      </TabsContent>

      <TabsContent value="servers" className="space-y-4 pt-3">
        <div className="rounded-2xl border border-border/60 bg-muted/10 px-4 py-3">
          <p className="text-sm font-semibold text-foreground">
            Add one or more tool servers
          </p>
          <p className="mt-1 text-xs text-muted-foreground">
            After your servers are ready, switch to Choose tools to load names
            and decide which ones this setup should allow.
          </p>
        </div>
        <div className="flex items-center justify-between gap-3">
          <FieldLabel
            label="Tool servers"
            hint="These servers belong to this tool access setup and can be reused by linked AI steps."
          />
          <Button type="button" size="xs" variant="outline" onClick={addProvider}>
            <HugeiconsIcon icon={PlusSignIcon} className="size-3.5" />
            Add server
          </Button>
        </div>

        {!hasProviders ? (
          <EmptyState
            title="No tool servers yet"
            description="Add one or more servers here, then go back to Choose tools to load and choose tools."
          />
        ) : (
          <div className="space-y-3">
            {providers.map((provider, index) => {
              const providerName = provider.name.trim();
              return (
                <McpServerCard
                  key={provider.id}
                  provider={provider}
                  index={index}
                  // Stay undefined until tools were actually loaded for this
                  // server, so the card does not show a misleading "0 tools".
                  toolsCount={
                    providerName ? toolsByProvider[providerName]?.length : undefined
                  }
                  error={providerName ? providerErrors[providerName] : undefined}
                  open={openProviders[provider.id] ?? !isProviderConfigured(provider)}
                  onOpenChange={(open) =>
                    setOpenProviders((current) => ({
                      ...current,
                      [provider.id]: open,
                    }))
                  }
                  onUpdateProviderAt={updateProviderAt}
                  onRemoveProvider={removeProvider}
                  onAddProviderArg={addProviderArg}
                  onUpdateProviderArg={updateProviderArg}
                  onRemoveProviderArg={removeProviderArg}
                  onAddProviderEnv={addProviderEnv}
                  onUpdateProviderEnv={updateProviderEnv}
                  onRemoveProviderEnv={removeProviderEnv}
                />
              );
            })}
          </div>
        )}
      </TabsContent>
    </Tabs>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/dialogs/validators/validator-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import { Input } from "@/components/ui/input";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { type ReactElement, useMemo, useRef } from "react";
import { useRecipeStudioStore } from "../../stores/recipe-studio";
import type { ValidatorConfig } from "../../types";
import {
  isValidatorCodeLang,
  VALIDATOR_OXC_CODE_LANGS,
  VALIDATOR_SQL_CODE_LANGS,
} from "../../utils/validators/code-lang";
import {
  OXC_CODE_SHAPES,
  normalizeOxcCodeShape,
} from "../../utils/validators/oxc-code-shape";
import {
  OXC_VALIDATION_MODES,
  normalizeOxcValidationMode,
} from "../../utils/validators/oxc-mode";
import { CollapsibleSectionTriggerButton } from "../shared/collapsible-section-trigger";
import { FieldLabel } from "../shared/field-label";
import { NameField } from "../shared/name-field";

/** Props accepted by `ValidatorDialog`. */
type ValidatorDialogProps = {
  /** Validator configuration being edited. */
  config: ValidatorConfig;
  /** Receives a partial patch of the configuration when the user changes a field. */
  onUpdate: (patch: Partial<ValidatorConfig>) => void;
};

// NOTE(review): presumably the sentinel option value standing for "nothing
// selected" in a select/combobox — its usage is further down the file; confirm.
const NONE_VALUE = "__none__";

/**
 * Form body for editing a single validator ("check") config.
 *
 * Renders the check name, the select for the code step to validate, the
 * OXC-specific mode/shape pickers (only when `validator_type` is "oxc"), and a
 * collapsible "advanced" section with the batch size. Every change is reported
 * through `onUpdate` as a partial patch; the component keeps no local state.
 *
 * @param config - The validator config being edited.
 * @param onUpdate - Receives a partial patch for each field change.
 */
export function ValidatorDialog({
  config,
  onUpdate,
}: ValidatorDialogProps): ReactElement {
  const configs = useRecipeStudioStore((state) => state.configs);
  const targetColumnId = `${config.id}-target-column`;
  const oxcModeId = `${config.id}-oxc-mode`;
  const oxcCodeShapeId = `${config.id}-oxc-code-shape`;
  const batchSizeId = `${config.id}-batch-size`;
  const oxcModeAnchorRef = useRef<HTMLDivElement>(null);
  const oxcCodeShapeAnchorRef = useRef<HTMLDivElement>(null);
  const advancedOpen = config.advancedOpen === true;
  const selectedOxcMode = normalizeOxcValidationMode(config.oxc_validation_mode);
  const selectedOxcCodeShape = normalizeOxcCodeShape(config.oxc_code_shape);
  // Code-generating LLM steps whose language this validator type can check,
  // sorted by name. The filter depends on `config.validator_type`, so it must
  // be listed as a dependency: otherwise switching the validator type would
  // keep showing the options computed for the previous type.
  const codeOptions = useMemo(
    () =>
      Object.values(configs)
        .flatMap((item) => {
          if (!(item.kind === "llm" && item.llm_type === "code")) {
            return [];
          }
          const lang = item.code_lang?.trim() ?? "";
          const isSupported =
            config.validator_type === "oxc"
              ? VALIDATOR_OXC_CODE_LANGS.includes(lang as typeof config.code_lang)
              : lang === "python" ||
                VALIDATOR_SQL_CODE_LANGS.includes(lang as typeof config.code_lang);
          if (!isSupported) {
            return [];
          }
          return [
            {
              name: item.name,
              codeLang: lang,
            },
          ];
        })
        .filter((item) => item.name.trim())
        .sort((a, b) => a.name.localeCompare(b.name)),
    [configs, config.validator_type],
  );
  // Only the first target column is edited by this dialog.
  const currentTarget = config.target_columns[0] ?? "";

  return (
    <div className="space-y-4">
      <NameField
        label="Check name"
        hint="Name used for this check in the canvas and run results."
        value={config.name}
        onChange={(value) => onUpdate({ name: value })}
      />
      <div className="grid gap-2">
        <FieldLabel
          label="Code to check"
          htmlFor={targetColumnId}
          hint="Choose the AI code step this check should review."
        />
        <Select
          value={currentTarget || NONE_VALUE}
          onValueChange={(value) => {
            if (value === NONE_VALUE) {
              onUpdate({
                // biome-ignore lint/style/useNamingConvention: api schema
                target_columns: [],
              });
              return;
            }
            // Adopt the selected step's language when it is one the validator
            // understands; otherwise keep the current language.
            const targetConfig = codeOptions.find((item) => item.name === value);
            const nextCodeLang = targetConfig?.codeLang?.trim();
            onUpdate({
              // biome-ignore lint/style/useNamingConvention: api schema
              target_columns: [value],
              // biome-ignore lint/style/useNamingConvention: api schema
              code_lang:
                nextCodeLang && isValidatorCodeLang(nextCodeLang)
                  ? nextCodeLang
                  : config.code_lang,
            });
          }}
        >
          <SelectTrigger className="nodrag w-full" id={targetColumnId}>
            <SelectValue placeholder="Select code column" />
          </SelectTrigger>
          <SelectContent>
            <SelectItem value={NONE_VALUE}>None</SelectItem>
            {codeOptions.map((item) => (
              <SelectItem key={item.name} value={item.name}>
                {item.name}
              </SelectItem>
            ))}
          </SelectContent>
        </Select>
        {codeOptions.length === 0 && (
          <p className="text-xs text-muted-foreground">
            {config.validator_type === "oxc"
              ? "Add an AI code step that generates JavaScript or TypeScript first."
              : "Add an AI code step first."}
          </p>
        )}
      </div>
      {config.validator_type === "oxc" && (
        <div className="grid gap-3">
          <div className="grid gap-2">
            <FieldLabel
              label="Check mode"
              htmlFor={oxcModeId}
              hint="Choose whether to check syntax, lint rules, or both."
            />
            <div ref={oxcModeAnchorRef}>
              <Combobox
                items={OXC_VALIDATION_MODES}
                filteredItems={OXC_VALIDATION_MODES}
                filter={null}
                value={selectedOxcMode}
                onValueChange={(value) =>
                  onUpdate({
                    oxc_validation_mode: normalizeOxcValidationMode(value),
                  })
                }
                itemToStringValue={(value) => value}
                autoHighlight={true}
              >
                <ComboboxInput
                  id={oxcModeId}
                  className="nodrag w-full"
                  placeholder="Select validation mode"
                  readOnly={true}
                />
                <ComboboxContent anchor={oxcModeAnchorRef}>
                  <ComboboxEmpty>No modes available</ComboboxEmpty>
                  <ComboboxList>
                    {(mode: string) => (
                      <ComboboxItem key={mode} value={mode}>
                        {mode}
                      </ComboboxItem>
                    )}
                  </ComboboxList>
                </ComboboxContent>
              </Combobox>
            </div>
          </div>
          <div className="grid gap-2">
            <FieldLabel
              label="Code shape"
              htmlFor={oxcCodeShapeId}
              hint="Choose whether the code should be treated like a full file or a smaller snippet."
            />
            <div ref={oxcCodeShapeAnchorRef}>
              <Combobox
                items={OXC_CODE_SHAPES}
                filteredItems={OXC_CODE_SHAPES}
                filter={null}
                value={selectedOxcCodeShape}
                onValueChange={(value) =>
                  onUpdate({
                    oxc_code_shape: normalizeOxcCodeShape(value),
                  })
                }
                itemToStringValue={(value) => value}
                autoHighlight={true}
              >
                <ComboboxInput
                  id={oxcCodeShapeId}
                  className="nodrag w-full"
                  placeholder="Select code shape"
                  readOnly={true}
                />
                <ComboboxContent anchor={oxcCodeShapeAnchorRef}>
                  <ComboboxEmpty>No code-shape options</ComboboxEmpty>
                  <ComboboxList>
                    {(shape: string) => (
                      <ComboboxItem key={shape} value={shape}>
                        {shape}
                      </ComboboxItem>
                    )}
                  </ComboboxList>
                </ComboboxContent>
              </Combobox>
            </div>
          </div>
        </div>
      )}
      <Collapsible
        open={advancedOpen}
        onOpenChange={(open) => onUpdate({ advancedOpen: open })}
      >
        <CollapsibleTrigger asChild={true}>
          <CollapsibleSectionTriggerButton
            label="Advanced check settings"
            open={advancedOpen}
          />
        </CollapsibleTrigger>
        <CollapsibleContent className="mt-3">
          <div className="grid gap-2">
            <FieldLabel
              label="Batch size"
              htmlFor={batchSizeId}
              hint="How many records to check at a time."
            />
            <Input
              id={batchSizeId}
              className="nodrag"
              value={config.batch_size}
              onChange={(event) => onUpdate({ batch_size: event.target.value })}
            />
          </div>
        </CollapsibleContent>
      </Collapsible>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/execution-types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/** The two named views of the recipe studio: "editor" and "executions". */
export type RecipeStudioView = "editor" | "executions";

/** Kind of run: a "preview" or a "full" run (labelled "Preview" / "Full run" by `executionLabel`). */
export type RecipeExecutionKind = "preview" | "full";

/**
 * Lifecycle status of an execution record.
 *
 * `mapJobStatus` passes through every value except "running", which it uses as
 * the fallback for any unrecognised backend status string.
 */
export type RecipeExecutionStatus =
  | "pending"
  | "running"
  | "active"
  | "cancelling"
  | "cancelled"
  | "completed"
  | "error";

/**
 * Progress counters attached to an execution (overall or per column).
 *
 * Every field is optional and may be null, so consumers must check before use.
 * NOTE(review): units (e.g. `percent` as 0-100, `rate` per second) are set by
 * the backend and not established in this file; `normalizeCompletedProgress`
 * writes `percent: 100` and `eta_sec: 0` for finished runs.
 */
export type RecipeExecutionProgress = {
  done?: number | null;
  total?: number | null;
  percent?: number | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  eta_sec?: number | null;
  rate?: number | null;
  ok?: number | null;
  failed?: number | null;
};

/**
 * Batch position reported with a job status snapshot.
 *
 * Both fields are optional/nullable; `applyExecutionStatusSnapshot` sets each
 * to null when the backend value is not a number.
 */
export type RecipeExecutionBatch = {
  idx?: number | null;
  total?: number | null;
};

/**
 * Analysis payload stored on an execution record.
 *
 * Declares the fields this UI knows about and, via the `Record<string, unknown>`
 * intersection, allows any additional keys to pass through untyped.
 */
export type RecipeExecutionAnalysis = {
  num_records?: number;
  target_num_records?: number;
  // biome-ignore lint/style/useNamingConvention: backend schema
  column_statistics?: Record<string, unknown>[];
  // biome-ignore lint/style/useNamingConvention: backend schema
  side_effect_column_names?: string[] | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  column_profiles?: Record<string, unknown>[] | null;
} & Record<string, unknown>;

/**
 * Client-side record of one recipe execution (preview or full run).
 *
 * Mixes camelCase UI fields with snake_case fields that mirror the backend
 * schema. `createBaseExecutionRecord` produces the initial shape and
 * `withExecutionDefaults` backfills missing/invalid fields on stored records.
 */
export type RecipeExecutionRecord = {
  // Client-generated identifier (a UUID in `createBaseExecutionRecord`).
  id: string;
  recipeId: string;
  // Backend job id; null until a job has been associated with this record.
  // biome-ignore lint/style/useNamingConvention: backend schema
  jobId: string | null;
  kind: RecipeExecutionKind;
  // ui-only display label for full runs
  // biome-ignore lint/style/useNamingConvention: ui schema
  run_name: string | null;
  status: RecipeExecutionStatus;
  rows: number;
  // Timestamps from `Date.now()` (epoch milliseconds).
  createdAt: number;
  finishedAt: number | null;
  recipeSignature: string;
  stage: string | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  current_column: string | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  completed_columns: string[];
  progress: RecipeExecutionProgress | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  column_progress: RecipeExecutionProgress | null;
  batch: RecipeExecutionBatch | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  model_usage: Record<string, unknown> | null;
  // Id of the last event received from the job event stream, if any.
  // biome-ignore lint/style/useNamingConvention: backend schema
  lastEventId: number | null;
  // biome-ignore lint/style/useNamingConvention: backend schema
  artifact_path: string | null;
  // Formatted log lines; capped by `appendExecutionLogLine`.
  // biome-ignore lint/style/useNamingConvention: backend schema
  log_lines: string[];
  // Currently loaded dataset rows plus paging state.
  dataset: Record<string, unknown>[];
  datasetTotal: number;
  datasetPage: number;
  datasetPageSize: number;
  analysis: RecipeExecutionAnalysis | null;
  // biome-ignore lint/style/useNamingConvention: api schema
  processor_artifacts: Record<string, unknown> | null;
  error: string | null;
};


================================================
FILE: studio/frontend/src/features/recipe-studio/executions/execution-helpers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  RecipeExecutionAnalysis,
  RecipeExecutionRecord,
  RecipeExecutionStatus,
} from "../execution-types";
import type { RecipePayload } from "../utils/payload/types";

// Default number of dataset rows per page; used when a record has no valid
// `datasetPageSize` of its own.
export const DATASET_PAGE_SIZE = 20;

/**
 * Produces a string signature for a recipe by JSON-serializing its name
 * together with its payload.
 */
export function buildSignature(name: string, payload: RecipePayload): string {
  const signatureSource = { name, payload };
  return JSON.stringify(signatureSource);
}

/**
 * Builds the "Saved <time>" label for a save timestamp.
 *
 * @param savedAt - Epoch milliseconds of the last save, or null/0 when unsaved.
 * @returns "Not saved yet" for a falsy timestamp, otherwise "Saved " followed
 *   by the locale-formatted hour and minute.
 */
export function formatSavedLabel(savedAt: number | null): string {
  if (savedAt) {
    const formatted = new Date(savedAt).toLocaleTimeString([], {
      hour: "numeric",
      minute: "2-digit",
    });
    return `Saved ${formatted}`;
  }
  return "Not saved yet";
}

/**
 * Extracts a display message from a caught value.
 *
 * @returns The `message` of an `Error` instance, or `fallback` for anything else.
 */
export function toErrorMessage(error: unknown, fallback: string): string {
  return error instanceof Error ? error.message : fallback;
}

/**
 * Coerces an unknown value into a list of dataset rows.
 *
 * @returns An empty array when `value` is not an array; otherwise only those
 *   entries that are non-null, non-array objects, in their original order.
 */
export function normalizeDatasetRows(value: unknown): Record<string, unknown>[] {
  const rows: Record<string, unknown>[] = [];
  if (!Array.isArray(value)) {
    return rows;
  }
  for (const entry of value) {
    if (typeof entry === "object" && entry !== null && !Array.isArray(entry)) {
      rows.push(entry as Record<string, unknown>);
    }
  }
  return rows;
}

/**
 * Narrows an unknown value to a plain record.
 *
 * @returns The same reference when `value` is a non-null, non-array object;
 *   otherwise null.
 */
export function normalizeObject(value: unknown): Record<string, unknown> | null {
  const isRecord =
    typeof value === "object" && value !== null && !Array.isArray(value);
  return isRecord ? (value as Record<string, unknown>) : null;
}

/**
 * Narrows an unknown value to a `RecipeExecutionAnalysis`.
 *
 * Only checks that the value is a plain object (via `normalizeObject`); the
 * individual fields are not validated.
 *
 * @returns The value typed as an analysis object, or null when it is not a
 *   plain object.
 */
export function normalizeAnalysis(value: unknown): RecipeExecutionAnalysis | null {
  const candidate = normalizeObject(value);
  return candidate ? (candidate as RecipeExecutionAnalysis) : null;
}

/**
 * Maps a backend job status string onto `RecipeExecutionStatus`.
 *
 * Recognised statuses pass through unchanged; any other string is treated as
 * "running".
 */
export function mapJobStatus(status: string): RecipeExecutionStatus {
  switch (status) {
    case "active":
      return "active";
    case "pending":
      return "pending";
    case "cancelling":
      return "cancelling";
    case "cancelled":
      return "cancelled";
    case "completed":
      return "completed";
    case "error":
      return "error";
    default:
      return "running";
  }
}

/**
 * Reports whether an execution has not yet reached a final state, i.e. its
 * status is "running", "active", "pending" or "cancelling".
 */
export function isExecutionInProgress(status: RecipeExecutionStatus): boolean {
  switch (status) {
    case "running":
    case "active":
    case "pending":
    case "cancelling":
      return true;
    default:
      return false;
  }
}

/** Returns the human-readable label for an execution kind. */
export function executionLabel(kind: "preview" | "full"): string {
  if (kind === "preview") {
    return "Preview";
  }
  return "Full run";
}

/**
 * Cleans up a run name.
 *
 * @returns The trimmed string, or null when `value` is not a string or is
 *   empty after trimming.
 */
export function normalizeRunName(value: unknown): string | null {
  const cleaned = typeof value === "string" ? value.trim() : "";
  return cleaned.length === 0 ? null : cleaned;
}

/**
 * Sort bucket for a status: 0 for in-progress executions, 2 for "error" or
 * "cancelled", and 1 for everything else. Lower weights sort first.
 */
function executionSortWeight(status: RecipeExecutionStatus): number {
  if (isExecutionInProgress(status)) {
    return 0;
  }
  const endedBadly = status === "error" || status === "cancelled";
  return endedBadly ? 2 : 1;
}

/**
 * Returns a sorted copy of the given executions without mutating the input.
 *
 * Records are ordered by `executionSortWeight` of their status (ascending),
 * then by `createdAt` descending within the same weight.
 */
export function sortExecutions(records: RecipeExecutionRecord[]): RecipeExecutionRecord[] {
  return [...records].sort((left, right) => {
    const weightDelta =
      executionSortWeight(left.status) - executionSortWeight(right.status);
    return weightDelta !== 0 ? weightDelta : right.createdAt - left.createdAt;
  });
}

/**
 * Returns a copy of an execution record with missing or malformed fields
 * replaced by safe defaults.
 *
 * - `dataset` / `log_lines` / `completed_columns` become arrays (non-string
 *   log lines and blank/non-string column names are dropped).
 * - `datasetPageSize` falls back to `DATASET_PAGE_SIZE`, `datasetPage` to 1,
 *   and `datasetTotal` to the number of loaded rows.
 * - `run_name` is normalized; `column_progress` and `batch` default to null.
 */
export function withExecutionDefaults(
  record: RecipeExecutionRecord,
): RecipeExecutionRecord {
  const isPositiveNumber = (candidate: unknown): candidate is number =>
    typeof candidate === "number" && candidate > 0;

  const rows = Array.isArray(record.dataset) ? record.dataset : [];

  let total = rows.length;
  if (typeof record.datasetTotal === "number" && record.datasetTotal >= 0) {
    total = record.datasetTotal;
  }

  let completedColumns: string[] = [];
  if (Array.isArray(record.completed_columns)) {
    completedColumns = record.completed_columns.filter(
      (value): value is string => typeof value === "string" && value.trim().length > 0,
    );
  }

  return {
    ...record,
    run_name: normalizeRunName(record.run_name),
    dataset: rows,
    log_lines: Array.isArray(record.log_lines)
      ? record.log_lines.filter((line): line is string => typeof line === "string")
      : [],
    datasetTotal: total,
    datasetPage: isPositiveNumber(record.datasetPage) ? record.datasetPage : 1,
    datasetPageSize: isPositiveNumber(record.datasetPageSize)
      ? record.datasetPageSize
      : DATASET_PAGE_SIZE,
    completed_columns: completedColumns,
    column_progress: record.column_progress ?? null,
    batch: record.batch ?? null,
  };
}

/** Returns a promise that resolves after `ms` milliseconds via `window.setTimeout`. */
export function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    window.setTimeout(() => resolve(), ms);
  });
}

/**
 * Copies text to the clipboard, preferring the async Clipboard API and falling
 * back to a hidden textarea with `document.execCommand("copy")`.
 *
 * @param text - The text to copy.
 * @returns true when one of the two strategies reports success, false otherwise.
 *   Never rejects: failures in either path are swallowed.
 */
export async function copyTextToClipboard(text: string): Promise<boolean> {
  try {
    if (navigator.clipboard?.writeText) {
      await navigator.clipboard.writeText(text);
      return true;
    }
  } catch {
    // fallthrough to legacy path
  }

  let textarea: HTMLTextAreaElement | null = null;
  try {
    textarea = document.createElement("textarea");
    textarea.value = text;
    textarea.setAttribute("readonly", "");
    // Keep the helper element off-screen so it never flashes in the UI.
    textarea.style.position = "fixed";
    textarea.style.top = "0";
    textarea.style.left = "-9999px";
    document.body.appendChild(textarea);
    textarea.select();
    return document.execCommand("copy");
  } catch {
    return false;
  } finally {
    // Always detach the helper element, even when select()/execCommand throws,
    // so a failed copy does not leave a stray textarea in the document.
    textarea?.remove();
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/executions/hydration.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { listRecipeExecutions } from "../data/executions-db";
import type { RecipeExecutionRecord } from "../execution-types";
import {
  isExecutionInProgress,
  sortExecutions,
  withExecutionDefaults,
} from "./execution-helpers";

/**
 * Loads the stored executions for a recipe, applies `withExecutionDefaults` to
 * each record, and returns them ordered by `sortExecutions`.
 */
export async function loadSortedRecipeExecutions(
  recipeId: string,
): Promise<RecipeExecutionRecord[]> {
  const stored = await listRecipeExecutions(recipeId);
  const hydrated = stored.map(withExecutionDefaults);
  return sortExecutions(hydrated);
}

/**
 * Finds the first record that has a job id and is still in progress.
 *
 * @returns That record, or null when none qualifies.
 */
export function findResumableExecution(
  records: RecipeExecutionRecord[],
): RecipeExecutionRecord | null {
  for (const candidate of records) {
    if (candidate.jobId && isExecutionInProgress(candidate.status)) {
      return candidate;
    }
  }
  return null;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/executions/run-settings.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipeExecutionKind } from "../execution-types";
import type { RecipeRunSettings } from "../stores/recipe-executions";
import type { RecipePayload } from "../utils/payload/types";

/**
 * Floors a number and clamps it into `[min, max]`.
 *
 * @param value - Input number.
 * @param fallback - Returned as-is when `value` is NaN or infinite.
 * @param min - Lower bound (default 1); checked before `max`.
 * @param max - Upper bound (default `Number.MAX_SAFE_INTEGER`).
 */
function toPositiveInt(
  value: number,
  fallback: number,
  min = 1,
  max = Number.MAX_SAFE_INTEGER,
): number {
  if (!Number.isFinite(value)) {
    return fallback;
  }
  const floored = Math.floor(value);
  return floored < min ? min : floored > max ? max : floored;
}

/**
 * Floors a number and clamps it into `[0, max]`.
 *
 * @param value - Input number.
 * @param fallback - Returned as-is when `value` is NaN or infinite.
 * @param max - Upper bound (default `Number.MAX_SAFE_INTEGER`).
 */
function toNonNegativeInt(
  value: number,
  fallback: number,
  max = Number.MAX_SAFE_INTEGER,
): number {
  if (!Number.isFinite(value)) {
    return fallback;
  }
  const floored = Math.floor(value);
  return floored < 0 ? 0 : floored > max ? max : floored;
}

/**
 * Clamps a number into the closed interval [0, 1].
 *
 * @param value - Input number.
 * @param fallback - Returned as-is when `value` is NaN or infinite.
 */
function toRatio(value: number, fallback: number): number {
  if (!Number.isFinite(value)) {
    return fallback;
  }
  return value < 0 ? 0 : value > 1 ? 1 : value;
}

/**
 * Normalizes a requested row count to an integer of at least 1.
 *
 * A non-finite `rows` falls back to 5 for previews and 1000 for full runs.
 */
export function sanitizeExecutionRows(
  rows: number,
  kind: RecipeExecutionKind,
): number {
  const fallbackRows = kind === "preview" ? 5 : 1000;
  return toPositiveInt(rows, fallbackRows);
}

/**
 * Returns a sanitized copy of the run settings.
 *
 * Numeric fields are floored and clamped to the ranges written below, boolean
 * fields are coerced with `Boolean`, and `llmParallelRequests` stays null
 * unless it is a number.
 */
export function normalizeRunSettings(settings: RecipeRunSettings): RecipeRunSettings {
  const parallelRequests =
    typeof settings.llmParallelRequests === "number"
      ? toPositiveInt(settings.llmParallelRequests, 4, 1, 2048)
      : null;
  const workers = toPositiveInt(settings.nonInferenceWorkers, 4, 1, 2048);
  const restarts = toNonNegativeInt(settings.maxConversationRestarts, 5, 100);
  const correctionSteps = toNonNegativeInt(
    settings.maxConversationCorrectionSteps,
    0,
    100,
  );

  return {
    batchSize: toPositiveInt(settings.batchSize, 1000, 1, 200_000),
    batchEnabled: Boolean(settings.batchEnabled),
    mergeBatches: Boolean(settings.mergeBatches),
    llmParallelRequests: parallelRequests,
    nonInferenceWorkers: workers,
    maxConversationRestarts: restarts,
    maxConversationCorrectionSteps: correctionSteps,
    disableEarlyShutdown: Boolean(settings.disableEarlyShutdown),
    shutdownErrorRate: toRatio(settings.shutdownErrorRate, 0.5),
    shutdownErrorWindow: toPositiveInt(settings.shutdownErrorWindow, 10, 1, 10_000),
  };
}

/**
 * Translates run settings into the snake_case `run_config` object sent to the
 * backend.
 *
 * `buffer_size` is the configured batch size only for full runs with batching
 * enabled; otherwise it is the row count, floored and clamped to [1, 200000].
 */
function buildRunConfigPayload(
  settings: RecipeRunSettings,
  rows: number,
  kind: RecipeExecutionKind,
): Record<string, unknown> {
  const batchingActive = kind === "full" && settings.batchEnabled;
  const bufferSize = batchingActive
    ? settings.batchSize
    : toPositiveInt(rows, 1000, 1, 200_000);

  const runConfig: Record<string, unknown> = {
    // biome-ignore lint/style/useNamingConvention: backend schema
    buffer_size: bufferSize,
    // biome-ignore lint/style/useNamingConvention: backend schema
    non_inference_max_parallel_workers: settings.nonInferenceWorkers,
    // biome-ignore lint/style/useNamingConvention: backend schema
    max_conversation_restarts: settings.maxConversationRestarts,
    // biome-ignore lint/style/useNamingConvention: backend schema
    max_conversation_correction_steps: settings.maxConversationCorrectionSteps,
    // biome-ignore lint/style/useNamingConvention: backend schema
    disable_early_shutdown: settings.disableEarlyShutdown,
    // biome-ignore lint/style/useNamingConvention: backend schema
    shutdown_error_rate: settings.shutdownErrorRate,
    // biome-ignore lint/style/useNamingConvention: backend schema
    shutdown_error_window: settings.shutdownErrorWindow,
  };
  return runConfig;
}

/**
 * Applies a global `max_parallel_requests` override to every model config.
 *
 * @param payload - The recipe payload; never mutated.
 * @param llmParallelRequests - The override value, or null for "no override".
 * @returns The same `payload` reference when there is no numeric override;
 *   otherwise a shallow copy whose `recipe.model_configs` entries each carry
 *   `inference_parameters.max_parallel_requests` set to the override. Existing
 *   inference parameters are kept when they are a plain object and replaced by
 *   a fresh object otherwise.
 */
function applyGlobalParallelismOverride(
  payload: RecipePayload,
  llmParallelRequests: number | null,
): RecipePayload {
  if (typeof llmParallelRequests !== "number") {
    return payload;
  }

  const overriddenConfigs = payload.recipe.model_configs.map((modelConfig) => {
    const existing = modelConfig.inference_parameters;
    const hasObjectParams =
      Boolean(existing) && typeof existing === "object" && !Array.isArray(existing);
    const inference: Record<string, unknown> = {
      ...(hasObjectParams ? (existing as Record<string, unknown>) : {}),
      // biome-ignore lint/style/useNamingConvention: backend schema
      max_parallel_requests: llmParallelRequests,
    };
    const copy = { ...modelConfig };
    // biome-ignore lint/style/useNamingConvention: backend schema
    copy.inference_parameters = inference;
    return copy;
  });

  const recipe = {
    ...payload.recipe,
    // biome-ignore lint/style/useNamingConvention: backend schema
    model_configs: overriddenConfigs,
  };
  return { ...payload, recipe };
}

/**
 * Builds the payload submitted to start an execution.
 *
 * Normalizes the run settings, applies the global parallelism override to the
 * model configs, and overlays the `run` section with the row count, execution
 * type, run config, batch-merge flag and run name. `merge_batches` is true only
 * for full runs with batching and merging both enabled; `run_name` is only
 * carried for full runs.
 */
export function buildExecutionPayload(input: {
  payload: RecipePayload;
  kind: RecipeExecutionKind;
  rows: number;
  settings: RecipeRunSettings;
  runName?: string | null;
}): RecipePayload {
  const { payload, kind, rows, settings, runName } = input;
  const cleanSettings = normalizeRunSettings(settings);
  const basePayload = applyGlobalParallelismOverride(
    payload,
    cleanSettings.llmParallelRequests,
  );
  const isFullRun = kind === "full";

  const run = {
    ...basePayload.run,
    rows,
    // biome-ignore lint/style/useNamingConvention: backend schema
    execution_type: kind,
    // biome-ignore lint/style/useNamingConvention: backend schema
    run_config: buildRunConfigPayload(cleanSettings, rows, kind),
    // biome-ignore lint/style/useNamingConvention: backend schema
    merge_batches:
      isFullRun && cleanSettings.batchEnabled && cleanSettings.mergeBatches,
    // biome-ignore lint/style/useNamingConvention: backend schema
    run_name: isFullRun ? (runName ?? null) : null,
  };

  return { ...basePayload, run };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/executions/runtime.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { JobEvent, JobStatusResponse } from "../api";
import type {
  RecipeExecutionBatch,
  RecipeExecutionKind,
  RecipeExecutionRecord,
} from "../execution-types";
import {
  DATASET_PAGE_SIZE,
  mapJobStatus,
  normalizeObject,
} from "./execution-helpers";

// Upper bound on retained log lines per execution; `appendExecutionLogLine`
// drops the oldest lines beyond this count.
const MAX_LOG_LINES = 1500;

/**
 * Formats an event timestamp as a locale time string.
 *
 * Values above 10_000_000_000 are used as milliseconds; smaller values are
 * multiplied by 1000 first. Anything that is not a finite number yields the
 * current time instead.
 */
function formatEventTime(ts: unknown): string {
  if (typeof ts === "number" && Number.isFinite(ts)) {
    const millis = ts > 10_000_000_000 ? ts : ts * 1000;
    return new Date(millis).toLocaleTimeString();
  }
  return new Date().toLocaleTimeString();
}

/**
 * Returns a new log array with `nextLine` appended, keeping only the most
 * recent `MAX_LOG_LINES` entries. The input array is not mutated.
 */
export function appendExecutionLogLine(lines: string[], nextLine: string): string[] {
  const combined = [...lines, nextLine];
  const overflow = combined.length - MAX_LOG_LINES;
  return overflow > 0 ? combined.slice(overflow) : combined;
}

/**
 * Converts a job event into a single formatted log line.
 *
 * The event type is `payload.type` when it is a string, otherwise
 * `event.event`. "log" events use the payload's message and level (default
 * "INFO"); job lifecycle events map to fixed messages.
 *
 * @returns The formatted line, or null for a "log" event with an empty message
 *   and for any event type not handled here.
 */
export function toExecutionLogLine(event: JobEvent): string | null {
  const { payload } = event;
  const kind = typeof payload.type === "string" ? payload.type : event.event;
  const stamp = formatEventTime(payload.ts);

  switch (kind) {
    case "log": {
      const text = typeof payload.message === "string" ? payload.message.trim() : "";
      if (!text) {
        return null;
      }
      const severity =
        typeof payload.level === "string" && payload.level.length > 0
          ? payload.level.toUpperCase()
          : "INFO";
      return `[${stamp}] [${severity}] ${text}`;
    }
    case "job.started":
      return `[${stamp}] [INFO] Job started`;
    case "job.completed":
      return `[${stamp}] [INFO] Job completed`;
    case "job.cancelling":
      return `[${stamp}] [WARN] Cancellation requested`;
    case "job.cancelled":
      return `[${stamp}] [WARN] Job cancelled`;
    case "job.error": {
      const reason =
        typeof payload.error === "string" && payload.error.length > 0
          ? payload.error
          : "Job failed";
      return `[${stamp}] [ERROR] ${reason}`;
    }
    default:
      return null;
  }
}

/**
 * Merges a backend status snapshot into an execution record, returning a new
 * record.
 *
 * `rows`, `stage` and `artifact_path` keep the record's previous value when
 * the snapshot omits them; `completed_columns` does so when the snapshot value
 * is not an array. `current_column`, `progress`, `column_progress`, `batch`,
 * `model_usage` and `error` are reset to null when absent or malformed.
 * `finishedAt` is stamped with the current time whenever the mapped status is
 * completed, error or cancelled, and is null otherwise.
 */
export function applyExecutionStatusSnapshot(
  execution: RecipeExecutionRecord,
  status: JobStatusResponse,
): RecipeExecutionRecord {
  const nextStatus = mapJobStatus(status.status);
  const reachedEnd =
    nextStatus === "completed" || nextStatus === "error" || nextStatus === "cancelled";

  const rawBatch = normalizeObject(status.batch);
  let batch: RecipeExecutionBatch | null = null;
  if (rawBatch) {
    batch = {
      idx: typeof rawBatch.idx === "number" ? rawBatch.idx : null,
      total: typeof rawBatch.total === "number" ? rawBatch.total : null,
    };
  }

  const completedColumns = Array.isArray(status.completed_columns)
    ? status.completed_columns.filter(
        (value): value is string => typeof value === "string" && value.trim().length > 0,
      )
    : execution.completed_columns;

  const overallProgress =
    (normalizeObject(status.progress) as RecipeExecutionRecord["progress"]) ?? null;
  const columnProgress =
    (normalizeObject(status.column_progress) as RecipeExecutionRecord["column_progress"]) ??
    null;

  return {
    ...execution,
    status: nextStatus,
    rows: status.rows ?? execution.rows,
    stage: status.stage ?? execution.stage,
    current_column: status.current_column ?? null,
    completed_columns: completedColumns,
    progress: overallProgress,
    column_progress: columnProgress,
    batch,
    model_usage: normalizeObject(status.model_usage),
    artifact_path: status.artifact_path ?? execution.artifact_path,
    error: status.error ?? null,
    finishedAt: reachedEnd ? Date.now() : null,
  };
}

/**
 * Creates the initial record for a new execution.
 *
 * The record gets a fresh UUID and the current time as `createdAt`, starts in
 * the "pending" status and stage with no job id, and has every progress,
 * dataset, analysis and error field empty or null.
 */
export function createBaseExecutionRecord(input: {
  recipeId: string;
  kind: RecipeExecutionKind;
  rows: number;
  currentSignature: string;
  runName?: string | null;
}): RecipeExecutionRecord {
  const { recipeId, kind, rows, currentSignature, runName } = input;
  const now = Date.now();

  const record: RecipeExecutionRecord = {
    id: crypto.randomUUID(),
    recipeId,
    jobId: null,
    kind,
    run_name: runName ?? null,
    status: "pending",
    rows,
    createdAt: now,
    finishedAt: null,
    recipeSignature: currentSignature,
    stage: "pending",
    current_column: null,
    completed_columns: [],
    progress: null,
    column_progress: null,
    batch: null,
    model_usage: null,
    lastEventId: null,
    artifact_path: null,
    log_lines: [],
    dataset: [],
    datasetTotal: 0,
    datasetPage: 1,
    datasetPageSize: DATASET_PAGE_SIZE,
    analysis: null,
    processor_artifacts: null,
    error: null,
  };
  return record;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/executions/tracker.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { toastError, toastSuccess } from "@/shared/toast";
import {
  getRecipeJobAnalysis,
  getRecipeJobDataset,
  getRecipeJobStatus,
  streamRecipeJobEvents,
} from "../api";
import type {
  RecipeExecutionKind,
  RecipeExecutionProgress,
  RecipeExecutionRecord,
  RecipeExecutionStatus,
} from "../execution-types";
import {
  DATASET_PAGE_SIZE,
  delay,
  mapJobStatus,
  normalizeAnalysis,
  normalizeDatasetRows,
  toErrorMessage,
} from "./execution-helpers";
import {
  appendExecutionLogLine,
  applyExecutionStatusSnapshot,
  toExecutionLogLine,
} from "./runtime";

/** Arguments accepted by `trackRecipeExecution`. */
type TrackRecipeExecutionParams = {
  label: string;
  kind: RecipeExecutionKind;
  rows: number;
  // Backend job whose event stream is followed.
  jobId: string;
  // Starting state of the record; its `status` and `lastEventId` seed the tracker.
  initialExecution: RecipeExecutionRecord;
  notify: boolean;
  // Called with the updated record whenever the tracked execution changes.
  onUpsert: (record: RecipeExecutionRecord) => void;
  onSetPreviewErrors: (errors: string[]) => void;
  onPreviewSuccess?: () => void;
};

/** True when the status is final: "completed", "error" or "cancelled". */
function isTerminalStatus(status: RecipeExecutionStatus): boolean {
  switch (status) {
    case "completed":
    case "error":
    case "cancelled":
      return true;
    default:
      return false;
  }
}

/**
 * Produces "100% done" progress values for a finished execution.
 *
 * The overall total is the first positive value among the existing progress
 * total and the record's row count, falling back to the `rows` argument. The
 * overall progress keeps any other existing fields and is forced to
 * done = total, percent 100 and eta 0. Column progress is completed the same
 * way only when it has a positive numeric total; otherwise it is returned
 * unchanged (possibly null).
 */
function normalizeCompletedProgress(input: {
  latestExecution: RecipeExecutionRecord;
  rows: number;
}): {
  progress: RecipeExecutionProgress;
  columnProgress: RecipeExecutionProgress | null;
} {
  const { latestExecution, rows } = input;
  const previous = latestExecution.progress;
  const column = latestExecution.column_progress;

  let finalTotal: number;
  if (typeof previous?.total === "number" && previous.total > 0) {
    finalTotal = previous.total;
  } else if (latestExecution.rows > 0) {
    finalTotal = latestExecution.rows;
  } else {
    finalTotal = rows;
  }

  const progress: RecipeExecutionProgress = {
    ...(previous ?? {}),
    done: finalTotal,
    total: finalTotal,
    percent: 100,
    eta_sec: 0,
  };

  let columnProgress: RecipeExecutionProgress | null = column;
  if (column && typeof column.total === "number" && column.total > 0) {
    columnProgress = {
      ...column,
      done: column.total,
      percent: 100,
      eta_sec: 0,
    };
  }

  return { progress, columnProgress };
}

/**
 * Follows a recipe job until it reaches a terminal status and keeps the
 * execution record up to date through `onUpsert`.
 *
 * Two sources feed the record concurrently: a background event stream
 * (log lines, lifecycle events) and a status poll that repeats every 1200 ms.
 * Both mutate the same closure state (`done`, `lastStatus`, `latestExecution`),
 * so the order of statements below matters.
 *
 * Once the job is completed, the final status is re-read, analysis and the
 * first dataset page are resolved (from the completion event payload or from
 * the API), and progress is forced to 100%.
 *
 * @returns `true` when the job completed; `false` when it errored, was
 *   cancelled, or a status poll threw.
 */
export async function trackRecipeExecution({
  label,
  kind,
  rows,
  jobId,
  initialExecution,
  notify,
  onUpsert,
  onSetPreviewErrors,
  onPreviewSuccess,
}: TrackRecipeExecutionParams): Promise<boolean> {
  // Shared mutable state written by both the event callback and the poll loop.
  let done = false;
  let lastStatus: RecipeExecutionStatus = initialExecution.status;
  let completedEventPayload: Record<string, unknown> | null = null;
  let latestExecution: RecipeExecutionRecord = initialExecution;

  // The stream is started without awaiting it; it is aborted in the `finally`
  // of the poll loop below.
  const eventsAbortController = new AbortController();
  void streamRecipeJobEvents({
    jobId,
    signal: eventsAbortController.signal,
    lastEventId: latestExecution.lastEventId,
    onEvent: (event) => {
      // Tracks whether this event altered the record, so events that match
      // none of the lifecycle branches below still get published.
      let changed = false;

      if (typeof event.id === "number") {
        latestExecution = {
          ...latestExecution,
          lastEventId: event.id,
        };
        changed = true;
      }

      const logLine = toExecutionLogLine(event);
      if (logLine) {
        latestExecution = {
          ...latestExecution,
          log_lines: appendExecutionLogLine(latestExecution.log_lines, logLine),
        };
        changed = true;
      }

      // Prefer the type carried in the payload; fall back to the event name.
      const eventType =
        typeof event.payload.type === "string" ? event.payload.type : event.event;

      if (eventType === "job.started") {
        latestExecution = {
          ...latestExecution,
          status: "active",
        };
        onUpsert(latestExecution);
        return;
      }

      if (eventType === "job.completed") {
        lastStatus = "completed";
        // Kept so the post-completion step can reuse analysis/dataset/artifacts
        // carried by the event.
        completedEventPayload = event.payload;
        // Requests the poll loop to stop at its next `while` check.
        done = true;
        latestExecution = {
          ...latestExecution,
          status: "completed",
          finishedAt: Date.now(),
          artifact_path:
            typeof event.payload.artifact_path === "string"
              ? event.payload.artifact_path
              : latestExecution.artifact_path,
          error: null,
        };
        onUpsert(latestExecution);
        return;
      }

      if (eventType === "job.error") {
        lastStatus = "error";
        done = true;
        latestExecution = {
          ...latestExecution,
          status: "error",
          finishedAt: Date.now(),
          error:
            typeof event.payload.error === "string"
              ? event.payload.error
              : latestExecution.error ?? `${label} failed.`,
        };
        onUpsert(latestExecution);
        return;
      }

      if (eventType === "job.cancelling") {
        latestExecution = {
          ...latestExecution,
          status: "cancelling",
        };
        onUpsert(latestExecution);
        return;
      }

      if (changed) {
        onUpsert(latestExecution);
      }
    },
  }).catch(() => {
    // polling is fallback source of truth
  });

  try {
    while (!done) {
      const status = await getRecipeJobStatus(jobId);
      const mappedStatus = mapJobStatus(status.status);
      // The polled status overwrites whatever the event stream recorded.
      lastStatus = mappedStatus;
      latestExecution = applyExecutionStatusSnapshot(latestExecution, status);
      onUpsert(latestExecution);

      done = isTerminalStatus(mappedStatus);
      if (!done) {
        await delay(1200);
      }
    }
  } catch (error) {
    // Any failure of the status poll ends tracking and marks the record as errored.
    const message = toErrorMessage(error, `${label} failed.`);
    latestExecution = {
      ...latestExecution,
      status: "error",
      error: message,
      finishedAt: Date.now(),
    };
    onUpsert(latestExecution);
    if (notify) {
      toastError(`${label} failed`, message);
    }
    return false;
  } finally {
    eventsAbortController.abort();
  }

  if (lastStatus === "completed") {
    // Re-read the status three times, 250 ms apart, folding each snapshot into
    // the record; the first failed read stops the re-reads.
    for (let attempt = 0; attempt < 3; attempt += 1) {
      try {
        const finalStatus = await getRecipeJobStatus(jobId);
        latestExecution = applyExecutionStatusSnapshot(latestExecution, finalStatus);
      } catch {
        break;
      }
      if (attempt < 2) {
        await delay(250);
      }
    }

    // Results that may have been delivered inline with the completion event.
    const eventAnalysis = completedEventPayload
      ? completedEventPayload["analysis"]
      : null;
    const eventDataset = completedEventPayload
      ? completedEventPayload["dataset"]
      : null;
    const eventProcessorArtifacts =
      completedEventPayload &&
      typeof completedEventPayload["processor_artifacts"] === "object" &&
      completedEventPayload["processor_artifacts"] !== null
        ? (completedEventPayload["processor_artifacts"] as Record<string, unknown>)
        : null;
    // Preview runs fetch the dataset only when the event did not carry an array.
    const shouldFetchPreviewDataset = kind === "preview" && !Array.isArray(eventDataset);
    // Analysis is fetched unless the event carried an object; full runs always fetch.
    const shouldFetchAnalysis =
      !completedEventPayload ||
      typeof eventAnalysis !== "object" ||
      eventAnalysis === null ||
      kind === "full";

    // `allSettled` so that a failed fetch falls back to the values already on
    // the record instead of failing the whole completion step.
    const [analysisResult, datasetResult] = await Promise.allSettled([
      shouldFetchAnalysis
        ? getRecipeJobAnalysis(jobId)
        : Promise.resolve(eventAnalysis),
      shouldFetchPreviewDataset || kind === "full"
        ? getRecipeJobDataset(jobId, { limit: DATASET_PAGE_SIZE, offset: 0 })
        : Promise.resolve({ dataset: eventDataset ?? [], total: rows }),
    ]);

    const analysis =
      analysisResult.status === "fulfilled"
        ? normalizeAnalysis(analysisResult.value)
        : latestExecution.analysis;
    const datasetResponse =
      datasetResult.status === "fulfilled"
        ? datasetResult.value
        : null;
    const dataset = datasetResponse
      ? normalizeDatasetRows(datasetResponse.dataset)
      : latestExecution.dataset;
    const datasetTotal =
      datasetResponse && typeof datasetResponse.total === "number"
        ? datasetResponse.total
        : latestExecution.datasetTotal;
    const completedProgress = normalizeCompletedProgress({ latestExecution, rows });

    latestExecution = {
      ...latestExecution,
      status: "completed",
      progress: completedProgress.progress,
      column_progress: completedProgress.columnProgress,
      analysis,
      dataset,
      datasetTotal,
      datasetPage: 1,
      datasetPageSize: DATASET_PAGE_SIZE,
      error: null,
      processor_artifacts: eventProcessorArtifacts ?? latestExecution.processor_artifacts,
      finishedAt: latestExecution.finishedAt ?? Date.now(),
    };
    onUpsert(latestExecution);

    if (notify) {
      if (kind === "preview") {
        onSetPreviewErrors([]);
        onPreviewSuccess?.();
        toastSuccess(`Preview generated (${rows} rows).`);
      } else {
        toastSuccess("Full run completed.");
      }
    }
    return true;
  }

  if (lastStatus === "cancelled") {
    latestExecution = {
      ...latestExecution,
      status: "cancelled",
      error: latestExecution.error ?? "Run cancelled.",
      finishedAt: latestExecution.finishedAt ?? Date.now(),
    };
    onUpsert(latestExecution);
    if (notify) {
      toastError(`${label} cancelled`, "The execution was cancelled.");
    }
    return false;
  }

  // Every remaining outcome is reported as an error.
  latestExecution = {
    ...latestExecution,
    status: "error",
    error: latestExecution.error ?? `${label} failed.`,
    finishedAt: latestExecution.finishedAt ?? Date.now(),
  };
  onUpsert(latestExecution);
  if (notify) {
    toastError(`${label} failed`, latestExecution.error ?? "Execution failed.");
  }
  return false;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/hooks/use-node-connection-status.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useMemo } from "react";
import { useRecipeStudioStore } from "../stores/recipe-studio";
import { INFRA_NODE_KINDS, type NodeConfig } from "../types";

/** Connectivity flags computed for a single recipe graph node. */
type ConnectionStatus = {
  /** True when the node has zero edges at all. */
  isDisconnected: boolean;
  /**
   * True when an LLM node has no incoming data edge (only infra).
   * NOTE(review): the hook that fills this in inspects edges in both
   * directions, so an outgoing edge to a non-infra node also counts — confirm
   * whether "incoming" is the intended contract.
   */
  missingDataInput: boolean;
};

/**
 * Derives connectivity warnings for one node from the store's edges and configs.
 *
 * Nodes without a config and markdown notes never report a problem. For any
 * other node, `isDisconnected` is set when no edge touches it. For an LLM node
 * that has at least one edge, `missingDataInput` is set when none of the nodes
 * on the other end of its edges has a config of a non-infra kind.
 */
export function useNodeConnectionStatus(
  nodeId: string,
  config: NodeConfig | undefined,
): ConnectionStatus {
  const edges = useRecipeStudioStore((state) => state.edges);
  const configs = useRecipeStudioStore((state) => state.configs);

  return useMemo(() => {
    if (!config || config.kind === "markdown_note") {
      return { isDisconnected: false, missingDataInput: false };
    }

    // Collect the id at the far end of every edge that touches this node.
    const neighbourIds: string[] = [];
    for (const edge of edges) {
      if (edge.source === nodeId) {
        neighbourIds.push(edge.target);
      } else if (edge.target === nodeId) {
        neighbourIds.push(edge.source);
      }
    }

    const isDisconnected = neighbourIds.length === 0;
    if (config.kind !== "llm" || isDisconnected) {
      return { isDisconnected, missingDataInput: false };
    }

    const linkedToDataNode = neighbourIds.some((neighbourId) => {
      const neighbourConfig = configs[neighbourId];
      if (!neighbourConfig) {
        return false;
      }
      return !INFRA_NODE_KINDS.has(neighbourConfig.kind);
    });

    return { isDisconnected, missingDataInput: !linkedToDataNode };
  }, [nodeId, config, edges, configs]);
}


================================================
FILE: studio/frontend/src/features/recipe-studio/hooks/use-recipe-editor-graph.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  Edge,
  EdgeChange,
  Node,
  NodeChange,
  ReactFlowInstance,
  XYPosition,
} from "@xyflow/react";
import {
  type DragEvent as ReactDragEvent,
  type RefObject,
  useCallback,
  useMemo,
} from "react";
import { RECIPE_BLOCK_DND_MIME, type RecipeBlockDragPayload } from "../components/block-sheet";
import type { SeedBlockType } from "../blocks/registry";
import type {
  LlmType,
  NodeConfig,
  RecipeNode as RecipeBuilderNode,
  RecipeNodeData,
  SamplerType,
} from "../types";
import { applyAuxNodeChanges, filterEdgeChangesByIds, filterNodeChangesByIds } from "../utils/reactflow-changes";
import type { RecipeGraphAuxNodeData } from "../components/recipe-graph-aux-node";

// Drag payload kinds accepted by the drop-payload parser below; a payload
// whose `kind` is not listed here is rejected.
const SUPPORTED_DRAG_KINDS: RecipeBlockDragPayload["kind"][] = [
  "sampler",
  "seed",
  "llm",
  "validator",
  "expression",
  "note",
];

/**
 * Parses the serialized drag payload read from a drop event.
 *
 * The text can originate from any drag source (the drop handler also accepts
 * `text/plain`), so the shape is verified instead of trusted: the value must
 * be a JSON object whose `kind` is one of `SUPPORTED_DRAG_KINDS` and whose
 * `type` is a non-empty string.
 *
 * @param raw Text taken from the drop event's data transfer.
 * @returns The validated payload, or `null` when `raw` is not valid JSON or
 *   does not have the expected shape.
 */
function parseRecipeBlockDragPayload(raw: string): RecipeBlockDragPayload | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }

  // JSON.parse can yield primitives or null (e.g. dragged text "42" or "null").
  if (typeof parsed !== "object" || parsed === null) {
    return null;
  }

  const { kind, type } = parsed as { kind?: unknown; type?: unknown };
  // Reject non-string values so numbers/objects never reach the node factories.
  if (typeof kind !== "string" || typeof type !== "string" || !type) {
    return null;
  }
  if (!SUPPORTED_DRAG_KINDS.includes(kind as RecipeBlockDragPayload["kind"])) {
    return null;
  }

  return {
    kind: kind as RecipeBlockDragPayload["kind"],
    type: type as RecipeBlockDragPayload["type"],
  };
}

/** Inputs of `useRecipeEditorGraph`. */
type UseRecipeEditorGraphArgs = {
  /** Builder nodes; their ids decide which node changes are forwarded to `onNodesChange`. */
  nodes: RecipeBuilderNode[];
  /** Builder edges; their ids decide which edge changes are forwarded to `onEdgesChange`. */
  edges: Edge[];
  /** Node configs keyed by node id; consulted on double-click. */
  configs: Record<string, NodeConfig>;
  /** React Flow instance used to convert screen to flow coordinates; drops are ignored while null. */
  reactFlowInstance: ReactFlowInstance<Node<RecipeNodeData | RecipeGraphAuxNodeData>, Edge> | null;
  /** Canvas container whose bounding rect gives the viewport center for sheet-added nodes. */
  flowContainerRef: RefObject<HTMLDivElement | null>;
  /** Called with the node id when a "builder" node is clicked. */
  selectConfig: (id: string) => void;
  /** Called with the node id when a markdown note node is double-clicked. */
  openConfig: (id: string) => void;
  /** Receives node changes after filtering by the ids in `nodes`. */
  onNodesChange: (changes: NodeChange<RecipeBuilderNode>[]) => void;
  /** Receives edge changes after filtering by the ids in `edges`. */
  onEdgesChange: (changes: EdgeChange<Edge>[]) => void;
  /** Handed to `applyAuxNodeChanges` for every batch of node changes. */
  setAuxNodePosition: (id: string, position: XYPosition) => void;
  // Node factories. Drops call them with the drop position and `openDialog`
  // set to false; sheet handlers call them with the viewport center only.
  addSamplerNode: (type: SamplerType, position?: XYPosition, openDialog?: boolean) => void;
  addSeedNode: (type: SeedBlockType, position?: XYPosition, openDialog?: boolean) => void;
  addLlmNode: (type: LlmType, position?: XYPosition, openDialog?: boolean) => void;
  addModelProviderNode: (position?: XYPosition, openDialog?: boolean) => void;
  addModelConfigNode: (position?: XYPosition, openDialog?: boolean) => void;
  addToolProfileNode: (position?: XYPosition, openDialog?: boolean) => void;
  addExpressionNode: (position?: XYPosition, openDialog?: boolean) => void;
  addValidatorNode: (
    type: "validator_python" | "validator_sql" | "validator_oxc",
    position?: XYPosition,
    openDialog?: boolean,
  ) => void;
  addMarkdownNoteNode: (position?: XYPosition, openDialog?: boolean) => void;
};

/** Handlers returned by `useRecipeEditorGraph`. */
type UseRecipeEditorGraphResult = {
  /** Selects the config of a clicked "builder" node; other node types are ignored. */
  handleNodeClick: (_: unknown, node: Node<RecipeNodeData | RecipeGraphAuxNodeData>) => void;
  /** Opens the config of a double-clicked markdown note node. */
  handleNodeDoubleClick: (_: unknown, node: Node<RecipeNodeData | RecipeGraphAuxNodeData>) => void;
  /** Applies aux-node changes, then forwards the changes that target builder nodes. */
  handleNodesChange: (
    changes: NodeChange<Node<RecipeNodeData | RecipeGraphAuxNodeData>>[],
  ) => void;
  /** Forwards the edge changes that target builder edges. */
  handleEdgesChange: (changes: EdgeChange<Edge>[]) => void;
  /** Accepts drags that carry the recipe block MIME type or plain text. */
  handleDragOver: (event: ReactDragEvent<HTMLDivElement>) => void;
  /** Creates the node described by a dropped payload at the drop position. */
  handleDrop: (event: ReactDragEvent<HTMLDivElement>) => void;
  // Sheet handlers: each adds a node of the given kind at the viewport center.
  handleAddSamplerFromSheet: (type: SamplerType) => void;
  handleAddSeedFromSheet: (type: SeedBlockType) => void;
  handleAddLlmFromSheet: (type: LlmType) => void;
  handleAddModelProviderFromSheet: () => void;
  handleAddModelConfigFromSheet: () => void;
  handleAddToolProfileFromSheet: () => void;
  handleAddExpressionFromSheet: () => void;
  handleAddValidatorFromSheet: (
    type: "validator_python" | "validator_sql" | "validator_oxc",
  ) => void;
  handleAddMarkdownNoteFromSheet: () => void;
};

/**
 * Wires the recipe editor canvas to the recipe store.
 *
 * Returns memoized handlers for node clicks, node/edge change batches,
 * drag-and-drop of blocks onto the canvas, and "add from sheet" actions that
 * place a new node at the center of the visible canvas.
 */
export function useRecipeEditorGraph(
  args: UseRecipeEditorGraphArgs,
): UseRecipeEditorGraphResult {
  const {
    nodes,
    edges,
    configs,
    reactFlowInstance,
    flowContainerRef,
    selectConfig,
    openConfig,
    onNodesChange,
    onEdgesChange,
    setAuxNodePosition,
    addSamplerNode,
    addSeedNode,
    addLlmNode,
    addModelProviderNode,
    addModelConfigNode,
    addToolProfileNode,
    addExpressionNode,
    addValidatorNode,
    addMarkdownNoteNode,
  } = args;

  // Ids of builder nodes/edges; change batches are filtered against these.
  const baseNodeIds = useMemo(() => {
    const ids = new Set<string>();
    for (const node of nodes) {
      ids.add(node.id);
    }
    return ids;
  }, [nodes]);
  const baseEdgeIds = useMemo(() => {
    const ids = new Set<string>();
    for (const edge of edges) {
      ids.add(edge.id);
    }
    return ids;
  }, [edges]);

  const handleNodeClick = useCallback(
    (_: unknown, node: Node<RecipeNodeData | RecipeGraphAuxNodeData>) => {
      if (node.type === "builder") {
        selectConfig(node.id);
      }
    },
    [selectConfig],
  );

  const handleNodeDoubleClick = useCallback(
    (_: unknown, node: Node<RecipeNodeData | RecipeGraphAuxNodeData>) => {
      // Only markdown notes open their config on double-click.
      if (node.type === "builder" && configs[node.id]?.kind === "markdown_note") {
        openConfig(node.id);
      }
    },
    [configs, openConfig],
  );

  const handleNodesChange = useCallback(
    (changes: NodeChange<Node<RecipeNodeData | RecipeGraphAuxNodeData>>[]) => {
      applyAuxNodeChanges(changes, { setAuxNodePosition });
      const builderChanges = filterNodeChangesByIds(
        changes as NodeChange<RecipeBuilderNode>[],
        baseNodeIds,
      );
      if (builderChanges.length) {
        onNodesChange(builderChanges);
      }
    },
    [baseNodeIds, onNodesChange, setAuxNodePosition],
  );

  const handleEdgesChange = useCallback(
    (changes: EdgeChange<Edge>[]) => {
      const builderChanges = filterEdgeChangesByIds(changes, baseEdgeIds);
      if (builderChanges.length) {
        onEdgesChange(builderChanges);
      }
    },
    [baseEdgeIds, onEdgesChange],
  );

  const handleDragOver = useCallback((event: ReactDragEvent<HTMLDivElement>) => {
    const carriesBlock =
      event.dataTransfer.types.includes(RECIPE_BLOCK_DND_MIME) ||
      event.dataTransfer.types.includes("text/plain");
    if (!carriesBlock) {
      return;
    }
    event.preventDefault();
    event.dataTransfer.dropEffect = "copy";
  }, []);

  const handleDrop = useCallback(
    (event: ReactDragEvent<HTMLDivElement>) => {
      if (!reactFlowInstance) {
        return;
      }
      const serialized =
        event.dataTransfer.getData(RECIPE_BLOCK_DND_MIME) ||
        event.dataTransfer.getData("text/plain");
      if (!serialized) {
        return;
      }
      const payload = parseRecipeBlockDragPayload(serialized);
      if (!payload) {
        return;
      }
      event.preventDefault();
      const dropPosition = reactFlowInstance.screenToFlowPosition({
        x: event.clientX,
        y: event.clientY,
      });

      // First dispatch on the payload kind ...
      switch (payload.kind) {
        case "sampler":
          addSamplerNode(payload.type as SamplerType, dropPosition, false);
          return;
        case "seed":
          addSeedNode(payload.type as SeedBlockType, dropPosition, false);
          return;
        case "expression":
          addExpressionNode(dropPosition, false);
          return;
        case "validator":
          addValidatorNode(
            payload.type as "validator_python" | "validator_sql" | "validator_oxc",
            dropPosition,
            false,
          );
          return;
        case "note":
          addMarkdownNoteNode(dropPosition, false);
          return;
        default:
          break;
      }

      // ... then, for the remaining kinds, on the payload type; anything that
      // is not an infra block becomes an LLM node.
      switch (payload.type) {
        case "model_provider":
          addModelProviderNode(dropPosition, false);
          return;
        case "model_config":
          addModelConfigNode(dropPosition, false);
          return;
        case "tool_config":
          addToolProfileNode(dropPosition, false);
          return;
        default:
          addLlmNode(payload.type as LlmType, dropPosition, false);
      }
    },
    [
      addExpressionNode,
      addLlmNode,
      addMarkdownNoteNode,
      addModelConfigNode,
      addModelProviderNode,
      addToolProfileNode,
      addSamplerNode,
      addSeedNode,
      addValidatorNode,
      reactFlowInstance,
    ],
  );

  // Flow-space position of the middle of the canvas container, or undefined
  // while the instance or the container element is not available.
  const getViewportCenterPosition = useCallback(() => {
    const container = flowContainerRef.current;
    if (!reactFlowInstance || !container) {
      return undefined;
    }
    const bounds = container.getBoundingClientRect();
    return reactFlowInstance.screenToFlowPosition({
      x: bounds.left + bounds.width / 2,
      y: bounds.top + bounds.height / 2,
    });
  }, [flowContainerRef, reactFlowInstance]);

  const handleAddSamplerFromSheet = useCallback(
    (type: SamplerType) => addSamplerNode(type, getViewportCenterPosition()),
    [addSamplerNode, getViewportCenterPosition],
  );

  const handleAddSeedFromSheet = useCallback(
    (type: SeedBlockType) => addSeedNode(type, getViewportCenterPosition()),
    [addSeedNode, getViewportCenterPosition],
  );

  const handleAddLlmFromSheet = useCallback(
    (type: LlmType) => addLlmNode(type, getViewportCenterPosition()),
    [addLlmNode, getViewportCenterPosition],
  );

  const handleAddModelProviderFromSheet = useCallback(
    () => addModelProviderNode(getViewportCenterPosition()),
    [addModelProviderNode, getViewportCenterPosition],
  );

  const handleAddModelConfigFromSheet = useCallback(
    () => addModelConfigNode(getViewportCenterPosition()),
    [addModelConfigNode, getViewportCenterPosition],
  );

  const handleAddExpressionFromSheet = useCallback(
    () => addExpressionNode(getViewportCenterPosition()),
    [addExpressionNode, getViewportCenterPosition],
  );

  const handleAddToolProfileFromSheet = useCallback(
    () => addToolProfileNode(getViewportCenterPosition()),
    [addToolProfileNode, getViewportCenterPosition],
  );

  const handleAddValidatorFromSheet = useCallback(
    (type: "validator_python" | "validator_sql" | "validator_oxc") =>
      addValidatorNode(type, getViewportCenterPosition()),
    [addValidatorNode, getViewportCenterPosition],
  );

  const handleAddMarkdownNoteFromSheet = useCallback(
    () => addMarkdownNoteNode(getViewportCenterPosition()),
    [addMarkdownNoteNode, getViewportCenterPosition],
  );

  return {
    handleNodeClick,
    handleNodeDoubleClick,
    handleNodesChange,
    handleEdgesChange,
    handleDragOver,
    handleDrop,
    handleAddSamplerFromSheet,
    handleAddSeedFromSheet,
    handleAddLlmFromSheet,
    handleAddModelProviderFromSheet,
    handleAddModelConfigFromSheet,
    handleAddToolProfileFromSheet,
    handleAddExpressionFromSheet,
    handleAddValidatorFromSheet,
    handleAddMarkdownNoteFromSheet,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/hooks/use-recipe-executions.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useState } from "react";
import { useShallow } from "zustand/react/shallow";
import { toastError } from "@/shared/toast";
import {
  cancelRecipeJob,
  createRecipeJob,
  getRecipeJobDataset,
  validateRecipe,
} from "../api";
import { saveRecipeExecution } from "../data/executions-db";
import type {
  RecipeExecutionKind,
  RecipeExecutionRecord,
} from "../execution-types";
import {
  DATASET_PAGE_SIZE,
  executionLabel,
  normalizeRunName,
  normalizeDatasetRows,
  toErrorMessage,
  withExecutionDefaults,
} from "../executions/execution-helpers";
import {
  findResumableExecution,
  loadSortedRecipeExecutions,
} from "../executions/hydration";
import { createBaseExecutionRecord } from "../executions/runtime";
import {
  buildExecutionPayload,
  sanitizeExecutionRows,
} from "../executions/run-settings";
import { trackRecipeExecution } from "../executions/tracker";
import {
  type RecipeRunSettings,
  useRecipeExecutionsStore,
} from "../stores/recipe-executions";
import type { RecipePayload, RecipePayloadResult } from "../utils/payload/types";

/** Inputs of `useRecipeExecutions`. */
type UseRecipeExecutionsParams = {
  /** Recipe whose stored executions are loaded; a change resets and re-hydrates the store. */
  recipeId: string;
  /** Signature of the current recipe state, stored on each new execution record. */
  currentSignature: string;
  /** Built payload plus its errors; a non-empty error list blocks runs and validation. */
  payloadResult: RecipePayloadResult;
  /** Optional callback invoked right after a new execution record has been created. */
  onExecutionStart?: () => void;
  /** Optional callback passed through to the execution tracker. */
  onPreviewSuccess?: () => void;
};

/** State and actions returned by `useRecipeExecutions`. */
type UseRecipeExecutionsResult = {
  // Run dialog state (read from the executions store).
  runDialogOpen: boolean;
  runDialogKind: RecipeExecutionKind;
  setRunDialogKind: (kind: RecipeExecutionKind) => void;
  setRunDialogOpen: (open: boolean) => void;
  // Row counts and run name entered for preview / full runs.
  previewRows: number;
  fullRows: number;
  fullRunName: string;
  setPreviewRows: (rows: number) => void;
  setFullRows: (rows: number) => void;
  setFullRunName: (name: string) => void;
  /** Messages from the last failed run attempt (payload, validation, or request errors). */
  runErrors: string[];
  runSettings: RecipeRunSettings;
  setRunSettings: (patch: Partial<RecipeRunSettings>) => void;
  // Loading flags; each is true while a run of that kind is being created and tracked.
  previewLoading: boolean;
  fullLoading: boolean;
  executions: RecipeExecutionRecord[];
  selectedExecutionId: string | null;
  setSelectedExecutionId: (id: string) => void;
  /** Clears previous errors/validation and opens the dialog for the given kind. */
  openRunDialog: (kind: RecipeExecutionKind) => void;
  /** Starts a preview or full run according to the dialog kind; resolves to the run outcome. */
  runFromDialog: () => Promise<boolean>;
  /** Validates the current dialog configuration without starting a run. */
  validateFromDialog: () => Promise<boolean>;
  validateLoading: boolean;
  /** Outcome of the last `validateFromDialog` call; null until one has finished or after a reset. */
  validateResult: {
    valid: boolean;
    errors: string[];
    rawDetail: string | null;
  } | null;
  runPreview: () => Promise<boolean>;
  runFull: () => Promise<boolean>;
  /** Requests cancellation of the execution's job; no-op when the execution has no job id. */
  cancelExecution: (id: string) => Promise<void>;
  /** Loads one dataset page (1-based) of a full-run execution. */
  loadExecutionDatasetPage: (id: string, page: number) => Promise<void>;
};

/**
 * Turns validation errors into display strings.
 *
 * Each message is prefixed with the upper-cased error code and/or
 * `column <path>` when those fields are present and non-blank, joined by
 * " · " and followed by ": ". Without either field the message is returned
 * unchanged.
 */
function formatValidationMessages(input: {
  errors: Array<{ message: string; path?: string | null; code?: string | null }>;
}): string[] {
  const formatted: string[] = [];
  for (const item of input.errors) {
    const labels: string[] = [];

    const code = item.code?.trim();
    if (code) {
      labels.push(code.toUpperCase());
    }

    const path = item.path?.trim();
    if (path) {
      labels.push(`column ${path}`);
    }

    formatted.push(
      labels.length > 0 ? `${labels.join(" · ")}: ${item.message}` : item.message,
    );
  }
  return formatted;
}

/**
 * Manages preview and full executions of a recipe.
 *
 * On mount, and whenever `recipeId` changes, the executions store is reset,
 * stored executions are loaded, and tracking resumes for a resumable execution
 * that has a job id. The returned actions validate the payload, create jobs,
 * track them to completion, cancel them, and page through full-run datasets.
 * Every record update is written to the store and persisted.
 */
export function useRecipeExecutions({
  recipeId,
  currentSignature,
  payloadResult,
  onExecutionStart,
  onPreviewSuccess,
}: UseRecipeExecutionsParams): UseRecipeExecutionsResult {
  // State of the explicit "validate" action; kept locally, not in the store.
  const [validateLoading, setValidateLoading] = useState(false);
  const [validateResult, setValidateResult] = useState<{
    valid: boolean;
    errors: string[];
    rawDetail: string | null;
  } | null>(null);
  const {
    runDialogOpen,
    runDialogKind,
    previewRows,
    fullRows,
    fullRunName,
    runErrors,
    runSettings,
    previewLoading,
    fullLoading,
    executions,
    selectedExecutionId,
    setRunDialogOpen,
    setRunDialogKind,
    setPreviewRows,
    setFullRows,
    setFullRunName,
    setRunErrors,
    setRunSettings,
    setPreviewLoading,
    setFullLoading,
    setExecutions,
    upsertExecution,
    selectExecution,
    resetForRecipe,
  } = useRecipeExecutionsStore(
    useShallow((state) => ({
      runDialogOpen: state.runDialogOpen,
      runDialogKind: state.runDialogKind,
      previewRows: state.previewRows,
      fullRows: state.fullRows,
      fullRunName: state.fullRunName,
      runErrors: state.runErrors,
      runSettings: state.runSettings,
      previewLoading: state.previewLoading,
      fullLoading: state.fullLoading,
      executions: state.executions,
      selectedExecutionId: state.selectedExecutionId,
      setRunDialogOpen: state.setRunDialogOpen,
      setRunDialogKind: state.setRunDialogKind,
      setPreviewRows: state.setPreviewRows,
      setFullRows: state.setFullRows,
      setFullRunName: state.setFullRunName,
      setRunErrors: state.setRunErrors,
      setRunSettings: state.setRunSettings,
      setPreviewLoading: state.setPreviewLoading,
      setFullLoading: state.setFullLoading,
      setExecutions: state.setExecutions,
      upsertExecution: state.upsertExecution,
      selectExecution: state.selectExecution,
      resetForRecipe: state.resetForRecipe,
    })),
  );
  const payloadErrorMessage = payloadResult.errors[0] ?? "Invalid payload.";

  // Writes a record to the store and persists it; a persistence failure is
  // logged and does not interrupt the caller.
  const upsertAndPersist = useCallback(
    (record: RecipeExecutionRecord): void => {
      const normalizedRecord = withExecutionDefaults(record);
      upsertExecution(normalizedRecord);
      void saveRecipeExecution(normalizedRecord).catch((error) => {
        console.error("Save recipe execution failed:", error);
      });
    },
    [upsertExecution],
  );

  // Hydration: load stored executions and resume tracking of a resumable one.
  useEffect(() => {
    // Prevents a load that finishes after cleanup from touching the store.
    let cancelled = false;

    resetForRecipe();

    async function hydrate(): Promise<void> {
      try {
        const records = await loadSortedRecipeExecutions(recipeId);
        if (cancelled) {
          return;
        }

        setExecutions(records);
        const resumable = findResumableExecution(records);
        if (!resumable?.jobId) {
          return;
        }

        // Resumed silently: `notify` is false, so no toasts are raised.
        void trackRecipeExecution({
          label: executionLabel(resumable.kind),
          kind: resumable.kind,
          rows: resumable.rows,
          jobId: resumable.jobId,
          initialExecution: resumable,
          notify: false,
          onUpsert: upsertAndPersist,
          onSetPreviewErrors: setRunErrors,
          onPreviewSuccess,
        });
      } catch (error) {
        console.error("Load recipe executions failed:", error);
      }
    }

    void hydrate();

    return () => {
      cancelled = true;
    };
  }, [
    onPreviewSuccess,
    recipeId,
    resetForRecipe,
    setExecutions,
    setRunErrors,
    upsertAndPersist,
  ]);

  // Returns the payload only when it was built without errors.
  const readPayload = useCallback((): RecipePayload | null => {
    if (payloadResult.errors.length === 0) {
      return payloadResult.payload;
    }
    return null;
  }, [payloadResult.errors.length, payloadResult.payload]);

  // Like `readPayload`, but reports the payload errors when there is no payload.
  const readExecutablePayload = useCallback((): RecipePayload | null => {
    const payload = readPayload();
    if (payload) {
      return payload;
    }

    setRunErrors(payloadResult.errors);
    toastError("Invalid recipe payload", payloadErrorMessage);
    return null;
  }, [payloadErrorMessage, payloadResult.errors, readPayload, setRunErrors]);

  // Creates the job for an already validated payload and tracks it to its end.
  const runExecution = useCallback(
    async (input: {
      kind: RecipeExecutionKind;
      payload: RecipePayload;
      rows: number;
      settings: RecipeRunSettings;
      runName: string | null;
    }): Promise<boolean> => {
      const { kind, payload, rows, settings, runName } = input;
      const setLoading = kind === "preview" ? setPreviewLoading : setFullLoading;
      const label = executionLabel(kind);

      setLoading(true);
      const baseExecution = createBaseExecutionRecord({
        recipeId,
        kind,
        rows,
        currentSignature,
        runName,
      });

      // The record is published before the job exists so the run is visible
      // immediately; it is rewritten as "error" if job creation fails.
      upsertAndPersist(baseExecution);
      onExecutionStart?.();
      setRunDialogOpen(false);

      try {
        const jobPayload = buildExecutionPayload({
          payload,
          kind,
          rows,
          settings,
          runName,
        });
        const createdJob = await createRecipeJob(jobPayload);
        const executionWithJob = {
          ...baseExecution,
          jobId: createdJob.job_id,
        };
        upsertAndPersist(executionWithJob);

        return await trackRecipeExecution({
          label,
          kind,
          rows,
          jobId: createdJob.job_id,
          initialExecution: executionWithJob,
          notify: true,
          onUpsert: upsertAndPersist,
          onSetPreviewErrors: setRunErrors,
          onPreviewSuccess,
        });
      } catch (error) {
        const message = toErrorMessage(error, `${label} request failed.`);
        upsertAndPersist({
          ...baseExecution,
          status: "error",
          error: message,
          finishedAt: Date.now(),
        });
        setRunErrors([message]);
        toastError(`${label} failed`, message);
        return false;
      } finally {
        setLoading(false);
      }
    },
    [
      currentSignature,
      onExecutionStart,
      onPreviewSuccess,
      recipeId,
      setFullLoading,
      setPreviewLoading,
      setRunDialogOpen,
      setRunErrors,
      upsertAndPersist,
    ],
  );

  // Checks the run name and payload, validates against the API, then runs.
  const runWithValidation = useCallback(
    async (
      kind: RecipeExecutionKind,
      rows: number,
      runName: string | null,
    ): Promise<boolean> => {
      const trimmedRunName = typeof runName === "string" ? runName.trim() : "";
      if (kind === "full" && !trimmedRunName) {
        const message = "Run name required for full runs.";
        setRunErrors([message]);
        toastError("Run name required", message);
        return false;
      }

      const payload = readExecutablePayload();
      if (!payload) {
        return false;
      }

      // Full runs submit the same trimmed name that passed the check above, so
      // surrounding whitespace never reaches validation, the job payload, or
      // the stored record. Other kinds keep the caller's value unchanged.
      const effectiveRunName = kind === "full" ? trimmedRunName : runName;

      const normalizedRows = sanitizeExecutionRows(rows, kind);
      const executionPayload = buildExecutionPayload({
        payload,
        kind,
        rows: normalizedRows,
        settings: runSettings,
        runName: effectiveRunName,
      });

      try {
        const validation = await validateRecipe(executionPayload);
        if (!validation.valid) {
          const errors = formatValidationMessages({ errors: validation.errors });
          const fallback = validation.raw_detail ?? "Validation failed.";
          const nextErrors = errors.length > 0 ? errors : [fallback];
          setRunErrors(nextErrors);
          toastError("Validation failed", nextErrors[0]);
          return false;
        }
      } catch (error) {
        const message = toErrorMessage(error, "Validation failed.");
        setRunErrors([message]);
        toastError("Validation failed", message);
        return false;
      }

      return runExecution({
        kind,
        payload,
        rows: normalizedRows,
        settings: runSettings,
        runName: effectiveRunName,
      });
    },
    [readExecutablePayload, runExecution, runSettings, setRunErrors],
  );

  const runPreview = useCallback(async (): Promise<boolean> => {
    return runWithValidation("preview", previewRows, null);
  }, [previewRows, runWithValidation]);

  const runFull = useCallback(async (): Promise<boolean> => {
    return runWithValidation("full", fullRows, fullRunName);
  }, [fullRows, fullRunName, runWithValidation]);

  const runFromDialog = useCallback(async (): Promise<boolean> => {
    // A previous explicit validation result no longer applies once a run starts.
    setValidateResult(null);
    if (runDialogKind === "preview") {
      return runPreview();
    }
    return runFull();
  }, [runDialogKind, runFull, runPreview]);

  // Validates the dialog configuration and stores the outcome in
  // `validateResult` without starting a run or raising toasts.
  const validateFromDialog = useCallback(async (): Promise<boolean> => {
    setRunErrors([]);
    const payload = readPayload();
    if (!payload) {
      const nextErrors = payloadResult.errors.length > 0
        ? payloadResult.errors
        : [payloadErrorMessage];
      setValidateResult({
        valid: false,
        errors: nextErrors,
        rawDetail: null,
      });
      return false;
    }

    const rows = runDialogKind === "preview" ? previewRows : fullRows;
    const normalizedRows = sanitizeExecutionRows(rows, runDialogKind);
    const executionPayload = buildExecutionPayload({
      payload,
      kind: runDialogKind,
      rows: normalizedRows,
      settings: runSettings,
      runName: runDialogKind === "full" ? normalizeRunName(fullRunName) : null,
    });

    setValidateLoading(true);
    try {
      const validation = await validateRecipe(executionPayload);
      const errors = formatValidationMessages({ errors: validation.errors });
      setValidateResult({
        valid: validation.valid,
        errors,
        rawDetail: validation.raw_detail ?? null,
      });
      return validation.valid;
    } catch (error) {
      const message = toErrorMessage(error, "Validation failed.");
      setValidateResult({
        valid: false,
        errors: [message],
        rawDetail: null,
      });
      return false;
    } finally {
      setValidateLoading(false);
    }
  }, [
    fullRunName,
    fullRows,
    payloadErrorMessage,
    payloadResult.errors,
    previewRows,
    readPayload,
    runDialogKind,
    runSettings,
    setRunErrors,
  ]);

  const openRunDialog = useCallback(
    (kind: RecipeExecutionKind): void => {
      setRunErrors([]);
      setValidateResult(null);
      setRunDialogKind(kind);
      if (kind === "full") {
        // Pre-fill the row count from the payload's `run.rows` when it is a
        // positive finite number.
        const payload = readPayload();
        const payloadRows = Number(payload?.run?.rows);
        if (Number.isFinite(payloadRows) && payloadRows > 0) {
          setFullRows(Math.floor(payloadRows));
        }
      }
      setRunDialogOpen(true);
    },
    [
      readPayload,
      setFullRows,
      setRunDialogKind,
      setRunDialogOpen,
      setRunErrors,
    ],
  );

  const cancelExecution = useCallback(
    async (id: string): Promise<void> => {
      const execution = executions.find((entry) => entry.id === id);
      if (!execution?.jobId) {
        return;
      }
      try {
        await cancelRecipeJob(execution.jobId);
        // Only marks the record as "cancelling" once the request succeeded.
        upsertAndPersist({
          ...execution,
          status: "cancelling",
        });
      } catch (error) {
        const message = toErrorMessage(error, "Could not cancel execution.");
        toastError("Cancel failed", message);
      }
    },
    [executions, upsertAndPersist],
  );

  const loadExecutionDatasetPage = useCallback(
    async (id: string, page: number): Promise<void> => {
      const execution = executions.find((entry) => entry.id === id);
      // Paging applies only to full runs that have a job; pages are 1-based.
      if (!execution || execution.kind !== "full" || !execution.jobId || page < 1) {
        return;
      }

      const pageSize = execution.datasetPageSize || DATASET_PAGE_SIZE;
      const offset = (page - 1) * pageSize;
      try {
        const response = await getRecipeJobDataset(execution.jobId, {
          limit: pageSize,
          offset,
        });
        const dataset = normalizeDatasetRows(response.dataset);
        const total =
          typeof response.total === "number" ? response.total : execution.datasetTotal;
        upsertAndPersist({
          ...execution,
          dataset,
          datasetTotal: total,
          datasetPage: page,
        });
      } catch (error) {
        const message = toErrorMessage(error, "Could not load dataset page.");
        toastError("Dataset page failed", message);
      }
    },
    [executions, upsertAndPersist],
  );

  const setSelectedExecutionId = useCallback(
    (id: string): void => {
      selectExecution(id);
    },
    [selectExecution],
  );

  return {
    runDialogOpen,
    runDialogKind,
    setRunDialogKind,
    setRunDialogOpen,
    previewRows,
    fullRows,
    fullRunName,
    setPreviewRows,
    setFullRows,
    setFullRunName,
    runErrors,
    runSettings,
    setRunSettings,
    previewLoading,
    fullLoading,
    executions,
    selectedExecutionId,
    setSelectedExecutionId,
    openRunDialog,
    runFromDialog,
    validateFromDialog,
    validateLoading,
    validateResult,
    runPreview,
    runFull,
    cancelExecution,
    loadExecutionDatasetPage,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/hooks/use-recipe-persistence.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useMemo, useState } from "react";
import { toastError, toastSuccess } from "@/shared/toast";
import { normalizeNonEmptyName } from "@/utils";
import {
  buildSignature,
  copyTextToClipboard,
  formatSavedLabel,
} from "../executions/execution-helpers";
import { importRecipePayload, type RecipeSnapshot } from "../utils/import";
import type { RecipePayloadResult } from "../utils/payload/types";

// Tone of the save indicator. In this file it is "success" only when the
// recipe is not dirty and a last-saved timestamp exists; otherwise "error".
type SaveTone = "success" | "error";

// Async callback supplied by the host that stores a recipe.
// Receives the recipe id (nullable in the signature), its display name and
// the payload to store; resolves with the stored id and an `updatedAt` number
// (presumably an epoch timestamp — TODO confirm the unit against the caller).
type PersistRecipeFn = (input: {
  id: string | null;
  name: string;
  payload: RecipePayloadResult["payload"];
}) => Promise<{
  id: string;
  updatedAt: number;
}>;

// Inputs of `useRecipePersistence`.
type UseRecipePersistenceParams = {
  // Id forwarded to `onPersistRecipe`; a change re-runs the initial load effect.
  recipeId: string;
  // Name used to seed the workflow name (normalized, falling back to "Unnamed").
  initialRecipeName: string;
  // Payload that is parsed and loaded into the store on initial load.
  initialPayload: RecipePayloadResult["payload"];
  // Seeds the "last saved at" value shown in the save label.
  initialSavedAt: number;
  // Current payload build result; its `payload` is what gets saved and copied.
  payloadResult: RecipePayloadResult;
  // Host callback that actually stores the recipe.
  onPersistRecipe: PersistRecipeFn;
  // Called before the initial payload is loaded.
  resetRecipe: () => void;
  // Loads a parsed snapshot (initial load and manual import).
  loadRecipe: (snapshot: RecipeSnapshot) => void;
  // Reads the payload from the store right after loading, to compute the
  // baseline "saved" signature.
  getCurrentPayloadFromStore: () => RecipePayloadResult["payload"];
};

// Values and actions returned by `useRecipePersistence`.
type UseRecipePersistenceResult = {
  // True once the initial load effect has finished for the current inputs.
  initialRecipeReady: boolean;
  // Editable recipe name as typed by the user (not normalized).
  workflowName: string;
  setWorkflowName: (value: string) => void;
  // True while a save request is in flight.
  saveLoading: boolean;
  saveTone: SaveTone;
  // Label produced by `formatSavedLabel` from the last-saved value.
  savedAtLabel: string;
  // Briefly true after a successful copy to the clipboard.
  copied: boolean;
  // Open state of the import dialog; only stored here, not interpreted.
  importOpen: boolean;
  setImportOpen: (open: boolean) => void;
  // Signature of the normalized name plus the current payload.
  currentSignature: string;
  // Saves the current name and payload through `onPersistRecipe`.
  persistRecipe: () => Promise<void>;
  // Copies a sanitized JSON form of the payload to the clipboard.
  copyRecipe: () => Promise<void>;
  // Imports a pasted payload; returns an error message, or null on success.
  importRecipe: (value: string) => string | null;
};

/**
 * Returns a deep copy of `value` with secrets removed so it can be shared.
 *
 * - Every property named exactly `api_key` is dropped, at any depth.
 * - For objects whose `provider_type` is "stdio" and whose `env` is a plain
 *   (non-array) object, the env keys are kept but every value is blanked.
 * - Primitives, `null` and other non-object values are returned unchanged.
 */
function stripApiKeys(value: unknown): unknown {
  if (value === null || typeof value !== "object") {
    return value;
  }
  if (Array.isArray(value)) {
    return value.map(stripApiKeys);
  }

  const cleaned: Record<string, unknown> = {};
  Object.entries(value).forEach(([key, entry]) => {
    if (key !== "api_key") {
      cleaned[key] = stripApiKeys(entry);
    }
  });

  // Inspect the already-cleaned copy, so `env` here never contains `api_key`.
  const { provider_type: providerType, env } = cleaned;
  const envIsPlainObject =
    typeof env === "object" && env !== null && !Array.isArray(env);
  if (providerType === "stdio" && envIsPlainObject) {
    const envKeys = Object.keys(env as Record<string, unknown>);
    cleaned.env = Object.fromEntries(envKeys.map((envKey) => [envKey, ""]));
  }
  return cleaned;
}

/**
 * Derives an `owner/name` repo id from a slash-separated path.
 *
 * Empty segments are ignored. When the path has at least three segments and
 * the first one is `datasets`, that prefix is skipped. Returns "" when the
 * input is not a string or fewer than two segments are available.
 */
function inferHfRepoIdFromPath(pathValue: unknown): string {
  if (typeof pathValue !== "string") {
    return "";
  }
  const segments = pathValue
    .trim()
    .split("/")
    .filter((segment) => segment.length > 0);
  const hasDatasetsPrefix = segments.length >= 3 && segments[0] === "datasets";
  const start = hasDatasetsPrefix ? 1 : 0;
  const repoSegments = segments.slice(start, start + 2);
  return repoSegments.length === 2 ? repoSegments.join("/") : "";
}

/**
 * Scrubs seed-source details from a recipe payload before it is shared.
 *
 * Mutates `payload` in place and returns it:
 * - removes `recipe.seed_config.source.token`;
 * - for "hf" seeds, reduces `source.path` to the inferred repo id;
 * - for "local" / "unstructured" seeds, blanks `source.path`;
 * - in both cases clears the seed-related fields cached under `ui`.
 *
 * The seed type is read from both `source.seed_type` and
 * `ui.seed_source_type`; a match on either one triggers the reset.
 * Non-object payloads are returned untouched.
 */
function sanitizeSeedForShare(payload: unknown): unknown {
  const toRecord = (value: unknown): Record<string, unknown> | null =>
    value && typeof value === "object"
      ? (value as Record<string, unknown>)
      : null;
  const toStringOrNull = (value: unknown): string | null =>
    typeof value === "string" ? value : null;

  const root = toRecord(payload);
  if (!root) {
    return payload;
  }

  const ui = toRecord(root.ui);
  const seedConfig = toRecord(toRecord(root.recipe)?.seed_config);
  const source = toRecord(seedConfig?.source);

  if (source && "token" in source) {
    delete source.token;
  }

  const seedTypes = [
    toStringOrNull(source?.seed_type),
    toStringOrNull(ui?.seed_source_type),
  ];
  const isHfSeed = seedTypes.some((seedType) => seedType === "hf");
  const isLocalSeed = seedTypes.some(
    (seedType) => seedType === "local" || seedType === "unstructured",
  );

  // Rewrites `source.path` only when the key already exists on the source.
  const replaceSourcePath = (nextPath: string): void => {
    if (source && "path" in source) {
      source.path = nextPath;
    }
  };
  // Clears the seed-derived UI fields (columns, preview rows, file names).
  const clearSeedUiState = (): void => {
    if (!ui) {
      return;
    }
    ui.seed_columns = [];
    ui.seed_drop_columns = [];
    ui.seed_preview_rows = [];
    ui.local_file_name = "";
    ui.unstructured_file_name = "";
  };

  if (isHfSeed) {
    replaceSourcePath(inferHfRepoIdFromPath(source?.path));
    clearSeedUiState();
  }

  if (isLocalSeed) {
    replaceSourcePath("");
    clearSeedUiState();
  }

  return root;
}

/**
 * Manages loading, saving, autosaving, copying and importing of one recipe.
 *
 * Behaviour:
 * - On mount, and whenever the initial inputs change, the store is reset, the
 *   initial payload is parsed and loaded, and the resulting store payload
 *   becomes the "saved" baseline signature.
 * - The recipe is "dirty" when the current signature (normalized name plus
 *   payload) differs from the baseline. A dirty recipe is autosaved after an
 *   800 ms debounce.
 * - A failed save does not trigger another automatic attempt until the recipe
 *   changes again. Without this guard the autosave effect re-armed itself
 *   right after every failure, retrying and toasting in an endless loop.
 *   Calling `persistRecipe` manually always retries.
 *
 * @returns State flags, the editable workflow name and the persist / copy /
 *   import actions described by `UseRecipePersistenceResult`.
 */
export function useRecipePersistence({
  recipeId,
  initialRecipeName,
  initialPayload,
  initialSavedAt,
  payloadResult,
  onPersistRecipe,
  resetRecipe,
  loadRecipe,
  getCurrentPayloadFromStore,
}: UseRecipePersistenceParams): UseRecipePersistenceResult {
  const [initialRecipeReady, setInitialRecipeReady] = useState(false);
  const [workflowName, setWorkflowName] = useState("Unnamed");
  const [lastSavedAt, setLastSavedAt] = useState<number | null>(null);
  const [savedSignature, setSavedSignature] = useState("");
  // Signature of the most recent save attempt that failed, or null. While it
  // equals the current signature, autosave stays paused.
  const [failedSignature, setFailedSignature] = useState<string | null>(null);
  const [saveLoading, setSaveLoading] = useState(false);
  const [copied, setCopied] = useState(false);
  const [importOpen, setImportOpen] = useState(false);

  const normalizedWorkflowName = useMemo(
    () => normalizeNonEmptyName(workflowName, "Unnamed"),
    [workflowName],
  );
  const currentPayload = payloadResult.payload;
  const currentSignature = useMemo(
    () => buildSignature(normalizedWorkflowName, currentPayload),
    [currentPayload, normalizedWorkflowName],
  );
  // An empty baseline means "nothing loaded yet", which never counts as dirty.
  const isDirty = savedSignature.length > 0 && currentSignature !== savedSignature;
  const saveTone: SaveTone = !isDirty && Boolean(lastSavedAt) ? "success" : "error";
  const savedAtLabel = formatSavedLabel(lastSavedAt);

  useEffect(() => {
    setInitialRecipeReady(false);
    const nextName = normalizeNonEmptyName(initialRecipeName, "Unnamed");
    resetRecipe();
    setWorkflowName(nextName);
    setLastSavedAt(initialSavedAt);
    setCopied(false);
    // A freshly loaded recipe starts with no failed save on record.
    setFailedSignature(null);

    // Round-trip through JSON so the initial payload takes the same parsing
    // path as a manually imported one.
    const parsed = importRecipePayload(JSON.stringify(initialPayload));
    if (parsed.snapshot) {
      loadRecipe(parsed.snapshot);
    } else {
      console.error("Failed to load recipe payload.", parsed.errors);
    }

    // Read the baseline from the store after loading, so the saved signature
    // reflects what the store actually holds.
    const payload = getCurrentPayloadFromStore();
    setSavedSignature(buildSignature(nextName, payload));
    setInitialRecipeReady(true);
  }, [
    getCurrentPayloadFromStore,
    initialPayload,
    initialRecipeName,
    initialSavedAt,
    loadRecipe,
    recipeId,
    resetRecipe,
  ]);

  const persistRecipe = useCallback(async (): Promise<void> => {
    if (saveLoading) {
      return;
    }
    const nextName = normalizeNonEmptyName(workflowName, "Unnamed");
    if (nextName !== workflowName) {
      setWorkflowName(nextName);
    }
    // Captured before awaiting, so edits made during the request leave the
    // recipe dirty afterwards.
    const attemptedSignature = buildSignature(nextName, currentPayload);

    setSaveLoading(true);
    try {
      const result = await onPersistRecipe({
        id: recipeId,
        name: nextName,
        payload: currentPayload,
      });
      setLastSavedAt(result.updatedAt);
      setSavedSignature(attemptedSignature);
      setFailedSignature(null);
    } catch (error) {
      // Record what failed so autosave does not retry the same content.
      setFailedSignature(attemptedSignature);
      console.error("Save recipe failed:", error);
      toastError("Save failed", "Could not save recipe.");
    } finally {
      setSaveLoading(false);
    }
  }, [currentPayload, onPersistRecipe, recipeId, saveLoading, workflowName]);

  // Debounced autosave. Skipped while a save is running and while the current
  // content is the same content whose last save attempt failed.
  useEffect(() => {
    if (!isDirty || saveLoading || failedSignature === currentSignature) {
      return;
    }
    const timeoutId = window.setTimeout(() => {
      void persistRecipe();
    }, 800);
    return () => window.clearTimeout(timeoutId);
  }, [currentSignature, failedSignature, isDirty, persistRecipe, saveLoading]);

  const copyRecipe = useCallback(async (): Promise<void> => {
    setCopied(false);
    try {
      // stripApiKeys returns a deep copy, so the in-place seed sanitizing
      // below never touches the live payload.
      const safePayload = sanitizeSeedForShare(stripApiKeys(payloadResult.payload));
      const ok = await copyTextToClipboard(JSON.stringify(safePayload, null, 2));
      if (!ok) {
        throw new Error("Clipboard not available.");
      }
      setCopied(true);
      window.setTimeout(() => setCopied(false), 1500);
      toastSuccess("👨‍🍳 Recipe copied");
    } catch (error) {
      console.error("Copy failed:", error);
      toastError("Copy failed", "Could not copy payload.");
    }
  }, [payloadResult.payload]);

  const importRecipe = useCallback(
    (value: string): string | null => {
      const result = importRecipePayload(value);
      if (result.errors.length > 0 || !result.snapshot) {
        return result.errors[0] ?? "Invalid payload.";
      }
      loadRecipe(result.snapshot);
      toastSuccess("Recipe imported");
      return null;
    },
    [loadRecipe],
  );

  return {
    initialRecipeReady,
    workflowName,
    setWorkflowName,
    saveLoading,
    saveTone,
    savedAtLabel,
    copied,
    importOpen,
    setImportOpen,
    currentSignature,
    persistRecipe,
    copyRecipe,
    importRecipe,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/hooks/use-recipe-runtime-visuals.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  BalanceScaleIcon,
  Clock01Icon,
  CodeIcon,
  CodeSimpleIcon,
  DiceFaces03Icon,
  EqualSignIcon,
  FingerPrintIcon,
  FunctionIcon,
  Plug01Icon,
  Parabola02Icon,
  PencilEdit02Icon,
  Plant01Icon,
  Shield02Icon,
  Tag01Icon,
  TagsIcon,
  UserAccountIcon,
} from "@hugeicons/core-free-icons";
import { useMemo } from "react";
import type { Edge } from "@xyflow/react";
import { deriveDisplayGraph } from "../utils/graph/derive-display-graph";
import {
  deriveGraphRuntimeVisualState,
  pickLatestActiveExecution,
} from "../utils/graph/runtime-visual-state";
import type {
  LayoutDirection,
  LlmType,
  NodeConfig,
  RecipeNode as RecipeBuilderNode,
  SamplerType,
} from "../types";
import type { RecipeExecutionRecord } from "../execution-types";

// Type shared by the imported icon definitions, taken from one representative icon.
type IconType = typeof CodeIcon;

// Icon for each sampler type. Some types deliberately share an icon
// (uniform/bernoulli, datetime/timedelta, person/person_from_faker).
const SAMPLER_ICONS: Record<SamplerType, IconType> = {
  category: Tag01Icon,
  subcategory: TagsIcon,
  uniform: EqualSignIcon,
  gaussian: Parabola02Icon,
  bernoulli: EqualSignIcon,
  datetime: Clock01Icon,
  timedelta: Clock01Icon,
  uuid: FingerPrintIcon,
  person: UserAccountIcon,
  person_from_faker: UserAccountIcon,
};

// Icon for each LLM column type.
const LLM_ICONS: Record<LlmType, IconType> = {
  text: PencilEdit02Icon,
  structured: CodeIcon,
  code: CodeSimpleIcon,
  judge: BalanceScaleIcon,
};

/**
 * Picks the icon for the column an execution is currently processing.
 *
 * @param config Node config of that column, or null when none was matched.
 * @returns A dice icon when there is no config, the sampler- or LLM-specific
 *   icon for those kinds, a fixed icon for the other known kinds, and a
 *   pencil icon for any kind not listed here.
 */
function resolveExecutionColumnIcon(config: NodeConfig | null): IconType {
  if (!config) {
    return DiceFaces03Icon;
  }
  switch (config.kind) {
    case "sampler":
      return SAMPLER_ICONS[config.sampler_type];
    case "llm":
      return LLM_ICONS[config.llm_type];
    case "expression":
      return FunctionIcon;
    case "validator":
    case "model_provider":
      return Shield02Icon;
    case "seed":
    case "model_config":
      return Plant01Icon;
    case "tool_config":
      return Plug01Icon;
    default:
      return PencilEdit02Icon;
  }
}

// Inputs of `useRecipeRuntimeVisuals`.
type UseRecipeRuntimeVisualsArgs = {
  // All known executions; the latest active one is picked from this list.
  executions: RecipeExecutionRecord[];
  // Node configs; iterated by value to match the current column by name.
  configs: Record<string, NodeConfig>;
  // Editor graph nodes and edges, forwarded to the display-graph derivation.
  nodes: RecipeBuilderNode[];
  edges: Edge[];
  layoutDirection: LayoutDirection;
  // Forwarded unchanged to `deriveDisplayGraph`.
  auxNodePositions: Record<string, { x: number; y: number }>;
  // Forwarded unchanged to `deriveDisplayGraph`.
  llmAuxVisibility: Record<string, boolean>;
};

// Values returned by `useRecipeRuntimeVisuals`.
type UseRecipeRuntimeVisualsResult = {
  // Result of `pickLatestActiveExecution`; typed as nullable.
  activeExecution: RecipeExecutionRecord | null;
  // Output of `deriveGraphRuntimeVisualState` for the active execution.
  runtimeVisualState: ReturnType<typeof deriveGraphRuntimeVisualState>;
  // Output of `deriveDisplayGraph`, which also receives the runtime state.
  displayGraph: ReturnType<typeof deriveDisplayGraph>;
  // Ids of `displayGraph.nodes`, in the same order.
  displayNodeIds: string[];
  // Icon for the column the active execution reports as current.
  currentColumnIcon: IconType;
};

/**
 * Derives everything the canvas needs to visualise a running execution.
 *
 * Picks the latest active execution, computes the runtime visual state from
 * it, builds the display graph with that state applied, and resolves the icon
 * of the column the execution is currently working on. Each step is memoized.
 */
export function useRecipeRuntimeVisuals({
  executions,
  configs,
  nodes,
  edges,
  layoutDirection,
  auxNodePositions,
  llmAuxVisibility,
}: UseRecipeRuntimeVisualsArgs): UseRecipeRuntimeVisualsResult {
  const activeExecution = useMemo(
    () => pickLatestActiveExecution(executions),
    [executions],
  );

  const runtimeVisualState = useMemo(() => {
    return deriveGraphRuntimeVisualState({ activeExecution, configs, edges });
  }, [activeExecution, configs, edges]);

  const displayGraph = useMemo(() => {
    return deriveDisplayGraph({
      nodes,
      edges,
      configs,
      layoutDirection,
      auxNodePositions,
      llmAuxVisibility,
      runtime: runtimeVisualState,
    });
  }, [
    auxNodePositions,
    configs,
    edges,
    layoutDirection,
    llmAuxVisibility,
    nodes,
    runtimeVisualState,
  ]);

  // Column names are compared after trimming on both sides.
  const currentColumnName = activeExecution?.current_column?.trim();
  const currentColumnConfig = useMemo(() => {
    if (!currentColumnName) {
      return null;
    }
    const match = Object.values(configs).find(
      (candidate) => candidate.name.trim() === currentColumnName,
    );
    return match ?? null;
  }, [configs, currentColumnName]);

  const currentColumnIcon = useMemo(
    () => resolveExecutionColumnIcon(currentColumnConfig),
    [currentColumnConfig],
  );

  const displayNodeIds = useMemo(() => {
    return displayGraph.nodes.map(({ id }) => id);
  }, [displayGraph.nodes]);

  return {
    activeExecution,
    runtimeVisualState,
    displayGraph,
    displayNodeIds,
    currentColumnIcon,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/hooks/use-recipe-studio-actions.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useRecipeExecutions } from "./use-recipe-executions";
import { useRecipePersistence } from "./use-recipe-persistence";
import type {
  RecipeExecutionKind,
  RecipeExecutionRecord,
} from "../execution-types";
import type { RecipeRunSettings } from "../stores/recipe-executions";
import type { RecipeSnapshot } from "../utils/import";
import type { RecipePayload, RecipePayloadResult } from "../utils/payload/types";

// Tone of the save indicator, as reported by the persistence hook's `saveTone`.
type SaveTone = "success" | "error";

// Async callback that stores a recipe; forwarded to `useRecipePersistence`.
// Resolves with the stored id and an `updatedAt` number (presumably an epoch
// timestamp — TODO confirm the unit against the caller).
type PersistRecipeFn = (input: {
  id: string | null;
  name: string;
  payload: RecipePayload;
}) => Promise<{
  id: string;
  updatedAt: number;
}>;

// Inputs of `useRecipeStudioActions`. Everything up to and including
// `getCurrentPayloadFromStore` is forwarded to `useRecipePersistence`;
// `recipeId` and `payloadResult` are also forwarded to `useRecipeExecutions`.
type UseRecipeStudioActionsParams = {
  recipeId: string;
  initialRecipeName: string;
  initialPayload: RecipePayload;
  initialSavedAt: number;
  payloadResult: RecipePayloadResult;
  onPersistRecipe: PersistRecipeFn;
  resetRecipe: () => void;
  loadRecipe: (snapshot: RecipeSnapshot) => void;
  getCurrentPayloadFromStore: () => RecipePayload;
  // Optional callbacks forwarded unchanged to `useRecipeExecutions`.
  onExecutionStart?: () => void;
  onPreviewSuccess?: () => void;
};

// Combined surface returned by `useRecipeStudioActions`: the fields of the
// persistence hook and of the executions hook merged into one flat object.
type UseRecipeStudioActionsResult = {
  // --- from the persistence hook ---
  initialRecipeReady: boolean;
  workflowName: string;
  setWorkflowName: (value: string) => void;
  saveLoading: boolean;
  saveTone: SaveTone;
  savedAtLabel: string;
  copied: boolean;
  importOpen: boolean;
  setImportOpen: (open: boolean) => void;
  // --- from the executions hook ---
  runDialogOpen: boolean;
  runDialogKind: RecipeExecutionKind;
  setRunDialogKind: (kind: RecipeExecutionKind) => void;
  setRunDialogOpen: (open: boolean) => void;
  previewRows: number;
  fullRows: number;
  fullRunName: string;
  setPreviewRows: (rows: number) => void;
  setFullRows: (rows: number) => void;
  setFullRunName: (name: string) => void;
  runErrors: string[];
  runSettings: RecipeRunSettings;
  setRunSettings: (patch: Partial<RecipeRunSettings>) => void;
  previewLoading: boolean;
  fullLoading: boolean;
  // from the persistence hook; also passed into the executions hook
  currentSignature: string;
  executions: RecipeExecutionRecord[];
  selectedExecutionId: string | null;
  setSelectedExecutionId: (id: string) => void;
  // from the persistence hook
  persistRecipe: () => Promise<void>;
  openRunDialog: (kind: RecipeExecutionKind) => void;
  runFromDialog: () => Promise<boolean>;
  validateFromDialog: () => Promise<boolean>;
  validateLoading: boolean;
  validateResult: {
    valid: boolean;
    errors: string[];
    rawDetail: string | null;
  } | null;
  runPreview: () => Promise<boolean>;
  runFull: () => Promise<boolean>;
  cancelExecution: (id: string) => Promise<void>;
  loadExecutionDatasetPage: (id: string, page: number) => Promise<void>;
  // from the persistence hook
  copyRecipe: () => Promise<void>;
  importRecipe: (value: string) => string | null;
};

/**
 * Facade hook that wires the persistence and executions hooks together and
 * exposes their fields as one flat object.
 *
 * The persistence hook runs first because its `currentSignature` is an input
 * of the executions hook.
 */
export function useRecipeStudioActions({
  recipeId,
  initialRecipeName,
  initialPayload,
  initialSavedAt,
  payloadResult,
  onPersistRecipe,
  resetRecipe,
  loadRecipe,
  getCurrentPayloadFromStore,
  onExecutionStart,
  onPreviewSuccess,
}: UseRecipeStudioActionsParams): UseRecipeStudioActionsResult {
  const {
    initialRecipeReady,
    workflowName,
    setWorkflowName,
    saveLoading,
    saveTone,
    savedAtLabel,
    copied,
    importOpen,
    setImportOpen,
    currentSignature,
    persistRecipe,
    copyRecipe,
    importRecipe,
  } = useRecipePersistence({
    recipeId,
    initialRecipeName,
    initialPayload,
    initialSavedAt,
    payloadResult,
    onPersistRecipe,
    resetRecipe,
    loadRecipe,
    getCurrentPayloadFromStore,
  });

  const {
    runDialogOpen,
    runDialogKind,
    setRunDialogKind,
    setRunDialogOpen,
    previewRows,
    fullRows,
    fullRunName,
    setPreviewRows,
    setFullRows,
    setFullRunName,
    runErrors,
    runSettings,
    setRunSettings,
    previewLoading,
    fullLoading,
    executions,
    selectedExecutionId,
    setSelectedExecutionId,
    openRunDialog,
    runFromDialog,
    validateFromDialog,
    validateLoading,
    validateResult,
    runPreview,
    runFull,
    cancelExecution,
    loadExecutionDatasetPage,
  } = useRecipeExecutions({
    recipeId,
    currentSignature,
    payloadResult,
    onExecutionStart,
    onPreviewSuccess,
  });

  return {
    initialRecipeReady,
    workflowName,
    setWorkflowName,
    saveLoading,
    saveTone,
    savedAtLabel,
    copied,
    importOpen,
    setImportOpen,
    runDialogOpen,
    runDialogKind,
    setRunDialogKind,
    setRunDialogOpen,
    previewRows,
    fullRows,
    fullRunName,
    setPreviewRows,
    setFullRows,
    setFullRunName,
    runErrors,
    runSettings,
    setRunSettings,
    previewLoading,
    fullLoading,
    currentSignature,
    executions,
    selectedExecutionId,
    setSelectedExecutionId,
    persistRecipe,
    openRunDialog,
    runFromDialog,
    validateFromDialog,
    validateLoading,
    validateResult,
    runPreview,
    runFull,
    cancelExecution,
    loadExecutionDatasetPage,
    copyRecipe,
    importRecipe,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { RecipeStudioPage } from "./recipe-studio-page";
export type {
  PersistRecipeInput,
  PersistRecipeResult,
  RecipeStudioPageProps,
} from "./recipe-studio-page";
export type { RecipePayload } from "./utils/payload/types";
export { createEmptyRecipePayload } from "./utils/payload/empty";


================================================
FILE: studio/frontend/src/features/recipe-studio/recipe-studio-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  DocumentAttachmentIcon,
  PlusSignIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import {
  Background,
  BackgroundVariant,
  type Edge,
  type EdgeTypes,
  type Node,
  type NodeTypes,
  Panel,
  ReactFlow,
  type ReactFlowInstance,
} from "@xyflow/react";
import {
  type ReactElement,
  useCallback,
  useEffect,
  useMemo,
  useRef,
  useState,
} from "react";
import { useShallow } from "zustand/react/shallow";
import "@xyflow/react/dist/style.css";
import { Button } from "@/components/ui/button";
import { BlockSheet } from "./components/block-sheet";
import { LayoutControls } from "./components/controls/layout-controls";
import { RunValidateFloatingControls } from "./components/controls/run-validate-floating-controls";
import { ViewportControls } from "./components/controls/viewport-controls";
import { ExecutionsView } from "./components/executions/executions-view";
import { InternalsSync } from "./components/graph/internals-sync";
import {
  RecipeGraphAuxNode,
  type RecipeGraphAuxNodeData,
} from "./components/recipe-graph-aux-node";
import { RecipeNode } from "./components/recipe-graph-node";
import { RecipeGraphSemanticEdge } from "./components/recipe-graph-semantic-edge";
import { RecipeStudioHeader } from "./components/recipe-studio-header";
import { DataEdge } from "./components/rf-ui/data-edge";
import { ExecutionProgressIsland } from "./components/runtime/execution-progress-island";
import { ConfigDialog } from "./dialogs/config-dialog";
import { ImportDialog } from "./dialogs/import-dialog";
import { RunDialog } from "./dialogs/preview-dialog";
import { ProcessorsDialog } from "./dialogs/processors-dialog";
import type {
  RecipeExecutionRecord,
  RecipeStudioView,
} from "./execution-types";
import { isExecutionInProgress } from "./executions/execution-helpers";
import { useRecipeEditorGraph } from "./hooks/use-recipe-editor-graph";
import { useRecipeRuntimeVisuals } from "./hooks/use-recipe-runtime-visuals";
import { useRecipeStudioActions } from "./hooks/use-recipe-studio-actions";
import { useRecipeStudioStore } from "./stores/recipe-studio";
import type { RecipeNodeData } from "./types";
import { getGraphWarnings } from "./utils/graph-warnings";
import { getFitNodeIdsIgnoringNotes } from "./utils/graph/fit-view";
import { buildRecipePayload } from "./utils/payload";
import type { RecipePayload } from "./utils/payload/types";
import { buildDefaultSchemaTransform } from "./utils/processors";
import { buildDialogOptions } from "./utils/recipe-studio-view";

// Renderer lookup tables for node and edge type names. Declared at module
// level, so the same object instances are reused on every render.
const NODE_TYPES: NodeTypes = { builder: RecipeNode, aux: RecipeGraphAuxNode };
const EDGE_TYPES: EdgeTypes = {
  canvas: DataEdge,
  semantic: RecipeGraphSemanticEdge,
};
// How long (ms, measured from `finishedAt`) a completed execution is kept as
// the "recent completed" execution before being cleared.
const COMPLETE_ISLAND_VISIBLE_MS = 7_000;
// NOTE(review): presumably the delay before fitting the view after switching
// back to the editor tab — its use is outside this excerpt; confirm.
const TAB_SWITCH_FIT_DELAY_MS = 110;
// Animation duration (ms) passed to `fitView`.
const FIT_ANIMATION_MS = 340;

// Argument of the `onPersistRecipe` callback: the recipe id (nullable in the
// type), its display name and the payload to store.
export type PersistRecipeInput = {
  id: string | null;
  name: string;
  payload: RecipePayload;
};

// Resolved value of the `onPersistRecipe` callback.
export type PersistRecipeResult = {
  id: string;
  // Presumably an epoch timestamp — TODO confirm the unit against the caller.
  updatedAt: number;
};

// Props of `RecipeStudioPage`. All five are passed on to
// `useRecipeStudioActions`, which handles loading and saving.
export type RecipeStudioPageProps = {
  recipeId: string;
  initialRecipeName: string;
  initialPayload: RecipePayload;
  initialSavedAt: number;
  // Host callback that stores the recipe and reports the new update time.
  onPersistRecipe: (input: PersistRecipeInput) => Promise<PersistRecipeResult>;
};

export function RecipeStudioPage({
  recipeId,
  initialRecipeName,
  initialPayload,
  initialSavedAt,
  onPersistRecipe,
}: RecipeStudioPageProps): ReactElement {
  const {
    nodes,
    edges,
    auxNodePositions,
    llmAuxVisibility,
    configs,
    processors,
    sheetOpen,
    sheetView,
    activeConfigId,
    dialogOpen,
    layoutDirection,
    fitViewTick,
    onNodesChange,
    onEdgesChange,
    onConnect,
    addSamplerNode,
    addSeedNode,
    addLlmNode,
    addModelProviderNode,
    addModelConfigNode,
    addToolProfileNode,
    addExpressionNode,
    addValidatorNode,
    addMarkdownNoteNode,
    selectConfig,
    openConfig,
    updateConfig,
    isValidConnection,
    setSheetOpen,
    setSheetView,
    setProcessors,
    setDialogOpen,
    resetRecipe,
    loadRecipe,
    setLayoutDirection,
    applyLayout,
    setAuxNodePosition,
    setExecutionLocked,
  } = useRecipeStudioStore(
    useShallow((state) => ({
      nodes: state.nodes,
      edges: state.edges,
      auxNodePositions: state.auxNodePositions,
      llmAuxVisibility: state.llmAuxVisibility,
      configs: state.configs,
      processors: state.processors,
      sheetOpen: state.sheetOpen,
      sheetView: state.sheetView,
      activeConfigId: state.activeConfigId,
      dialogOpen: state.dialogOpen,
      layoutDirection: state.layoutDirection,
      fitViewTick: state.fitViewTick,
      onNodesChange: state.onNodesChange,
      onEdgesChange: state.onEdgesChange,
      onConnect: state.onConnect,
      addSamplerNode: state.addSamplerNode,
      addSeedNode: state.addSeedNode,
      addLlmNode: state.addLlmNode,
      addModelProviderNode: state.addModelProviderNode,
      addModelConfigNode: state.addModelConfigNode,
      addToolProfileNode: state.addToolProfileNode,
      addExpressionNode: state.addExpressionNode,
      addValidatorNode: state.addValidatorNode,
      addMarkdownNoteNode: state.addMarkdownNoteNode,
      selectConfig: state.selectConfig,
      openConfig: state.openConfig,
      updateConfig: state.updateConfig,
      isValidConnection: state.isValidConnection,
      setSheetOpen: state.setSheetOpen,
      setSheetView: state.setSheetView,
      setProcessors: state.setProcessors,
      setDialogOpen: state.setDialogOpen,
      resetRecipe: state.resetRecipe,
      loadRecipe: state.loadRecipe,
      setLayoutDirection: state.setLayoutDirection,
      applyLayout: state.applyLayout,
      setAuxNodePosition: state.setAuxNodePosition,
      setExecutionLocked: state.setExecutionLocked,
    })),
  );
  const [sheetContainer, setSheetContainer] = useState<HTMLDivElement | null>(
    null,
  );
  const flowContainerRef = useRef<HTMLDivElement | null>(null);
  const [activeView, setActiveView] = useState<RecipeStudioView>("editor");
  const [processorsOpen, setProcessorsOpen] = useState(false);
  const [interactive, setInteractive] = useState(true);
  const [runtimeIslandMinimized, setRuntimeIslandMinimized] = useState(false);
  const [recentCompletedExecution, setRecentCompletedExecution] =
    useState<RecipeExecutionRecord | null>(null);
  const [reactFlowInstance, setReactFlowInstance] = useState<ReactFlowInstance<
    Node<RecipeNodeData | RecipeGraphAuxNodeData>,
    Edge
  > | null>(null);
  const lastProcessedFitTickRef = useRef(0);
  const previousActiveViewRef = useRef<RecipeStudioView>("editor");
  const previousActiveExecutionIdRef = useRef<string | null>(null);
  const pendingEditorTabFitRef = useRef(false);
  const forceEditorTabFitRef = useRef(false);
  const viewportMovedSinceAutoFitRef = useRef(true);
  const {
    handleNodeClick,
    handleNodeDoubleClick,
    handleNodesChange,
    handleEdgesChange,
    handleDragOver,
    handleDrop,
    handleAddSamplerFromSheet,
    handleAddSeedFromSheet,
    handleAddLlmFromSheet,
    handleAddModelProviderFromSheet,
    handleAddModelConfigFromSheet,
    handleAddToolProfileFromSheet,
    handleAddExpressionFromSheet,
    handleAddValidatorFromSheet,
    handleAddMarkdownNoteFromSheet,
  } = useRecipeEditorGraph({
    nodes,
    edges,
    configs,
    reactFlowInstance,
    flowContainerRef,
    selectConfig,
    openConfig,
    onNodesChange,
    onEdgesChange,
    setAuxNodePosition,
    addSamplerNode,
    addSeedNode,
    addLlmNode,
    addModelProviderNode,
    addModelConfigNode,
    addToolProfileNode,
    addExpressionNode,
    addValidatorNode,
    addMarkdownNoteNode,
  });

  const configList = useMemo(() => Object.values(configs), [configs]);
  const config = activeConfigId ? configs[activeConfigId] : null;
  const dialogOptions = useMemo(
    () => buildDialogOptions(configList),
    [configList],
  );

  const handleToggleDirection = useCallback(() => {
    setLayoutDirection(layoutDirection === "LR" ? "TB" : "LR");
  }, [layoutDirection, setLayoutDirection]);

  const payloadResult = useMemo(
    () =>
      buildRecipePayload(
        configs,
        nodes,
        edges,
        processors,
        layoutDirection,
        auxNodePositions,
      ),
    [auxNodePositions, configs, edges, layoutDirection, nodes, processors],
  );
  const getCurrentPayloadFromStore = useCallback((): RecipePayload => {
    const state = useRecipeStudioStore.getState();
    return buildRecipePayload(
      state.configs,
      state.nodes,
      state.edges,
      state.processors,
      state.layoutDirection,
      state.auxNodePositions,
    ).payload;
  }, []);
  const {
    initialRecipeReady,
    workflowName,
    setWorkflowName,
    saveLoading,
    saveTone,
    savedAtLabel,
    copied,
    importOpen,
    setImportOpen,
    runDialogOpen,
    runDialogKind,
    setRunDialogKind,
    setRunDialogOpen,
    previewRows,
    fullRows,
    fullRunName,
    setPreviewRows,
    setFullRows,
    setFullRunName,
    runErrors,
    runSettings,
    setRunSettings,
    previewLoading,
    fullLoading,
    currentSignature,
    executions,
    selectedExecutionId,
    setSelectedExecutionId,
    persistRecipe,
    openRunDialog,
    runFromDialog,
    validateFromDialog,
    validateLoading,
    validateResult,
    cancelExecution,
    loadExecutionDatasetPage,
    copyRecipe,
    importRecipe,
  } = useRecipeStudioActions({
    recipeId,
    initialRecipeName,
    initialPayload,
    initialSavedAt,
    payloadResult,
    onPersistRecipe,
    resetRecipe,
    loadRecipe,
    getCurrentPayloadFromStore,
  });
  const {
    activeExecution,
    runtimeVisualState,
    displayGraph,
    displayNodeIds,
    currentColumnIcon,
  } = useRecipeRuntimeVisuals({
    executions,
    configs,
    nodes,
    edges,
    layoutDirection,
    auxNodePositions,
    llmAuxVisibility,
  });
  const executionLocked = runtimeVisualState.executionLocked;
  const canvasInteractive = interactive && !executionLocked;
  const runBusy = previewLoading || fullLoading || executionLocked;
  const islandExecution = activeExecution ?? recentCompletedExecution;

  const toggleInteractive = useCallback(() => {
    if (executionLocked) {
      return;
    }
    setInteractive((value) => !value);
  }, [executionLocked]);

  useEffect(() => {
    setExecutionLocked(executionLocked);
  }, [executionLocked, setExecutionLocked]);

  useEffect(() => {
    const activeExecutionId = activeExecution?.id ?? null;
    if (
      activeExecutionId &&
      activeExecutionId !== previousActiveExecutionIdRef.current
    ) {
      setRuntimeIslandMinimized(false);
    }
    previousActiveExecutionIdRef.current = activeExecutionId;
  }, [activeExecution?.id]);

  useEffect(() => {
    if (activeExecution) {
      setRecentCompletedExecution(null);
      return;
    }
    const latestCompleted = executions.find(
      (execution) =>
        execution.status === "completed" &&
        typeof execution.finishedAt === "number",
    );
    if (!latestCompleted || typeof latestCompleted.finishedAt !== "number") {
      setRecentCompletedExecution(null);
      return;
    }
    const elapsedMs = Date.now() - latestCompleted.finishedAt;
    if (elapsedMs >= COMPLETE_ISLAND_VISIBLE_MS) {
      setRecentCompletedExecution(null);
      return;
    }
    setRecentCompletedExecution(latestCompleted);
    const hideTimer = window.setTimeout(() => {
      setRecentCompletedExecution(null);
      setActiveView((currentView) =>
        currentView === "editor" ? "executions" : currentView,
      );
    }, COMPLETE_ISLAND_VISIBLE_MS - elapsedMs);
    return () => {
      window.clearTimeout(hideTimer);
    };
  }, [activeExecution, executions]);

  const openProcessorsFromSheet = useCallback(() => {
    if (
      !processors.some(
        (processor) => processor.processor_type === "schema_transform",
      )
    ) {
      setProcessors([...processors, buildDefaultSchemaTransform()]);
    }
    setProcessorsOpen(true);
  }, [processors, setProcessors]);

  const openRootBlockSheet = useCallback(() => {
    setSheetView("root");
    setSheetOpen(true);
  }, [setSheetOpen, setSheetView]);
  const openSourceBlockSheet = useCallback(() => {
    setSheetView("seed");
    setSheetOpen(true);
  }, [setSheetOpen, setSheetView]);
  const runDialogRows = runDialogKind === "preview" ? previewRows : fullRows;
  const runDialogLoading =
    runDialogKind === "preview" ? previewLoading : fullLoading;

  const scheduleFitView = useCallback(
    ({ delayMs = 0 }: { delayMs?: number } = {}) => {
      if (!reactFlowInstance) {
        return () => {};
      }

      let timeoutId = 0;
      let frameId = 0;
      let retryFrameId = 0;

      const fitWithCurrentNodes = () => {
        const targetNodes = getFitNodeIdsIgnoringNotes(
          reactFlowInstance.getNodes(),
        );
        if (targetNodes.length === 0) {
          return false;
        }
        viewportMovedSinceAutoFitRef.current = false;
        reactFlowInstance.fitView({
          duration: FIT_ANIMATION_MS,
          nodes: targetNodes,
        });
        return true;
      };

      const runFit = () => {
        if (fitWithCurrentNodes()) {
          return;
        }

        retryFrameId = window.requestAnimationFrame(() => {
          fitWithCurrentNodes();
        });
      };

      const start = () => {
        frameId = window.requestAnimationFrame(runFit);
      };

      if (delayMs > 0) {
        timeoutId = window.setTimeout(start, delayMs);
      } else {
        start();
      }

      return () => {
        if (timeoutId) {
          window.clearTimeout(timeoutId);
        }
        if (frameId) {
          window.cancelAnimationFrame(frameId);
        }
        if (retryFrameId) {
          window.cancelAnimationFrame(retryFrameId);
        }
      };
    },
    [reactFlowInstance],
  );

  useEffect(() => {
    if (
      previousActiveViewRef.current !== activeView &&
      activeView === "editor"
    ) {
      pendingEditorTabFitRef.current = true;
      forceEditorTabFitRef.current =
        previousActiveViewRef.current === "executions";
    }
    previousActiveViewRef.current = activeView;
  }, [activeView]);

  useEffect(() => {
    if (activeView !== "editor" && reactFlowInstance) {
      setReactFlowInstance(null);
    }
  }, [activeView, reactFlowInstance]);

  useEffect(() => {
    if (
      !reactFlowInstance ||
      activeView !== "editor" ||
      !pendingEditorTabFitRef.current
    ) {
      return;
    }
    pendingEditorTabFitRef.current = false;
    const forceFit = forceEditorTabFitRef.current;
    forceEditorTabFitRef.current = false;
    if (!(forceFit || viewportMovedSinceAutoFitRef.current)) {
      return;
    }
    return scheduleFitView({ delayMs: TAB_SWITCH_FIT_DELAY_MS });
  }, [activeView, reactFlowInstance, scheduleFitView]);

  useEffect(() => {
    if (!reactFlowInstance || fitViewTick === 0 || activeView !== "editor") {
      return;
    }
    if (lastProcessedFitTickRef.current === fitViewTick) {
      return;
    }
    lastProcessedFitTickRef.current = fitViewTick;
    return scheduleFitView();
  }, [activeView, fitViewTick, reactFlowInstance, scheduleFitView]);

  let editorContent: ReactElement;
  if (initialRecipeReady) {
    editorContent = (
      <ReactFlow<Node<RecipeNodeData | RecipeGraphAuxNodeData>, Edge>
        onInit={setReactFlowInstance}
        onDragOver={handleDragOver}
        onDrop={handleDrop}
        nodes={displayGraph.nodes}
        edges={displayGraph.edges}
        proOptions={{ hideAttribution: true }}
        nodeTypes={NODE_TYPES}
        edgeTypes={EDGE_TYPES}
        defaultEdgeOptions={{
          type: "canvas",
          data: { path: "smoothstep" },
        }}
        onNodesChange={handleNodesChange}
        onEdgesChange={handleEdgesChange}
        onConnect={onConnect}
        onNodeClick={handleNodeClick}
        onNodeDoubleClick={handleNodeDoubleClick}
        isValidConnection={isValidConnection}
        onMoveEnd={(event) => {
          if (event) {
            viewportMovedSinceAutoFitRef.current = true;
          }
        }}
        nodesDraggable={canvasInteractive}
        nodesConnectable={canvasInteractive}
        elementsSelectable={canvasInteractive}
        fitView={false}
        className="h-full w-full rounded-t-none"
      >
        <LayoutControls
          direction={layoutDirection}
          onLayout={applyLayout}
          onToggleDirection={handleToggleDirection}
        />
        <InternalsSync nodeIds={displayNodeIds} />
        <Background
          variant={BackgroundVariant.Dots}
          gap={18}
          size={1}
          color="#d4d4d8"
        />
        {nodes.length === 0 && (
          <div className="pointer-events-none absolute inset-0 z-10 flex items-center justify-center p-4">
            <div className="pointer-events-auto w-full max-w-md rounded-2xl border border-dashed border-border/70 bg-background/80 px-6 py-6 text-center shadow-border backdrop-blur-[1px]">
              <div className="mx-auto flex size-12 items-center justify-center corner-squircle rounded-xl border border-border/70 bg-muted/40">
                <HugeiconsIcon
                  icon={DocumentAttachmentIcon}
                  className="size-6 text-muted-foreground"
                />
              </div>
              <div className="mt-4 space-y-2">
                <p className="text-[11px] font-semibold uppercase tracking-wide text-primary">
                  Best place to start
                </p>
                <p className="text-sm font-semibold text-foreground">
                  Start with source data
                </p>
                <p className="text-xs text-muted-foreground">
                  Most synthetic-data recipes begin with a document, dataset, or
                  file before adding generation and checks.
                </p>
              </div>
              <div className="mt-5 flex flex-col justify-center gap-2 sm:flex-row">
                <Button
                  type="button"
                  className="corner-squircle"
                  onClick={openSourceBlockSheet}
                >
                  <HugeiconsIcon
                    icon={DocumentAttachmentIcon}
                    className="size-4"
                  />
                  Start with source data
                </Button>
                <Button
                  type="button"
                  variant="outline"
                  className="corner-squircle"
                  onClick={openRootBlockSheet}
                >
                  <HugeiconsIcon icon={PlusSignIcon} className="size-4" />
                  Browse all steps
                </Button>
              </div>
            </div>
          </div>
        )}
        <Panel position="top-right" className="m-3">
          <BlockSheet
            container={sheetContainer}
            sheetView={sheetView}
            onViewChange={setSheetView}
            open={sheetOpen}
            onOpenChange={setSheetOpen}
            onAddSampler={handleAddSamplerFromSheet}
            onAddSeed={handleAddSeedFromSheet}
            onAddLlm={handleAddLlmFromSheet}
            onAddModelProvider={handleAddModelProviderFromSheet}
            onAddModelConfig={handleAddModelConfigFromSheet}
            onAddToolProfile={handleAddToolProfileFromSheet}
            onAddExpression={handleAddExpressionFromSheet}
            onAddValidator={handleAddValidatorFromSheet}
            onAddMarkdownNote={handleAddMarkdownNoteFromSheet}
            onOpenProcessors={openProcessorsFromSheet}
            copied={copied}
            onCopy={copyRecipe}
            onImport={() => setImportOpen(true)}
          />
        </Panel>
        <ViewportControls
          interactive={canvasInteractive}
          lockDisabled={executionLocked}
          onToggleInteractive={toggleInteractive}
        />
        {islandExecution &&
          (isExecutionInProgress(islandExecution.status) ||
            islandExecution.status === "completed") && (
            <Panel position="top-center" className="!m-0">
              <ExecutionProgressIsland
                execution={islandExecution}
                currentColumnIcon={currentColumnIcon}
                minimized={runtimeIslandMinimized}
                onMinimizedChange={setRuntimeIslandMinimized}
                onViewExecutions={() => setActiveView("executions")}
              />
            </Panel>
          )}
        <RunValidateFloatingControls
          runBusy={runBusy}
          runDialogKind={runDialogKind}
          validateLoading={validateLoading}
          executionLocked={executionLocked}
          onOpenRunDialog={openRunDialog}
          onValidate={() => {
            openRunDialog(runDialogKind);
            void validateFromDialog();
          }}
        />
      </ReactFlow>
    );
  } else {
    editorContent = (
      <div className="flex h-full items-center justify-center px-6">
        <div className="rounded-2xl border border-border/70 bg-background/80 px-5 py-4 text-center shadow-border backdrop-blur-[1px]">
          <p className="text-sm font-medium text-foreground">Loading recipe</p>
          <p className="mt-1 text-xs text-muted-foreground">
            Restoring the studio graph and saved settings.
          </p>
        </div>
      </div>
    );
  }

  return (
    <div className="min-h-screen bg-background">
      <main className="w-full px-6 py-8">
        <div
          className="relative w-full overflow-hidden rounded-2xl corner-squircle border"
          ref={setSheetContainer}
        >
          <RecipeStudioHeader
            activeView={activeView}
            saveLoading={saveLoading}
            saveTone={saveTone}
            savedAtLabel={savedAtLabel}
            workflowName={workflowName}
            warnings={getGraphWarnings(configs, edges)}
            onWorkflowNameChange={setWorkflowName}
            onViewChange={setActiveView}
            onSaveRecipe={() => {
              void persistRecipe();
            }}
          />
          <div
            className="h-[75vh] w-full rounded-t-none"
            ref={flowContainerRef}
          >
            {activeView === "editor" ? (
              editorContent
            ) : (
              <ExecutionsView
                executions={executions}
                selectedExecutionId={selectedExecutionId}
                currentSignature={currentSignature}
                onSelectExecution={setSelectedExecutionId}
                onCancelExecution={(executionId) => {
                  void cancelExecution(executionId);
                }}
                onLoadDatasetPage={(executionId, page) => {
                  void loadExecutionDatasetPage(executionId, page);
                }}
              />
            )}
          </div>
        </div>
      </main>
      <ConfigDialog
        open={dialogOpen}
        onOpenChange={setDialogOpen}
        config={config}
        readOnly={executionLocked}
        categoryOptions={dialogOptions.categoryOptions}
        modelConfigAliases={dialogOptions.modelConfigAliases}
        modelProviderOptions={dialogOptions.modelProviderOptions}
        toolProfileAliases={dialogOptions.toolProfileAliases}
        datetimeOptions={dialogOptions.datetimeOptions}
        onUpdate={updateConfig}
        container={sheetContainer}
      />
      <ImportDialog
        open={importOpen}
        onOpenChange={setImportOpen}
        onImport={importRecipe}
        container={sheetContainer}
      />
      <ProcessorsDialog
        open={processorsOpen}
        onOpenChange={setProcessorsOpen}
        processors={processors}
        onProcessorsChange={setProcessors}
        container={sheetContainer}
      />
      <RunDialog
        open={runDialogOpen}
        onOpenChange={setRunDialogOpen}
        kind={runDialogKind}
        onKindChange={setRunDialogKind}
        rows={runDialogRows}
        fullRunName={fullRunName}
        onFullRunNameChange={setFullRunName}
        onRowsChange={(rows) => {
          if (runDialogKind === "preview") {
            setPreviewRows(rows);
            return;
          }
          setFullRows(rows);
        }}
        settings={runSettings}
        onSettingsChange={setRunSettings}
        loading={runDialogLoading}
        validateLoading={validateLoading}
        validateResult={validateResult}
        errors={runErrors}
        onValidate={() => {
          void validateFromDialog();
        }}
        onRun={() => {
          void runFromDialog();
        }}
        container={sheetContainer}
      />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/helpers/edge-sync.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type Edge, addEdge } from "@xyflow/react";
import type {
  LayoutDirection,
  ModelConfig,
  NodeConfig,
  SamplerConfig,
  ValidatorConfig,
} from "../../types";
import { applyRecipeConnection } from "../../utils/graph";
import { isCategoryConfig, isSubcategoryConfig } from "../../utils";
import { HANDLE_IDS } from "../../utils/handles";

/**
 * Returns the record key of the first config whose `name` equals `name`,
 * or `null` when no config carries that name.
 */
function findNodeIdByName(
  configs: Record<string, NodeConfig>,
  name: string,
): string | null {
  for (const [nodeId, config] of Object.entries(configs)) {
    if (config.name === name) {
      return nodeId;
    }
  }
  return null;
}

/**
 * Adds a "canvas" edge from `source`'s data-out handle to `target`'s data-in
 * handle, delegating the actual insertion to React Flow's `addEdge`.
 */
function addRecipeEdge(edges: Edge[], source: string, target: string): Edge[] {
  const connection = {
    source,
    target,
    sourceHandle: HANDLE_IDS.dataOut,
    targetHandle: HANDLE_IDS.dataIn,
    type: "canvas",
  };
  return addEdge(connection, edges);
}

/**
 * Adds a "semantic" edge from `source` to `target` (still wired through the
 * data-out / data-in handles), delegating insertion to React Flow's `addEdge`.
 */
function addValidatorSemanticEdge(
  edges: Edge[],
  source: string,
  target: string,
): Edge[] {
  const connection = {
    source,
    target,
    sourceHandle: HANDLE_IDS.dataOut,
    targetHandle: HANDLE_IDS.dataIn,
    type: "semantic",
  };
  return addEdge(connection, edges);
}

/** Returns a new list without any edge that points at `targetId`. */
function removeTargetEdges(edges: Edge[], targetId: string): Edge[] {
  const remaining: Edge[] = [];
  for (const edge of edges) {
    if (edge.target !== targetId) {
      remaining.push(edge);
    }
  }
  return remaining;
}

/**
 * Drops edges that point at `targetId` and whose source config satisfies
 * `shouldRemove`. Edges aimed at other targets are always kept, and the
 * predicate is only consulted for edges that do point at `targetId`.
 * The predicate receives `undefined` when the source id has no config entry.
 */
function removeTargetEdgesBySource(
  edges: Edge[],
  configs: Record<string, NodeConfig>,
  targetId: string,
  shouldRemove: (source: NodeConfig | undefined) => boolean,
): Edge[] {
  return edges.filter(
    (edge) => edge.target !== targetId || !shouldRemove(configs[edge.source]),
  );
}

/**
 * Derives the edge list that should exist after `patch` is applied to
 * `current`.
 *
 * Each supported reference field is handled independently and only when the
 * patch actually carries that key (own-property check):
 *  - `subcategory_parent` on a subcategory config,
 *  - `provider` on a `model_config`,
 *  - `reference_column_name` on a `timedelta` sampler,
 *  - `model_alias` and `tool_alias` on an `llm`,
 *  - `target_columns` on a `validator`.
 *
 * For `provider`, `model_alias` and `tool_alias` the edges are left alone
 * when the trimmed value is unchanged. Previously such an unchanged value
 * returned from the whole function, so a patch carrying an unchanged
 * `model_alias` together with a changed `tool_alias` never re-synced the
 * tool-config edge. Now only the affected section is skipped.
 *
 * @param current - Config being patched (its pre-patch state).
 * @param patch - Partial update about to be applied to `current`.
 * @param configs - All configs keyed by node id; used to resolve names.
 * @param edges - Current edge list; never mutated in place.
 * @param layoutDirection - Forwarded to `applyRecipeConnection`.
 * @returns The edge list to use after the patch (may be `edges` itself).
 */
export function syncEdgesForConfigPatch(
  current: NodeConfig,
  patch: Partial<NodeConfig>,
  configs: Record<string, NodeConfig>,
  edges: Edge[],
  layoutDirection: LayoutDirection,
): Edge[] {
  const patchHas = (key: string): boolean =>
    Object.prototype.hasOwnProperty.call(patch, key);

  // Connects the config named `sourceName` (if one exists) to `current`
  // through the semantic handles; returns the input list when nothing matches.
  const connectSemanticSource = (
    currentEdges: Edge[],
    sourceName: string,
  ): Edge[] => {
    if (!sourceName) {
      return currentEdges;
    }
    const sourceId = findNodeIdByName(configs, sourceName);
    if (!sourceId) {
      return currentEdges;
    }
    const result = applyRecipeConnection(
      {
        source: sourceId,
        sourceHandle: HANDLE_IDS.semanticOut,
        target: current.id,
        targetHandle: HANDLE_IDS.semanticIn,
      },
      configs,
      currentEdges,
      layoutDirection,
    );
    return result.edges;
  };

  let nextEdges = edges;

  // Subcategory parent: drop every incoming edge, then link the new parent.
  if (isSubcategoryConfig(current) && patchHas("subcategory_parent")) {
    const nextParent = (patch as Partial<SamplerConfig>).subcategory_parent ?? "";
    const parentId = nextParent ? findNodeIdByName(configs, nextParent) : null;
    nextEdges = removeTargetEdges(nextEdges, current.id);
    if (parentId) {
      nextEdges = addRecipeEdge(nextEdges, parentId, current.id);
    }
  }

  // Model config -> provider link.
  if (current.kind === "model_config" && patchHas("provider")) {
    const nextProvider = (patch as Partial<ModelConfig>).provider ?? "";
    if (nextProvider.trim() !== current.provider.trim()) {
      nextEdges = removeTargetEdgesBySource(
        nextEdges,
        configs,
        current.id,
        (source) => Boolean(source && source.kind === "model_provider"),
      );
      nextEdges = connectSemanticSource(nextEdges, nextProvider);
    }
  }

  // Timedelta sampler -> datetime reference column.
  if (
    current.kind === "sampler" &&
    current.sampler_type === "timedelta" &&
    patchHas("reference_column_name")
  ) {
    const nextReference =
      (patch as Partial<SamplerConfig>).reference_column_name ?? "";
    nextEdges = removeTargetEdgesBySource(
      nextEdges,
      configs,
      current.id,
      (source) =>
        Boolean(
          source &&
            source.kind === "sampler" &&
            source.sampler_type === "datetime",
        ),
    );
    if (nextReference) {
      const referenceId = findNodeIdByName(configs, nextReference);
      const source = referenceId ? configs[referenceId] : null;
      // Only datetime samplers are valid references.
      if (
        referenceId &&
        source &&
        source.kind === "sampler" &&
        source.sampler_type === "datetime"
      ) {
        nextEdges = addRecipeEdge(nextEdges, referenceId, current.id);
      }
    }
  }

  // LLM -> model config link.
  if (current.kind === "llm" && patchHas("model_alias")) {
    const nextAlias =
      (patch as Partial<NodeConfig> & { model_alias?: string }).model_alias ?? "";
    if (nextAlias.trim() !== current.model_alias.trim()) {
      nextEdges = removeTargetEdgesBySource(
        nextEdges,
        configs,
        current.id,
        (source) => Boolean(source && source.kind === "model_config"),
      );
      nextEdges = connectSemanticSource(nextEdges, nextAlias);
    }
  }

  // LLM -> tool config link (evaluated even when model_alias was unchanged).
  if (current.kind === "llm" && patchHas("tool_alias")) {
    const nextAlias =
      (patch as Partial<NodeConfig> & { tool_alias?: string }).tool_alias ?? "";
    if (nextAlias.trim() !== (current.tool_alias ?? "").trim()) {
      nextEdges = removeTargetEdgesBySource(
        nextEdges,
        configs,
        current.id,
        (source) => Boolean(source && source.kind === "tool_config"),
      );
      nextEdges = connectSemanticSource(nextEdges, nextAlias);
    }
  }

  // Validator -> code LLM target.
  if (current.kind === "validator" && patchHas("target_columns")) {
    const nextTargets =
      ((patch as Partial<ValidatorConfig>).target_columns ?? [])
        .map((value) => value.trim())
        .filter(Boolean);
    // Remove edges in either direction between this validator and code LLMs.
    nextEdges = nextEdges.filter((edge) => {
      if (edge.source !== current.id && edge.target !== current.id) {
        return true;
      }
      const otherId = edge.source === current.id ? edge.target : edge.source;
      const other = configs[otherId];
      return !(
        other &&
        other.kind === "llm" &&
        other.llm_type === "code"
      );
    });
    // Only the first non-empty target column is linked.
    const nextTargetName = nextTargets[0];
    if (nextTargetName) {
      const targetId = findNodeIdByName(configs, nextTargetName);
      const target = targetId ? configs[targetId] : null;
      if (
        targetId &&
        target &&
        target.kind === "llm" &&
        target.llm_type === "code"
      ) {
        nextEdges = addValidatorSemanticEdge(nextEdges, targetId, current.id);
      }
    }
  }

  return nextEdges;
}

/**
 * Propagates a category config update to the subcategory configs whose
 * `subcategory_parent` equals `oldName`.
 *
 * When `current` is not a category config the input record is returned
 * untouched. Otherwise every matching subcategory gets a fresh config object:
 * its parent name becomes `newName` when `nameChanged` is set, and its mapping
 * is rebuilt over the new category values (existing entries kept, missing ones
 * initialised to `[]`) when the value list differs from the old one.
 */
export function syncSubcategoryConfigsForCategoryUpdate(
  current: NodeConfig,
  next: NodeConfig,
  configs: Record<string, NodeConfig>,
  oldName: string,
  newName: string,
  nameChanged: boolean,
): Record<string, NodeConfig> {
  if (!isCategoryConfig(current)) {
    return configs;
  }

  // Fall back to the current values when `next` is no longer a category.
  const updatedCategory = isCategoryConfig(next) ? next : current;
  const previousValues = current.values ?? [];
  const updatedValues = updatedCategory.values ?? [];
  const valuesUnchanged =
    previousValues.length === updatedValues.length &&
    previousValues.every((value, index) => value === updatedValues[index]);
  const valuesChanged = !valuesUnchanged;

  let result = configs;
  for (const candidate of Object.values(configs)) {
    if (
      !isSubcategoryConfig(candidate) ||
      candidate.subcategory_parent !== oldName
    ) {
      continue;
    }
    const existingMapping = candidate.subcategory_mapping ?? {};
    const rebuildMapping = (): Record<string, string[]> => {
      const rebuilt: Record<string, string[]> = {};
      for (const value of updatedValues) {
        rebuilt[value] = existingMapping[value] ?? [];
      }
      return rebuilt;
    };
    const replacement: NodeConfig = {
      ...candidate,
      // biome-ignore lint/style/useNamingConvention: api schema
      subcategory_parent: nameChanged ? newName : candidate.subcategory_parent,
      // biome-ignore lint/style/useNamingConvention: api schema
      subcategory_mapping: valuesChanged ? rebuildMapping() : existingMapping,
    };
    result = { ...result, [candidate.id]: replacement };
  }
  return result;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/helpers/model-infra-layout.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge, XYPosition } from "@xyflow/react";
import { DEFAULT_NODE_HEIGHT, DEFAULT_NODE_WIDTH } from "../../constants";
import type { LayoutDirection, NodeConfig, RecipeNode } from "../../types";
import { HANDLE_IDS, normalizeRecipeHandleId } from "../../utils/handles";
import { readNodeHeight, readNodeWidth } from "../../utils/rf-node-dimensions";

/** Axis-aligned rectangle: an `x`/`y` position plus `width` and `height`. */
type Rect = {
  x: number;
  y: number;
  width: number;
  height: number;
};

/** Min/max extents along both axes, as produced by `collectBounds`. */
type Bounds = {
  minX: number;
  maxX: number;
  minY: number;
  maxY: number;
};

/**
 * Builds the rectangle occupied by `node` from its position and its width and
 * height, falling back to the default node dimensions when none can be read.
 */
function toRect(node: RecipeNode): Rect {
  const { x, y } = node.position;
  const width = readNodeWidth(node) ?? DEFAULT_NODE_WIDTH;
  const height = readNodeHeight(node) ?? DEFAULT_NODE_HEIGHT;
  return { x, y, width, height };
}

/**
 * Reports whether rectangles `a` and `b` overlap once each is grown by `pad`
 * on its trailing edges. Rectangles exactly `pad` apart do not intersect.
 */
function intersects(a: Rect, b: Rect, pad = 18): boolean {
  // `a` ends (plus padding) before `b` starts horizontally, or vice versa.
  if (a.x + a.width + pad <= b.x) {
    return false;
  }
  if (b.x + b.width + pad <= a.x) {
    return false;
  }
  // Same separation test on the vertical axis.
  if (a.y + a.height + pad <= b.y) {
    return false;
  }
  if (b.y + b.height + pad <= a.y) {
    return false;
  }
  return true;
}

/**
 * Searches outward from `preferred` in square rings on a 24px grid (up to 16
 * rings) for the first position where a `width` x `height` rectangle does not
 * intersect any rectangle in `occupied`. Returns `preferred` itself when no
 * free slot is found.
 */
function findNonOverlappingPosition(
  preferred: XYPosition,
  width: number,
  height: number,
  occupied: Rect[],
): XYPosition {
  const gridStep = 24;
  const maxRing = 16;

  for (let ring = 0; ring <= maxRing; ring += 1) {
    for (let dx = -ring; dx <= ring; dx += 1) {
      for (let dy = -ring; dy <= ring; dy += 1) {
        // Only visit the perimeter of the ring; inner cells were covered by
        // smaller rings already.
        const onPerimeter =
          ring === 0 || Math.max(Math.abs(dx), Math.abs(dy)) === ring;
        if (!onPerimeter) {
          continue;
        }
        const x = preferred.x + dx * gridStep;
        const y = preferred.y + dy * gridStep;
        const probe = { x, y, width, height };
        const blocked = occupied.some((item) => intersects(probe, item));
        if (!blocked) {
          return { x, y };
        }
      }
    }
  }
  return preferred;
}

/** True when `edge` runs from a `model_provider` config to a `model_config`. */
function isProviderToConfigEdge(
  edge: Edge,
  configs: Record<string, NodeConfig>,
): boolean {
  const sourceKind = configs[edge.source]?.kind;
  const targetKind = configs[edge.target]?.kind;
  return sourceKind === "model_provider" && targetKind === "model_config";
}

/** True when `edge` runs from a `model_config` config to an `llm` config. */
function isConfigToLlmEdge(
  edge: Edge,
  configs: Record<string, NodeConfig>,
): boolean {
  const sourceKind = configs[edge.source]?.kind;
  const targetKind = configs[edge.target]?.kind;
  return sourceKind === "model_config" && targetKind === "llm";
}

/** True when `edge` runs from a `tool_config` config to an `llm` config. */
function isToolConfigToLlmEdge(
  edge: Edge,
  configs: Record<string, NodeConfig>,
): boolean {
  const sourceKind = configs[edge.source]?.kind;
  const targetKind = configs[edge.target]?.kind;
  return sourceKind === "tool_config" && targetKind === "llm";
}

/** Composite map key identifying one handle on one node: `<nodeId>::<handleId>`. */
function usageKey(nodeId: string, handleId: string): string {
  return [nodeId, handleId].join("::");
}

/** Adds one to the usage counter for `handleId` on `nodeId` (missing counts as 0). */
function incrementUsage(
  map: Map<string, number>,
  nodeId: string,
  handleId: string,
): void {
  const slot = usageKey(nodeId, handleId);
  const previous = map.get(slot) ?? 0;
  map.set(slot, previous + 1);
}

/**
 * Subtracts one from the usage counter for `handleId` on `nodeId`, never
 * going below zero. A missing counter is written back as 0.
 */
function decrementUsage(
  map: Map<string, number>,
  nodeId: string,
  handleId: string,
): void {
  const slot = usageKey(nodeId, handleId);
  const previous = map.get(slot) ?? 0;
  const lowered = previous - 1;
  map.set(slot, Math.max(0, lowered));
}

/** Reads the usage counter for `handleId` on `nodeId`; 0 when none is recorded. */
function getUsage(
  map: Map<string, number>,
  nodeId: string,
  handleId: string,
): number {
  const recorded = map.get(usageKey(nodeId, handleId));
  return recorded ?? 0;
}

/**
 * Chooses a handle for `nodeId` from `candidates` (ordered by preference).
 *
 * The first candidate with zero recorded usage wins. When every candidate is
 * in use, the earliest candidate with the lowest usage count is returned.
 * With an empty `candidates` array the result is `candidates[0]`.
 */
function pickHandleByUsage(
  candidates: string[],
  nodeId: string,
  usageMap: Map<string, number>,
): string {
  const firstUnused = candidates.find(
    (handleId) => getUsage(usageMap, nodeId, handleId) === 0,
  );
  if (firstUnused !== undefined) {
    return firstUnused;
  }

  let leastUsedHandle = candidates[0];
  let leastUsedCount = Number.POSITIVE_INFINITY;
  candidates.forEach((handleId) => {
    const usage = getUsage(usageMap, nodeId, handleId);
    // Strict comparison keeps the earliest candidate on ties.
    if (usage < leastUsedCount) {
      leastUsedCount = usage;
      leastUsedHandle = handleId;
    }
  });
  return leastUsedHandle;
}

/**
 * Records the chosen handles in both usage maps and returns a copy of `edge`
 * that uses those handles and is typed "semantic".
 */
function applyEdgeWithHandles(
  edge: Edge,
  sourceHandle: string,
  targetHandle: string,
  sourceUsage: Map<string, number>,
  targetUsage: Map<string, number>,
): Edge {
  incrementUsage(sourceUsage, edge.source, sourceHandle);
  incrementUsage(targetUsage, edge.target, targetHandle);

  const rewired: Edge = {
    ...edge,
    sourceHandle,
    targetHandle,
    type: "semantic",
  };
  return rewired;
}

/**
 * Returns the centre point of `node`, using the default node dimensions when
 * its width or height cannot be read.
 */
function getNodeCenter(node: RecipeNode): { x: number; y: number } {
  const halfWidth = (readNodeWidth(node) ?? DEFAULT_NODE_WIDTH) / 2;
  const halfHeight = (readNodeHeight(node) ?? DEFAULT_NODE_HEIGHT) / 2;
  const { x: left, y: top } = node.position;
  return { x: left + halfWidth, y: top + halfHeight };
}

/**
 * Computes the bounding box enclosing the rectangles of all nodes in `ids`
 * that exist in `nodesById`. Unknown ids are ignored; returns `null` when none
 * of the ids resolve to a node.
 */
function collectBounds(
  ids: string[],
  nodesById: Map<string, RecipeNode>,
): Bounds | null {
  let bounds: Bounds | null = null;
  for (const id of ids) {
    const node = nodesById.get(id);
    if (!node) {
      continue;
    }
    const rect = toRect(node);
    const right = rect.x + rect.width;
    const bottom = rect.y + rect.height;
    if (bounds === null) {
      // First resolved node seeds the box.
      bounds = { minX: rect.x, maxX: right, minY: rect.y, maxY: bottom };
      continue;
    }
    bounds = {
      minX: Math.min(bounds.minX, rect.x),
      maxX: Math.max(bounds.maxX, right),
      minY: Math.min(bounds.minY, rect.y),
      maxY: Math.max(bounds.maxY, bottom),
    };
  }
  return bounds;
}

/**
 * Orders the four LLM data-in handles by preference for an incoming edge.
 *
 * In "TB" layouts the side handles come first (`dataIn` first when the source
 * centre is at or left of the target centre, otherwise `dataInRight`),
 * followed by top and bottom. In any other layout the top/bottom handles come
 * first (`dataInTop` first when the source centre is at or above the target
 * centre), followed by `dataIn` and `dataInRight`. A missing node is treated
 * as centred at the origin.
 */
function sortPreferredLlmTargetHandles(
  direction: LayoutDirection,
  sourceNode: RecipeNode | undefined,
  targetNode: RecipeNode | undefined,
): string[] {
  const from = sourceNode ? getNodeCenter(sourceNode) : { x: 0, y: 0 };
  const to = targetNode ? getNodeCenter(targetNode) : { x: 0, y: 0 };

  if (direction === "TB") {
    if (from.x <= to.x) {
      return [
        HANDLE_IDS.dataIn,
        HANDLE_IDS.dataInRight,
        HANDLE_IDS.dataInTop,
        HANDLE_IDS.dataInBottom,
      ];
    }
    return [
      HANDLE_IDS.dataInRight,
      HANDLE_IDS.dataIn,
      HANDLE_IDS.dataInTop,
      HANDLE_IDS.dataInBottom,
    ];
  }

  if (from.y <= to.y) {
    return [
      HANDLE_IDS.dataInTop,
      HANDLE_IDS.dataInBottom,
      HANDLE_IDS.dataIn,
      HANDLE_IDS.dataInRight,
    ];
  }
  return [
    HANDLE_IDS.dataInBottom,
    HANDLE_IDS.dataInTop,
    HANDLE_IDS.dataIn,
    HANDLE_IDS.dataInRight,
  ];
}

/**
 * Source-handle preference order for provider -> config edges:
 * `semanticOut` first in "TB" layouts, `semanticOutBottom` first otherwise.
 */
function getProviderSourceHandleCandidates(
  direction: LayoutDirection,
): string[] {
  if (direction === "TB") {
    return [HANDLE_IDS.semanticOut, HANDLE_IDS.semanticOutBottom];
  }
  return [HANDLE_IDS.semanticOutBottom, HANDLE_IDS.semanticOut];
}

/**
 * Target-handle preference order for provider -> config edges:
 * `semanticIn` first in "TB" layouts, `semanticInTop` first otherwise.
 */
function getProviderTargetHandleCandidates(
  direction: LayoutDirection,
): string[] {
  if (direction === "TB") {
    return [HANDLE_IDS.semanticIn, HANDLE_IDS.semanticInTop];
  }
  return [HANDLE_IDS.semanticInTop, HANDLE_IDS.semanticIn];
}

/**
 * The single source handle offered for config -> LLM edges: `semanticOut` in
 * "TB" layouts, `semanticOutBottom` otherwise.
 */
function getConfigSourceHandleCandidates(direction: LayoutDirection): string[] {
  if (direction === "TB") {
    return [HANDLE_IDS.semanticOut];
  }
  return [HANDLE_IDS.semanticOutBottom];
}

/**
 * Re-assigns source/target handles on model-infrastructure edges
 * (provider -> config, config -> LLM, tool config -> LLM) so that connections
 * spread across the handles preferred for the given layout `direction`.
 *
 * Usage counters are first seeded from every edge's current (normalized)
 * handles. Each infrastructure edge then releases its own handles, picks the
 * least-used preferred handle on each side, and is returned as a "semantic"
 * edge. All other edges are passed through unchanged, in their original order.
 */
export function optimizeModelInfraEdgeHandles(
  edges: Edge[],
  nodes: RecipeNode[],
  configs: Record<string, NodeConfig>,
  direction: LayoutDirection,
): Edge[] {
  const nodeLookup = new Map(nodes.map((node) => [node.id, node] as const));
  const sourceUsage = new Map<string, number>();
  const targetUsage = new Map<string, number>();

  // Seed the counters with the handles currently occupied by every edge.
  for (const edge of edges) {
    const occupiedSource = normalizeRecipeHandleId(edge.sourceHandle);
    const occupiedTarget = normalizeRecipeHandleId(edge.targetHandle);
    if (occupiedSource) {
      incrementUsage(sourceUsage, edge.source, occupiedSource);
    }
    if (occupiedTarget) {
      incrementUsage(targetUsage, edge.target, occupiedTarget);
    }
  }

  return edges.map((edge) => {
    // Edges with a missing endpoint config never match these predicates.
    const providerEdge = isProviderToConfigEdge(edge, configs);
    const infraEdge =
      providerEdge ||
      isConfigToLlmEdge(edge, configs) ||
      isToolConfigToLlmEdge(edge, configs);
    if (!infraEdge) {
      return edge;
    }

    // Release this edge's own handles so it does not compete with itself.
    const previousSource = normalizeRecipeHandleId(edge.sourceHandle);
    const previousTarget = normalizeRecipeHandleId(edge.targetHandle);
    if (previousSource) {
      decrementUsage(sourceUsage, edge.source, previousSource);
    }
    if (previousTarget) {
      decrementUsage(targetUsage, edge.target, previousTarget);
    }

    let sourceCandidates: string[];
    let targetCandidates: string[];
    if (providerEdge) {
      sourceCandidates = getProviderSourceHandleCandidates(direction);
      targetCandidates = getProviderTargetHandleCandidates(direction);
    } else {
      // Config -> LLM and tool config -> LLM share the same candidate rules.
      targetCandidates = sortPreferredLlmTargetHandles(
        direction,
        nodeLookup.get(edge.source),
        nodeLookup.get(edge.target),
      );
      sourceCandidates = getConfigSourceHandleCandidates(direction);
    }

    const chosenSource = pickHandleByUsage(
      sourceCandidates,
      edge.source,
      sourceUsage,
    );
    const chosenTarget = pickHandleByUsage(
      targetCandidates,
      edge.target,
      targetUsage,
    );
    return applyEdgeWithHandles(
      edge,
      chosenSource,
      chosenTarget,
      sourceUsage,
      targetUsage,
    );
  });
}

/**
 * Repositions model-infrastructure nodes (model configs, model providers and
 * tool configs) relative to the bounding box of the nodes they connect to.
 *
 * Placement runs in three passes, in this order: model configs (relative to
 * their LLM targets), model providers (relative to their model-config
 * targets, which may already have moved in the first pass), then tool configs
 * (relative to their LLM targets). Each placement goes through
 * `findNonOverlappingPosition` against every other node's current rectangle.
 *
 * @param nodes Current nodes; not mutated.
 * @param edges Edges used to discover provider→config, config→llm and
 *   tool-config→llm links.
 * @param configs Node configs keyed by node id.
 * @param direction Layout direction. For "LR" the preferred spot is centered
 *   on the targets' X extent at `minY - height - gap`; otherwise it is
 *   centered on the targets' Y extent at `minX - width - gap`.
 * @returns A new array in the original order, with moved nodes replaced by
 *   copies carrying the new `position`.
 */
export function centerModelInfraNodes(
  nodes: RecipeNode[],
  edges: Edge[],
  configs: Record<string, NodeConfig>,
  direction: LayoutDirection,
): RecipeNode[] {
  const nodesById = new Map(nodes.map((node) => [node.id, node] as const));
  const providerToConfigIds = new Map<string, string[]>();
  const configToLlmIds = new Map<string, string[]>();
  const toolConfigToLlmIds = new Map<string, string[]>();

  // Registers `targetId` under `sourceId`, keeping each target once in
  // first-seen order.
  const linkOnce = (
    links: Map<string, string[]>,
    sourceId: string,
    targetId: string,
  ): void => {
    const targets = links.get(sourceId) ?? [];
    if (!targets.includes(targetId)) {
      targets.push(targetId);
    }
    links.set(sourceId, targets);
  };

  // An edge is classified by the first predicate that matches.
  for (const edge of edges) {
    if (isProviderToConfigEdge(edge, configs)) {
      linkOnce(providerToConfigIds, edge.source, edge.target);
    } else if (isConfigToLlmEdge(edge, configs)) {
      linkOnce(configToLlmIds, edge.source, edge.target);
    } else if (isToolConfigToLlmEdge(edge, configs)) {
      linkOnce(toolConfigToLlmIds, edge.source, edge.target);
    }
  }

  // Ids of configs of the given kind that also have a node on the canvas.
  const idsOfKind = (kind: NodeConfig["kind"]): string[] =>
    Object.values(configs)
      .filter((config) => config.kind === kind && nodesById.has(config.id))
      .map((config) => config.id);

  const modelConfigIds = idsOfKind("model_config");
  const modelProviderIds = idsOfKind("model_provider");
  const toolConfigIds = idsOfKind("tool_config");

  const occupiedById = new Map(
    nodes.map((node) => [node.id, toRect(node)] as const),
  );
  const clusterGap = 72;

  // Moves one node as close to `preferred` as possible without overlapping
  // any other node, then records its new rectangle for later placements.
  const placeNode = (nodeId: string, preferred: XYPosition): void => {
    const current = nodesById.get(nodeId);
    if (!current) {
      return;
    }
    const width = readNodeWidth(current) ?? DEFAULT_NODE_WIDTH;
    const height = readNodeHeight(current) ?? DEFAULT_NODE_HEIGHT;
    // The node must not collide with its own previous rectangle.
    occupiedById.delete(nodeId);
    const others = Array.from(occupiedById.values());
    const position = findNonOverlappingPosition(preferred, width, height, others);
    nodesById.set(nodeId, { ...current, position });
    occupiedById.set(nodeId, { x: position.x, y: position.y, width, height });
  };

  // Places every node in `infraIds` relative to the bounds of its linked
  // targets; nodes without resolvable target bounds are left where they are.
  const centerBesideTargets = (
    infraIds: string[],
    links: Map<string, string[]>,
  ): void => {
    for (const infraId of infraIds) {
      const bounds = collectBounds(links.get(infraId) ?? [], nodesById);
      const infraNode = nodesById.get(infraId);
      if (!bounds || !infraNode) {
        continue;
      }
      const width = readNodeWidth(infraNode) ?? DEFAULT_NODE_WIDTH;
      const height = readNodeHeight(infraNode) ?? DEFAULT_NODE_HEIGHT;
      let preferred: XYPosition;
      if (direction === "LR") {
        preferred = {
          x: (bounds.minX + bounds.maxX) / 2 - width / 2,
          y: bounds.minY - height - clusterGap,
        };
      } else {
        preferred = {
          x: bounds.minX - width - clusterGap,
          y: (bounds.minY + bounds.maxY) / 2 - height / 2,
        };
      }
      placeNode(infraId, preferred);
    }
  };

  // Order matters: providers are positioned against already-moved configs.
  centerBesideTargets(modelConfigIds, configToLlmIds);
  centerBesideTargets(modelProviderIds, providerToConfigIds);
  centerBesideTargets(toolConfigIds, toolConfigToLlmIds);

  return nodes.map((node) => nodesById.get(node.id) ?? node);
}


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/helpers/node-updates.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { XYPosition } from "@xyflow/react";
import { DEFAULT_NODE_WIDTH } from "../../constants";
import type {
  RecipeNode,
  LayoutDirection,
  NodeConfig,
} from "../../types";
import { nodeDataFromConfig } from "../../utils";
import { getConfigUiMode } from "../../components/inline/inline-policy";

/**
 * Slice of store state that `buildNodeUpdate` reads when appending a node.
 */
export type NodeUpdateState = {
  // Existing node configs keyed by config id.
  configs: Record<string, NodeConfig>;
  // Nodes currently on the canvas.
  nodes: RecipeNode[];
  // Counter that `buildNodeUpdate` advances by one per added node.
  nextId: number;
  // Y coordinate used for a new node when no explicit position is given.
  nextY: number;
};

/**
 * State patch produced by `buildNodeUpdate`: the updated `NodeUpdateState`
 * fields plus which config became active and whether its dialog should open.
 */
export type NodeUpdateResult = {
  configs: Record<string, NodeConfig>;
  nodes: RecipeNode[];
  nextId: number;
  nextY: number;
  // Id of the config that was just added.
  activeConfigId: string;
  // True only when a dialog was requested and the config's UI mode is "dialog".
  dialogOpen: boolean;
};

/**
 * Rebuilds the `data` payload of the node whose id equals `id` from `config`.
 *
 * @returns A new array; non-matching nodes are passed through by reference.
 */
export function updateNodeData(
  nodes: RecipeNode[],
  id: string,
  config: NodeConfig,
  layoutDirection: LayoutDirection,
): RecipeNode[] {
  return nodes.map((node) => {
    if (node.id !== id) {
      return node;
    }
    return { ...node, data: nodeDataFromConfig(config, layoutDirection) };
  });
}

/**
 * Builds the state patch for adding a new node backed by `config`.
 *
 * The new node is appended selected, and every existing node is deselected.
 * Without an explicit `position` the node is placed at `{ x: 0, y: nextY }`
 * and `nextY` advances by 140; with one, `nextY` is left unchanged.
 *
 * @param state Current configs, nodes and counters.
 * @param config Config for the node being added; its id becomes the node id.
 * @param layoutDirection Direction forwarded to `nodeDataFromConfig`.
 * @param position Optional explicit canvas position.
 * @param openDialog Whether a dialog may open; it only does when the config's
 *   UI mode is "dialog".
 */
export function buildNodeUpdate(
  state: NodeUpdateState,
  config: NodeConfig,
  layoutDirection: LayoutDirection,
  position?: XYPosition,
  openDialog = true,
): NodeUpdateResult {
  const created: RecipeNode = {
    id: config.id,
    type: "builder",
    position: position ?? { x: 0, y: state.nextY },
    data: nodeDataFromConfig(config, layoutDirection),
    style: { width: DEFAULT_NODE_WIDTH },
    selected: true,
  };
  const wantsDialog = getConfigUiMode(config) === "dialog";
  const deselected = state.nodes.map((existing) => ({
    ...existing,
    selected: false,
  }));
  // Only auto-stacked nodes consume vertical space in the default column.
  const followingY = position ? state.nextY : state.nextY + 140;

  return {
    configs: { ...state.configs, [config.id]: config },
    nodes: [...deselected, created],
    nextId: state.nextId + 1,
    nextY: followingY,
    activeConfigId: config.id,
    dialogOpen: openDialog && wantsDialog,
  };
}

/**
 * Propagates a new layout direction into every node's `data`.
 *
 * Nodes that have a config get their data rebuilt through
 * `nodeDataFromConfig`; nodes without one keep their data and only have
 * `layoutDirection` overwritten.
 */
export function applyLayoutDirectionToNodes(
  nodes: RecipeNode[],
  configs: Record<string, NodeConfig>,
  layoutDirection: LayoutDirection,
): RecipeNode[] {
  return nodes.map((node) => {
    const config = configs[node.id];
    if (!config) {
      return {
        ...node,
        data: { ...node.data, layoutDirection },
      };
    }
    return { ...node, data: nodeDataFromConfig(config, layoutDirection) };
  });
}


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/helpers/reference-sync.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  LlmConfig,
  ModelConfig,
  NodeConfig,
  SamplerConfig,
} from "../../types";
import { removeRef, replaceRef } from "../../utils/refs";

/**
 * Runs `updater` over the free-text template fields of a config.
 *
 * For "llm" configs these are `prompt`, `system_prompt` and, when it is a
 * string, `output_format`; for "expression" configs it is `expr`. All other
 * kinds are returned untouched. The original object is returned by reference
 * when no field actually changed, so callers can detect no-ops with `===`.
 */
function updateTemplateFields(
  config: NodeConfig,
  updater: (value: string) => string,
): NodeConfig {
  switch (config.kind) {
    case "llm": {
      const prompt = updater(config.prompt);
      const systemPrompt = updater(config.system_prompt);
      const outputFormat =
        typeof config.output_format === "string"
          ? updater(config.output_format)
          : config.output_format;
      const unchanged =
        prompt === config.prompt &&
        systemPrompt === config.system_prompt &&
        outputFormat === config.output_format;
      if (unchanged) {
        return config;
      }
      return {
        ...config,
        prompt,
        // biome-ignore lint/style/useNamingConvention: api schema
        system_prompt: systemPrompt,
        // biome-ignore lint/style/useNamingConvention: api schema
        output_format: outputFormat,
      };
    }
    case "expression": {
      const expr = updater(config.expr);
      return expr === config.expr ? config : { ...config, expr };
    }
    default:
      return config;
  }
}

/**
 * Rewrites every reference to `from` inside a single config so it points to
 * `to` instead.
 *
 * Template fields are rewritten through `replaceRef`. Structured references
 * are compared by exact equality: a subcategory sampler's
 * `subcategory_parent`, a timedelta sampler's `reference_column_name`, a model
 * config's `provider`, an LLM's `model_alias` / `tool_alias`, and entries of a
 * validator's `target_columns`.
 *
 * @returns The same object when nothing referenced `from`, otherwise a copy.
 */
export function applyRenameToConfig(
  config: NodeConfig,
  from: string,
  to: string,
): NodeConfig {
  let result: NodeConfig = updateTemplateFields(config, (value) =>
    replaceRef(value, from, to),
  );

  // Conditions read the original `config`; patches stack on top of `result`.
  switch (config.kind) {
    case "sampler": {
      if (
        config.sampler_type === "subcategory" &&
        config.subcategory_parent === from
      ) {
        result = {
          ...(result as SamplerConfig),
          // biome-ignore lint/style/useNamingConvention: api schema
          subcategory_parent: to,
        };
      }
      if (
        config.sampler_type === "timedelta" &&
        config.reference_column_name === from
      ) {
        result = {
          ...(result as SamplerConfig),
          // biome-ignore lint/style/useNamingConvention: api schema
          reference_column_name: to,
        };
      }
      return result;
    }
    case "model_config": {
      if (config.provider === from) {
        result = { ...(result as ModelConfig), provider: to };
      }
      return result;
    }
    case "llm": {
      if (config.model_alias === from) {
        result = { ...(result as LlmConfig), model_alias: to };
      }
      if (config.tool_alias === from) {
        result = { ...(result as LlmConfig), tool_alias: to };
      }
      return result;
    }
    case "validator": {
      const targets = config.target_columns ?? [];
      if (targets.includes(from)) {
        result = {
          ...(result as typeof config),
          // biome-ignore lint/style/useNamingConvention: api schema
          target_columns: targets.map((target) => (target === from ? to : target)),
        };
      }
      return result;
    }
    default:
      return result;
  }
}

/**
 * Strips every reference to `ref` from a single config.
 *
 * Template fields are cleaned through `removeRef`. Structured references that
 * equal `ref` are blanked: a subcategory sampler loses its parent and its
 * mapping, a timedelta sampler its `reference_column_name`, a model config its
 * `provider`, an LLM its `model_alias` / `tool_alias`, and a validator drops
 * the matching entries from `target_columns`.
 *
 * @returns The same object when nothing referenced `ref`, otherwise a copy.
 */
export function applyRemovalToConfig(
  config: NodeConfig,
  ref: string,
): NodeConfig {
  let result: NodeConfig = updateTemplateFields(config, (value) =>
    removeRef(value, ref),
  );

  // Conditions read the original `config`; patches stack on top of `result`.
  switch (config.kind) {
    case "sampler": {
      if (
        config.sampler_type === "subcategory" &&
        config.subcategory_parent === ref
      ) {
        result = {
          ...(result as SamplerConfig),
          // biome-ignore lint/style/useNamingConvention: api schema
          subcategory_parent: "",
          // biome-ignore lint/style/useNamingConvention: api schema
          subcategory_mapping: {},
        };
      }
      if (
        config.sampler_type === "timedelta" &&
        config.reference_column_name === ref
      ) {
        result = {
          ...(result as SamplerConfig),
          // biome-ignore lint/style/useNamingConvention: api schema
          reference_column_name: "",
        };
      }
      return result;
    }
    case "model_config": {
      if (config.provider === ref) {
        result = { ...(result as ModelConfig), provider: "" };
      }
      return result;
    }
    case "llm": {
      if (config.model_alias === ref) {
        result = { ...(result as LlmConfig), model_alias: "" };
      }
      if (config.tool_alias === ref) {
        result = { ...(result as LlmConfig), tool_alias: "" };
      }
      return result;
    }
    case "validator": {
      const current = config.target_columns ?? [];
      const kept = current.filter((target) => target !== ref);
      if (kept.length !== current.length) {
        result = {
          ...(result as typeof config),
          // biome-ignore lint/style/useNamingConvention: api schema
          target_columns: kept,
        };
      }
      return result;
    }
    default:
      return result;
  }
}

/**
 * Applies `transform` to every config with copy-on-write semantics.
 *
 * The input record is never mutated. It is shallow-copied the first time
 * `transform` returns a different object, and returned as-is (same reference)
 * when no config changed.
 */
function applyConfigTransform(
  configs: Record<string, NodeConfig>,
  transform: (config: NodeConfig) => NodeConfig,
): Record<string, NodeConfig> {
  let copy: Record<string, NodeConfig> | null = null;
  for (const [id, before] of Object.entries(configs)) {
    const after = transform(before);
    if (after === before) {
      continue;
    }
    // Clone lazily so untouched inputs keep their identity.
    copy ??= { ...configs };
    copy[id] = after;
  }
  return copy ?? configs;
}

/**
 * Applies a `from` → `to` rename across all configs.
 *
 * Returns the input record unchanged when `from` is empty or equal to `to`.
 */
export function applyRenameToConfigs(
  configs: Record<string, NodeConfig>,
  from: string,
  to: string,
): Record<string, NodeConfig> {
  const isNoop = !from || from === to;
  if (isNoop) {
    return configs;
  }
  const rename = (config: NodeConfig): NodeConfig =>
    applyRenameToConfig(config, from, to);
  return applyConfigTransform(configs, rename);
}

/**
 * Removes every reference to `ref` across all configs.
 *
 * Returns the input record unchanged when `ref` is empty.
 */
export function applyRemovalToConfigs(
  configs: Record<string, NodeConfig>,
  ref: string,
): Record<string, NodeConfig> {
  if (ref) {
    const strip = (config: NodeConfig): NodeConfig =>
      applyRemovalToConfig(config, ref);
    return applyConfigTransform(configs, strip);
  }
  return configs;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/helpers/removals.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge } from "@xyflow/react";
import type { NodeConfig } from "../../types";
import { isCategoryConfig, isSubcategoryConfig } from "../../utils";
import { applyRemovalToConfig, applyRemovalToConfigs } from "../recipe-studio-helpers";

/**
 * Removes a set of nodes from the graph data: drops their configs, drops every
 * edge touching them, and clears references to their names from the remaining
 * configs.
 *
 * @param input Current edges and configs; neither is mutated.
 * @param removedIds Ids of the nodes being deleted. Ids without a config (or
 *   repeated ids) are ignored for config cleanup but still filter edges.
 * @returns `input` itself when `removedIds` is empty, otherwise fresh `edges`
 *   and `configs`.
 */
export function applyNodeRemovals(
  input: { edges: Edge[]; configs: Record<string, NodeConfig> },
  removedIds: string[],
): { edges: Edge[]; configs: Record<string, NodeConfig> } {
  if (removedIds.length === 0) {
    return input;
  }

  // Set lookup keeps edge filtering linear instead of O(edges × removedIds).
  const removedIdSet = new Set(removedIds);
  const edges = input.edges.filter(
    (edge) => !(removedIdSet.has(edge.source) || removedIdSet.has(edge.target)),
  );
  let configs: Record<string, NodeConfig> = { ...input.configs };
  const removedNames: string[] = [];

  for (const id of removedIds) {
    const removed: NodeConfig | undefined = configs[id];
    if (!removed) {
      // Unknown or already-removed id: nothing to clean up, and the type
      // guards below must not be handed `undefined`.
      continue;
    }
    delete configs[id];
    if (removed.name) {
      removedNames.push(removed.name);
    }

    if (isCategoryConfig(removed)) {
      // Subcategory samplers that hung off this category lose their parent
      // and their mapping.
      const removedName = removed.name;
      for (const config of Object.values(configs)) {
        if (!isSubcategoryConfig(config)) {
          continue;
        }
        if (config.subcategory_parent !== removedName) {
          continue;
        }
        configs[config.id] = {
          ...config,
          // biome-ignore lint/style/useNamingConvention: api schema
          subcategory_parent: "",
          // biome-ignore lint/style/useNamingConvention: api schema
          subcategory_mapping: {},
        };
      }
    }
  }

  // Strip name-based references (templates, aliases, target columns, ...).
  for (const name of removedNames) {
    configs = applyRemovalToConfigs(configs, name);
  }

  return { edges, configs };
}

/**
 * Clears the references implied by edges that were just deleted.
 *
 * For each removed edge whose endpoints both still have configs, the target
 * loses its references to the source's name. When the edge ran from a
 * validator to a code LLM, the validator additionally loses its references to
 * the LLM's name.
 *
 * @returns `configs` itself when nothing changed; otherwise a shallow copy
 *   with the affected entries replaced.
 */
export function applyEdgeRemovals(
  configs: Record<string, NodeConfig>,
  removedEdges: Edge[],
): Record<string, NodeConfig> {
  if (removedEdges.length === 0) {
    return configs;
  }

  let next = configs;
  // Copy-on-write: clone the record right before the first modification.
  const store = (id: string, value: NodeConfig): void => {
    if (next === configs) {
      next = { ...configs };
    }
    next[id] = value;
  };

  for (const edge of removedEdges) {
    const source = next[edge.source];
    const target = next[edge.target];
    if (!source || !target) {
      continue;
    }

    const cleanedTarget = applyRemovalToConfig(target, source.name);
    if (cleanedTarget !== target) {
      store(target.id, cleanedTarget);
    }

    const linksValidatorToCodeLlm =
      source.kind === "validator" &&
      target.kind === "llm" &&
      target.llm_type === "code";
    if (!linksValidatorToCodeLlm) {
      continue;
    }

    const cleanedSource = applyRemovalToConfig(source, target.name);
    if (cleanedSource !== source) {
      store(source.id, cleanedSource);
    }
  }
  return next;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/recipe-executions.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { create } from "zustand";
import type { RecipeExecutionKind } from "../execution-types";
import type { RecipeExecutionRecord } from "../execution-types";
import { sortExecutions, withExecutionDefaults } from "../executions/execution-helpers";

/**
 * Tunable options for a recipe run, held under `runSettings` in the
 * executions store and patched via `setRunSettings`.
 */
export type RecipeRunSettings = {
  batchSize: number;
  batchEnabled: boolean;
  mergeBatches: boolean;
  // Nullable; NOTE(review): null presumably means "no explicit limit" — confirm against the consumer.
  llmParallelRequests: number | null;
  nonInferenceWorkers: number;
  maxConversationRestarts: number;
  maxConversationCorrectionSteps: number;
  disableEarlyShutdown: boolean;
  shutdownErrorRate: number;
  shutdownErrorWindow: number;
};

// Run settings the store starts with and returns to on `resetForRecipe`
// (via INITIAL_STATE.runSettings).
const DEFAULT_RUN_SETTINGS: RecipeRunSettings = {
  batchSize: 1000,
  batchEnabled: false,
  mergeBatches: false,
  llmParallelRequests: null,
  nonInferenceWorkers: 4,
  maxConversationRestarts: 5,
  maxConversationCorrectionSteps: 0,
  disableEarlyShutdown: true,
  shutdownErrorRate: 0.5,
  shutdownErrorWindow: 10,
};

/**
 * Shape of the recipe-executions store: run-dialog form state, loading flags,
 * the list of execution records, and the actions that update them.
 */
type RecipeExecutionsState = {
  // --- data ---
  runDialogOpen: boolean;
  runDialogKind: RecipeExecutionKind;
  previewRows: number;
  fullRows: number;
  fullRunName: string;
  runErrors: string[];
  runSettings: RecipeRunSettings;
  previewLoading: boolean;
  fullLoading: boolean;
  executions: RecipeExecutionRecord[];
  selectedExecutionId: string | null;
  // --- actions ---
  setRunDialogOpen: (open: boolean) => void;
  setRunDialogKind: (kind: RecipeExecutionKind) => void;
  setPreviewRows: (rows: number) => void;
  setFullRows: (rows: number) => void;
  setFullRunName: (name: string) => void;
  setRunErrors: (errors: string[]) => void;
  // Shallow-merges `patch` into the current run settings.
  setRunSettings: (patch: Partial<RecipeRunSettings>) => void;
  setPreviewLoading: (loading: boolean) => void;
  setFullLoading: (loading: boolean) => void;
  // Replaces the whole execution list.
  setExecutions: (records: RecipeExecutionRecord[]) => void;
  // Inserts a record or replaces the one with the same id.
  upsertExecution: (record: RecipeExecutionRecord) => void;
  selectExecution: (id: string | null) => void;
  // Restores every data field to its initial value.
  resetForRecipe: () => void;
};

// Initial values for the data (non-action) fields of the executions store.
// Spread into the store at creation and re-applied by `resetForRecipe`.
// `satisfies` checks the literal against the listed state fields.
const INITIAL_STATE = {
  runDialogOpen: false,
  runDialogKind: "preview",
  previewRows: 5,
  fullRows: 100,
  fullRunName: "",
  runErrors: [],
  runSettings: DEFAULT_RUN_SETTINGS,
  previewLoading: false,
  fullLoading: false,
  executions: [],
  selectedExecutionId: null,
} satisfies Pick<
  RecipeExecutionsState,
  | "runDialogOpen"
  | "runDialogKind"
  | "previewRows"
  | "fullRows"
  | "fullRunName"
  | "runErrors"
  | "runSettings"
  | "previewLoading"
  | "fullLoading"
  | "executions"
  | "selectedExecutionId"
>;

/**
 * Coerces a requested row count to a whole number of at least 1.
 *
 * Non-finite values and anything below 1 fall back to 1. This includes
 * fractions in (0, 1), which a plain `rows > 0` check followed by
 * `Math.floor` would turn into 0.
 */
function normalizeRowCount(rows: number): number {
  return Number.isFinite(rows) && rows >= 1 ? Math.floor(rows) : 1;
}

/**
 * Zustand store holding run-dialog form state and the execution history for
 * the currently open recipe.
 */
export const useRecipeExecutionsStore = create<RecipeExecutionsState>((set) => ({
  ...INITIAL_STATE,
  setRunDialogOpen: (open) => set({ runDialogOpen: open }),
  // Switching from "preview" to "full" also resets the full-run row count to
  // 100 and turns batching off; every other transition only changes the kind.
  setRunDialogKind: (kind) =>
    set((state) => {
      if (state.runDialogKind === "preview" && kind === "full") {
        return {
          runDialogKind: kind,
          fullRows: 100,
          runSettings: {
            ...state.runSettings,
            batchEnabled: false,
          },
        };
      }
      return { runDialogKind: kind };
    }),
  setPreviewRows: (rows) => set({ previewRows: normalizeRowCount(rows) }),
  setFullRows: (rows) => set({ fullRows: normalizeRowCount(rows) }),
  setFullRunName: (name) => set({ fullRunName: name }),
  setRunErrors: (errors) => set({ runErrors: errors }),
  // Shallow merge: only the keys present in `patch` are overwritten.
  setRunSettings: (patch) =>
    set((state) => ({
      runSettings: {
        ...state.runSettings,
        ...patch,
      },
    })),
  setPreviewLoading: (loading) => set({ previewLoading: loading }),
  setFullLoading: (loading) => set({ fullLoading: loading }),
  // Replaces the list with defaulted + sorted records and selects the first
  // one (or nothing when the list is empty).
  setExecutions: (records) =>
    set(() => {
      const normalized = sortExecutions(records.map(withExecutionDefaults));
      return {
        executions: normalized,
        selectedExecutionId: normalized[0]?.id ?? null,
      };
    }),
  // Inserts or replaces the record with the same id, re-sorts, and selects it.
  upsertExecution: (record) =>
    set((state) => {
      const normalized = withExecutionDefaults(record);
      const withoutCurrent = state.executions.filter((item) => item.id !== normalized.id);
      return {
        executions: sortExecutions([normalized, ...withoutCurrent]),
        selectedExecutionId: normalized.id,
      };
    }),
  selectExecution: (id) => set({ selectedExecutionId: id }),
  resetForRecipe: () => set(INITIAL_STATE),
}));


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/recipe-studio-helpers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export {
  applyLayoutDirectionToNodes,
  buildNodeUpdate,
  type NodeUpdateResult,
  type NodeUpdateState,
  updateNodeData,
} from "./helpers/node-updates";
export {
  syncEdgesForConfigPatch,
  syncSubcategoryConfigsForCategoryUpdate,
} from "./helpers/edge-sync";
export {
  applyRemovalToConfig,
  applyRemovalToConfigs,
  applyRenameToConfig,
  applyRenameToConfigs,
} from "./helpers/reference-sync";


================================================
FILE: studio/frontend/src/features/recipe-studio/stores/recipe-studio.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  type Connection,
  type Edge,
  type EdgeChange,
  type IsValidConnection,
  type NodeChange,
  type XYPosition,
  applyEdgeChanges,
  applyNodeChanges,
} from "@xyflow/react";
import { create } from "zustand";
import type {
  RecipeNode,
  RecipeProcessorConfig,
  LayoutDirection,
  LlmType,
  NodeConfig,
  SeedSourceType,
  SamplerType,
} from "../types";
import {
  getBlockDefinition,
  type BlockKind,
  type BlockType,
  type SeedBlockType,
} from "../blocks/registry";
import { deriveDisplayGraph } from "../utils/graph/derive-display-graph";
import { applyRecipeConnection, isValidRecipeConnection } from "../utils/graph";
import {
  HANDLE_IDS,
  normalizeRecipeHandleId,
  remapRecipeEdgeHandlesForLayout,
} from "../utils/handles";
import type { RecipeSnapshot } from "../utils/import";
import { getLayoutedElements } from "../utils/layout";
import {
  centerModelInfraNodes,
  optimizeModelInfraEdgeHandles,
} from "./helpers/model-infra-layout";
import { applyEdgeRemovals, applyNodeRemovals } from "./helpers/removals";
import {
  applyRenameToConfigs,
  applyLayoutDirectionToNodes,
  buildNodeUpdate,
  syncEdgesForConfigPatch,
  syncSubcategoryConfigsForCategoryUpdate,
  updateNodeData,
} from "./recipe-studio-helpers";

// Allowed values for the store's `sheetView` field; "root" is the initial one.
type SheetView =
  | "root"
  | "sampler"
  | "seed"
  | "llm"
  | "validator"
  | "expression"
  | "note"
  | "processor";

/**
 * Shape of the recipe-studio store: the graph (nodes, edges, per-node
 * configs), UI state (sheet, dialog, layout direction, execution lock),
 * id/position counters, and the actions that mutate them.
 */
type RecipeStudioState = {
  // --- graph data ---
  nodes: RecipeNode[];
  edges: Edge[];
  auxNodePositions: Record<string, XYPosition>;
  llmAuxVisibility: Record<string, boolean>;
  // Node configs keyed by node id.
  configs: Record<string, NodeConfig>;
  processors: RecipeProcessorConfig[];
  // --- UI state ---
  sheetOpen: boolean;
  sheetView: SheetView;
  activeConfigId: string | null;
  dialogOpen: boolean;
  layoutDirection: LayoutDirection;
  // While true, guarded actions return the state unchanged.
  executionLocked: boolean;
  // Numeric suffix for the next node id (`n${nextId}`).
  nextId: number;
  // Y coordinate for the next node added without an explicit position.
  nextY: number;
  fitViewTick: number;
  // --- actions ---
  setSheetOpen: (open: boolean) => void;
  setSheetView: (view: SheetView) => void;
  setProcessors: (processors: RecipeProcessorConfig[]) => void;
  setDialogOpen: (open: boolean) => void;
  setExecutionLocked: (locked: boolean) => void;
  resetRecipe: () => void;
  // Makes `id` the active config and closes the dialog.
  selectConfig: (id: string) => void;
  // Makes `id` the active config and opens the dialog.
  openConfig: (id: string) => void;
  setLayoutDirection: (direction: LayoutDirection) => void;
  applyLayout: () => void;
  setLlmAuxVisibility: (id: string, visible: boolean) => void;
  addSamplerNode: (
    type: SamplerType,
    position?: XYPosition,
    openDialog?: boolean,
  ) => void;
  addSeedNode: (
    type: SeedBlockType,
    position?: XYPosition,
    openDialog?: boolean,
  ) => void;
  addLlmNode: (type: LlmType, position?: XYPosition, openDialog?: boolean) => void;
  addModelProviderNode: (position?: XYPosition, openDialog?: boolean) => void;
  addModelConfigNode: (position?: XYPosition, openDialog?: boolean) => void;
  addToolProfileNode: (position?: XYPosition, openDialog?: boolean) => void;
  addExpressionNode: (position?: XYPosition, openDialog?: boolean) => void;
  addValidatorNode: (
    type: "validator_python" | "validator_sql" | "validator_oxc",
    position?: XYPosition,
    openDialog?: boolean,
  ) => void;
  addMarkdownNoteNode: (position?: XYPosition, openDialog?: boolean) => void;
  updateConfig: (id: string, patch: Partial<NodeConfig>) => void;
  loadRecipe: (snapshot: RecipeSnapshot) => void;
  setAuxNodePosition: (id: string, position: XYPosition) => void;
  // React Flow change/connect callbacks.
  onNodesChange: (changes: NodeChange<RecipeNode>[]) => void;
  onEdgesChange: (changes: EdgeChange<Edge>[]) => void;
  onConnect: (connection: Connection) => void;
  isValidConnection: IsValidConnection;
};

// Initial values for the data (non-action) fields of the recipe-studio store.
// Spread into the store at creation and re-applied by `resetRecipe`.
// NOTE(review): `nextId: 3` / `nextY: 280` look like they leave room for
// pre-existing nodes — confirm where ids n1/n2 come from.
const INITIAL_STATE = {
  nodes: [],
  edges: [],
  auxNodePositions: {},
  llmAuxVisibility: {},
  configs: {},
  processors: [],
  sheetOpen: false,
  sheetView: "root",
  activeConfigId: null,
  dialogOpen: false,
  layoutDirection: "LR",
  executionLocked: false,
  nextId: 3,
  nextY: 280,
  fitViewTick: 0,
} satisfies Pick<
  RecipeStudioState,
  | "nodes"
  | "edges"
  | "auxNodePositions"
  | "llmAuxVisibility"
  | "configs"
  | "processors"
  | "sheetOpen"
  | "sheetView"
  | "activeConfigId"
  | "dialogOpen"
  | "layoutDirection"
  | "executionLocked"
  | "nextId"
  | "nextY"
  | "fitViewTick"
>;

/**
 * Computes the state patch for adding a block of the given kind/type.
 *
 * The new config gets the id `n${state.nextId}` and is created by the block
 * definition with the list of existing configs. When no definition is
 * registered for `kind`/`type`, the current state is returned unchanged.
 */
function buildAddedNodeState(
  state: RecipeStudioState,
  kind: BlockKind,
  type: BlockType,
  position?: XYPosition,
  openDialog = true,
): Partial<RecipeStudioState> | RecipeStudioState {
  const definition = getBlockDefinition(kind, type);
  if (!definition) {
    return state;
  }
  const newId = `n${state.nextId}`;
  const existingConfigs = Object.values(state.configs);
  const newConfig = definition.createConfig(newId, existingConfigs);
  return buildNodeUpdate(
    state,
    newConfig,
    state.layoutDirection,
    position,
    openDialog,
  );
}

/**
 * Extracts the pieces of an add-node patch that follow-up wiring needs.
 *
 * @returns The patch's `nodes`, `configs` and `activeConfigId` (exposed as
 *   `newNodeId`), or `null` when any of the three is missing or falsy.
 */
function getAddedNodeContext(
  update: Partial<RecipeStudioState> | RecipeStudioState,
): {
  nodes: RecipeNode[];
  configs: Record<string, NodeConfig>;
  newNodeId: string;
} | null {
  const { nodes, configs, activeConfigId } = update;
  if (!nodes || !configs || !activeConfigId) {
    return null;
  }
  return { nodes, configs, newNodeId: activeConfigId };
}

/**
 * Moves node `nodeId` to sit 440 units before or after the anchor node along
 * the layout's primary axis (Y for "TB", X otherwise), copying the anchor's
 * coordinate on the other axis.
 *
 * @returns The input array itself when the anchor does not exist; otherwise a
 *   new array in which only the moved node is replaced.
 */
function placeNodeNear(
  nodes: RecipeNode[],
  nodeId: string,
  anchorId: string,
  direction: LayoutDirection,
  relation: "before" | "after",
): RecipeNode[] {
  const anchor = nodes.find((candidate) => candidate.id === anchorId);
  if (!anchor) {
    return nodes;
  }
  const shift = relation === "before" ? -440 : 440;
  const alongY = direction === "TB";
  return nodes.map((node) =>
    node.id === nodeId
      ? {
          ...node,
          position: {
            x: alongY ? anchor.position.x : anchor.position.x + shift,
            y: alongY ? anchor.position.y + shift : anchor.position.y,
          },
        }
      : node,
  );
}

/**
 * Connects `sourceId` → `targetId` through the semantic out/in handles by
 * delegating to `applyRecipeConnection`.
 *
 * @returns The resulting edges, plus the resulting configs — or the input
 *   `configs` when the connection produced none.
 */
function connectSemantic(
  edges: Edge[],
  configs: Record<string, NodeConfig>,
  sourceId: string,
  targetId: string,
  layoutDirection: LayoutDirection,
): { edges: Edge[]; configs: Record<string, NodeConfig> } {
  const { edges: connectedEdges, configs: connectedConfigs } =
    applyRecipeConnection(
      {
        source: sourceId,
        sourceHandle: HANDLE_IDS.semanticOut,
        target: targetId,
        targetHandle: HANDLE_IDS.semanticIn,
      },
      configs,
      edges,
      layoutDirection,
    );
  return {
    edges: connectedEdges,
    configs: connectedConfigs ?? configs,
  };
}

/**
 * Tells whether an edge is one of the model-infrastructure links:
 * model_provider → model_config, model_config → llm, or tool_config → llm.
 * Edges with a missing source or target config never qualify.
 */
function isModelSemanticEdge(edge: Edge, configs: Record<string, NodeConfig>): boolean {
  const source = configs[edge.source];
  const target = configs[edge.target];
  if (!source || !target) {
    return false;
  }
  if (target.kind === "model_config") {
    return source.kind === "model_provider";
  }
  if (target.kind === "llm") {
    return source.kind === "model_config" || source.kind === "tool_config";
  }
  return false;
}

/**
 * Recipe studio store: graph state (nodes, edges, per-node configs), layout
 * settings and dialog/sheet UI flags, plus the actions that mutate them.
 *
 * Most graph-mutating actions return the current state untouched while
 * `executionLocked` is true. The exceptions visible here are the pure UI
 * setters, `resetRecipe`, `loadRecipe`, `setLlmAuxVisibility` and
 * `setAuxNodePosition`.
 */
export const useRecipeStudioStore = create<RecipeStudioState>((set, get) => ({
  ...INITIAL_STATE,
  // --- simple UI / flag setters ---
  setSheetOpen: (open) => set({ sheetOpen: open }),
  setSheetView: (view) => set({ sheetView: view }),
  setProcessors: (processors) =>
    set((state) => (state.executionLocked ? state : { processors })),
  setDialogOpen: (open) => set({ dialogOpen: open }),
  setExecutionLocked: (locked) => set({ executionLocked: locked }),
  resetRecipe: () => set(INITIAL_STATE),
  // selectConfig makes a config active with the dialog closed; openConfig
  // makes it active and opens the dialog.
  selectConfig: (id) => set({ activeConfigId: id, dialogOpen: false }),
  openConfig: (id) => set({ activeConfigId: id, dialogOpen: true }),
  // Switch layout direction and rewrite edge handles / node data to match.
  setLayoutDirection: (direction) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      return {
        layoutDirection: direction,
        edges: state.edges.map((edge) => {
          // Model-infra edges only get their handle ids normalized; every
          // other edge is remapped for the new direction.
          if (isModelSemanticEdge(edge, state.configs)) {
            return {
              ...edge,
              sourceHandle: normalizeRecipeHandleId(edge.sourceHandle),
              targetHandle: normalizeRecipeHandleId(edge.targetHandle),
            };
          }
          return {
            ...edge,
            ...remapRecipeEdgeHandlesForLayout(edge, direction),
          };
        }),
        nodes: applyLayoutDirectionToNodes(
          state.nodes,
          state.configs,
          direction,
        ),
      };
    }),
  // Auto-layout: lay out the derived display graph, copy the resulting
  // positions back onto the stored nodes, then post-process model-infra
  // nodes and edges. Stored aux node positions are cleared.
  applyLayout: () =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const isTopBottom = state.layoutDirection === "TB";

      // The display graph is derived with empty auxNodePositions, i.e.
      // ignoring any aux positions currently stored in state.
      const displayGraph = deriveDisplayGraph({
        nodes: state.nodes,
        edges: state.edges,
        configs: state.configs,
        layoutDirection: state.layoutDirection,
        auxNodePositions: {},
        llmAuxVisibility: state.llmAuxVisibility,
      });
      const { nodes } = getLayoutedElements(displayGraph.nodes, displayGraph.edges, {
        direction: state.layoutDirection,
        nodesep: isTopBottom ? 120 : 80,
        ranksep: isTopBottom ? 140 : 80,
        configs: state.configs,
      });
      const layoutedPositions = new Map(
        nodes.map((node) => [node.id, node.position] as const),
      );
      // Only stored nodes that received a layouted position are moved;
      // the rest keep their current position.
      const nextNodes = state.nodes.map((node) => {
        const position = layoutedPositions.get(node.id);
        if (!position) {
          return node;
        }
        return { ...node, position };
      });
      const centeredNodes = centerModelInfraNodes(
        nextNodes,
        state.edges,
        state.configs,
        state.layoutDirection,
      );
      const optimizedEdges = optimizeModelInfraEdgeHandles(
        state.edges,
        centeredNodes,
        state.configs,
        state.layoutDirection,
      );
      return {
        auxNodePositions: {},
        edges: optimizedEdges,
        nodes: applyLayoutDirectionToNodes(
          centeredNodes,
          state.configs,
          state.layoutDirection,
        ),
      };
    }),
  // Record the aux visibility flag for `id`; no-op when it already matches.
  setLlmAuxVisibility: (id, visible) =>
    set((state) => {
      if (state.llmAuxVisibility[id] === visible) {
        return state;
      }
      return {
        llmAuxVisibility: {
          ...state.llmAuxVisibility,
          [id]: visible,
        },
      };
    }),
  addSamplerNode: (type, position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      return buildAddedNodeState(state, "sampler", type, position, openDialog);
    }),
  // Add a seed node, or, when a seed config already exists, reuse it:
  // its source type is switched and its source-specific fields are reset.
  addSeedNode: (type, position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const existing = Object.values(state.configs).find(
        (config) => config.kind === "seed",
      );
      if (!existing) {
        return buildAddedNodeState(
          state,
          "seed",
          type,
          position,
          openDialog,
        );
      }
      // Any type other than "seed_local" / "seed_unstructured" maps to "hf".
      let nextSourceType: SeedSourceType = "hf";
      if (type === "seed_local") {
        nextSourceType = "local";
      } else if (type === "seed_unstructured") {
        nextSourceType = "unstructured";
      }

      const nextConfig = {
        ...existing,
        seed_source_type: nextSourceType,
        hf_repo_id: "",
        hf_subset: "",
        hf_split: "",
        hf_path: "",
        hf_token: "",
        hf_endpoint: "https://huggingface.co",
        local_file_name: "",
        unstructured_file_name: "",
        seed_columns: [],
        seed_drop_columns: [],
        seed_preview_rows: [],
        unstructured_chunk_size: "1200",
        unstructured_chunk_overlap: "200",
      };
      return {
        configs: {
          ...state.configs,
          [existing.id]: nextConfig,
        },
        // The existing seed node becomes the only selected node.
        nodes: updateNodeData(
          state.nodes.map((node) => ({ ...node, selected: node.id === existing.id })),
          existing.id,
          nextConfig,
          state.layoutDirection,
        ),
        activeConfigId: existing.id,
        dialogOpen: openDialog,
      };
    }),
  // Add an llm node. When exactly one model_config exists, the new node is
  // connected to it and, if no explicit position was given, placed after it.
  addLlmNode: (type, position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const added = buildAddedNodeState(state, "llm", type, position, openDialog);
      const context = getAddedNodeContext(added);
      if (!context) {
        return added;
      }
      let { nodes, configs } = context;
      let edges = state.edges;
      const modelConfigs = Object.values(configs).filter(
        (config) => config.kind === "model_config",
      );
      if (modelConfigs.length === 1) {
        if (!position) {
          nodes = placeNodeNear(
            nodes,
            context.newNodeId,
            modelConfigs[0].id,
            state.layoutDirection,
            "after",
          );
        }
        const next = connectSemantic(
          edges,
          configs,
          modelConfigs[0].id,
          context.newNodeId,
          state.layoutDirection,
        );
        edges = next.edges;
        configs = next.configs;
      }
      return { ...added, nodes, edges, configs };
    }),
  // Add a model_provider node. It is placed before the first model_config
  // with a blank `provider` (when no position was given) and auto-connected
  // only when exactly one such model_config exists.
  addModelProviderNode: (position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const added = buildAddedNodeState(
        state,
        "llm",
        "model_provider",
        position,
        openDialog,
      );
      const context = getAddedNodeContext(added);
      if (!context) {
        return added;
      }
      let { nodes, configs } = context;
      let edges = state.edges;
      const unboundModelConfigs = Object.values(configs).filter(
        (config) =>
          config.kind === "model_config" &&
          !config.provider.trim(),
      );
      if (!position && unboundModelConfigs.length > 0) {
        nodes = placeNodeNear(
          nodes,
          context.newNodeId,
          unboundModelConfigs[0].id,
          state.layoutDirection,
          "before",
        );
      }
      if (unboundModelConfigs.length === 1) {
        const next = connectSemantic(
          edges,
          configs,
          context.newNodeId,
          unboundModelConfigs[0].id,
          state.layoutDirection,
        );
        edges = next.edges;
        configs = next.configs;
      }
      return { ...added, nodes, edges, configs };
    }),
  // Add a model_config node. Placement prefers "after the single provider",
  // falling back to "before the first llm with a blank model_alias".
  // Connections are made to the provider and/or llm only when there is
  // exactly one candidate of that kind.
  addModelConfigNode: (position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const added = buildAddedNodeState(
        state,
        "llm",
        "model_config",
        position,
        openDialog,
      );
      const context = getAddedNodeContext(added);
      if (!context) {
        return added;
      }
      let { nodes, configs } = context;
      let edges = state.edges;
      const providers = Object.values(configs).filter(
        (config) => config.kind === "model_provider",
      );
      const unboundLlms = Object.values(configs).filter(
        (config) => config.kind === "llm" && !config.model_alias.trim(),
      );
      if (!position && providers.length === 1) {
        nodes = placeNodeNear(
          nodes,
          context.newNodeId,
          providers[0].id,
          state.layoutDirection,
          "after",
        );
      } else if (!position && unboundLlms.length > 0) {
        nodes = placeNodeNear(
          nodes,
          context.newNodeId,
          unboundLlms[0].id,
          state.layoutDirection,
          "before",
        );
      }
      if (providers.length === 1) {
        const next = connectSemantic(
          edges,
          configs,
          providers[0].id,
          context.newNodeId,
          state.layoutDirection,
        );
        edges = next.edges;
        configs = next.configs;
      }
      // Uses the edges/configs already updated by the provider connection.
      if (unboundLlms.length === 1) {
        const next = connectSemantic(
          edges,
          configs,
          context.newNodeId,
          unboundLlms[0].id,
          state.layoutDirection,
        );
        edges = next.edges;
        configs = next.configs;
      }
      return { ...added, nodes, edges, configs };
    }),
  // Add a tool_config node. It is placed before the first llm with a blank
  // or missing `tool_alias` (when no position was given) and auto-connected
  // only when exactly one such llm exists.
  addToolProfileNode: (position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const added = buildAddedNodeState(
        state,
        "llm",
        "tool_config",
        position,
        openDialog,
      );
      const context = getAddedNodeContext(added);
      if (!context) {
        return added;
      }
      let { nodes, configs } = context;
      let edges = state.edges;
      const unboundLlms = Object.values(configs).filter(
        (config) => config.kind === "llm" && !(config.tool_alias?.trim()),
      );
      if (!position && unboundLlms.length > 0) {
        nodes = placeNodeNear(
          nodes,
          context.newNodeId,
          unboundLlms[0].id,
          state.layoutDirection,
          "before",
        );
      }
      if (unboundLlms.length === 1) {
        const next = connectSemantic(
          edges,
          configs,
          context.newNodeId,
          unboundLlms[0].id,
          state.layoutDirection,
        );
        edges = next.edges;
        configs = next.configs;
      }
      return { ...added, nodes, edges, configs };
    }),
  addExpressionNode: (position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      return buildAddedNodeState(
        state,
        "expression",
        "expression",
        position,
        openDialog,
      );
    }),
  addValidatorNode: (type, position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      return buildAddedNodeState(
        state,
        "validator",
        type,
        position,
        openDialog,
      );
    }),
  addMarkdownNoteNode: (position, openDialog = true) =>
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      return buildAddedNodeState(
        state,
        "note",
        "markdown_note",
        position,
        openDialog,
      );
    }),
  // Replace the graph with a snapshot. Not gated by `executionLocked`.
  // Also clears aux visibility, closes the dialog, resets the sheet view to
  // "root" and bumps `fitViewTick`.
  loadRecipe: (snapshot) =>
    set((state) => ({
      configs: snapshot.configs,
      nodes: applyLayoutDirectionToNodes(
        snapshot.nodes,
        snapshot.configs,
        snapshot.layoutDirection,
      ),
      edges: snapshot.edges,
      processors: snapshot.processors,
      layoutDirection: snapshot.layoutDirection,
      nextId: snapshot.nextId,
      nextY: snapshot.nextY,
      auxNodePositions: snapshot.auxNodePositions ?? {},
      llmAuxVisibility: {},
      activeConfigId: null,
      dialogOpen: false,
      sheetView: "root",
      fitViewTick: state.fitViewTick + 1,
    })),
  // Store the position of aux node `id`; no-op when x and y are unchanged.
  setAuxNodePosition: (id, position) =>
    set((state) => {
      const current = state.auxNodePositions[id];
      if (current && current.x === position.x && current.y === position.y) {
        return state;
      }
      return {
        auxNodePositions: {
          ...state.auxNodePositions,
          [id]: position,
        },
      };
    }),
  // Merge `patch` into config `id`, then bring node data, edges and
  // dependent configs in line with the change. No-op for unknown ids.
  updateConfig: (id, patch) => {
    const applyUpdate = (state: RecipeStudioState) => {
      if (state.executionLocked) {
        return state;
      }
      const current = state.configs[id];
      if (!current) {
        return state;
      }
      const next = { ...current, ...patch } as NodeConfig;
      const oldName = current.name;
      const newName = next.name;
      const nameChanged = oldName !== newName;
      let configs: Record<string, NodeConfig> = {
        ...state.configs,
        [id]: next,
      };
      const nodes = updateNodeData(
        state.nodes,
        id,
        next,
        state.layoutDirection,
      );
      // Edges are synced against the configs map as it stands before the
      // subcategory sync and rename passes below.
      const edges = syncEdgesForConfigPatch(
        current,
        patch,
        configs,
        state.edges,
        state.layoutDirection,
      );
      configs = syncSubcategoryConfigsForCategoryUpdate(
        current,
        next,
        configs,
        oldName,
        newName,
        nameChanged,
      );

      if (nameChanged) {
        configs = applyRenameToConfigs(configs, oldName, newName);
      }

      return { configs, nodes, edges };
    };
    set(applyUpdate);
  },
  // Apply node changes. Removed node ids are also passed to
  // `applyNodeRemovals` for edges/configs and dropped from llmAuxVisibility.
  onNodesChange: (changes) => {
    const applyNodesChange = (state: RecipeStudioState) => {
      if (state.executionLocked) {
        return state;
      }
      const removedIds = changes
        .filter((change) => change.type === "remove")
        .map((change) => change.id);

      const removed = applyNodeRemovals(
        { edges: state.edges, configs: state.configs },
        removedIds,
      );
      const nodes = applyNodeChanges<RecipeNode>(changes, state.nodes);
      // Keep the same visibility object when nothing was removed.
      const llmAuxVisibility =
        removedIds.length === 0
          ? state.llmAuxVisibility
          : Object.fromEntries(
              Object.entries(state.llmAuxVisibility).filter(
                ([id]) => !removedIds.includes(id),
              ),
            );
      return {
        nodes,
        edges: removed.edges,
        configs: removed.configs,
        llmAuxVisibility,
      };
    };
    set(applyNodesChange);
  },
  // Apply edge changes. Edges being removed are resolved against the
  // current edge list and passed to `applyEdgeRemovals` to update configs.
  onEdgesChange: (changes) => {
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const removedEdges = changes
        .filter((change) => change.type === "remove")
        .map((change) => state.edges.find((edge) => edge.id === change.id))
        .filter((edge): edge is Edge => Boolean(edge));

      const configs = applyEdgeRemovals(state.configs, removedEdges);

      const edges = applyEdgeChanges(changes, state.edges);
      // Only include `configs` in the update when it is a new object.
      return configs === state.configs ? { edges } : { edges, configs };
    });
  },
  // Apply a new connection via `applyRecipeConnection`; configs are
  // written only when that call returns them.
  onConnect: (connection) => {
    set((state) => {
      if (state.executionLocked) {
        return state;
      }
      const result = applyRecipeConnection(
        connection,
        state.configs,
        state.edges,
        state.layoutDirection,
      );
      return result.configs
        ? { edges: result.edges, configs: result.configs }
        : { edges: result.edges };
    });
  },
  // Validate a candidate connection against the current configs; missing
  // endpoint/handle fields are normalized to null first.
  isValidConnection: (connection) =>
    isValidRecipeConnection(
      {
        source: connection.source ?? null,
        target: connection.target ?? null,
        sourceHandle: connection.sourceHandle ?? null,
        targetHandle: connection.targetHandle ?? null,
      },
      get().configs,
    ),
}));


================================================
FILE: studio/frontend/src/features/recipe-studio/types/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Node } from "@xyflow/react";

/** Allowed values of `SamplerConfig.sampler_type`. */
export type SamplerType =
  | "category"
  | "subcategory"
  | "uniform"
  | "gaussian"
  | "bernoulli"
  | "datetime"
  | "timedelta"
  | "uuid"
  | "person"
  | "person_from_faker";

/** Allowed values of `LlmConfig.llm_type`. */
export type LlmType = "text" | "structured" | "code" | "judge";
/** Allowed values of `ValidatorConfig.code_lang`. */
export type ValidatorCodeLang =
  | "javascript"
  | "typescript"
  | "jsx"
  | "tsx"
  | "python"
  | "sql:sqlite"
  | "sql:postgres"
  | "sql:mysql"
  | "sql:tsql"
  | "sql:bigquery"
  | "sql:ansi";
/** Allowed values of `ValidatorConfig.validator_type`. */
export type ValidatorType = "code" | "oxc";
/** Allowed values of `ValidatorConfig.oxc_validation_mode`. */
export type OxcValidationMode = "syntax" | "lint" | "syntax+lint";
/** Allowed values of `ValidatorConfig.oxc_code_shape`. */
export type OxcCodeShape = "auto" | "module" | "snippet";

/** Allowed values of `ExpressionConfig.dtype`. */
export type ExpressionDtype = "str" | "int" | "float" | "bool";

/** Graph layout direction (presumably left-to-right vs. top-to-bottom). */
export type LayoutDirection = "LR" | "TB";

/** Allowed values of `SeedConfig.sampling_strategy`. */
export type SeedSamplingStrategy = "ordered" | "shuffle";
/** Allowed values of `SeedConfig.selection_type`. */
export type SeedSelectionType = "none" | "index_range" | "partition_block";
/** Allowed values of `SeedConfig.seed_source_type`. */
export type SeedSourceType = "hf" | "local" | "unstructured";
/** Node kinds grouped as "infra": model provider, model config, tool config. */
export const INFRA_NODE_KINDS = new Set([
  "model_provider",
  "model_config",
  "tool_config",
]);

/** Data payload carried by each recipe graph node (see `RecipeNode`). */
export type RecipeNodeData = {
  title: string;
  name: string;
  // Node category.
  kind:
    | "sampler"
    | "llm"
    | "validator"
    | "expression"
    | "seed"
    | "note"
    | "model_provider"
    | "model_config"
    | "tool_config";
  subtype: string;
  // Concrete block variant within the node's kind.
  blockType:
    | SamplerType
    | LlmType
    | "validator_python"
    | "validator_sql"
    | "validator_oxc"
    | "expression"
    | "seed"
    | "markdown_note"
    | "model_provider"
    | "model_config"
    | "tool_config";
  layoutDirection?: LayoutDirection;
  runtimeState?: "idle" | "running" | "done";
  executionLocked?: boolean;
};

/** Graph node carrying `RecipeNodeData`, with node type "builder". */
export type RecipeNode = Node<RecipeNodeData, "builder">;

/** Category values/weights override; value type of `SamplerConfig.conditional_params`. */
export type CategoryConditionalParams = {
  // biome-ignore lint/style/useNamingConvention: api schema
  sampler_type: "category";
  values: string[];
  weights?: Array<number | null>;
};

/**
 * Config for a sampler node. A single flat shape is shared by every
 * `sampler_type`; the type-specific fields are all optional.
 */
export type SamplerConfig = {
  id: string;
  kind: "sampler";
  // ui-only
  advancedOpen?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  sampler_type: SamplerType;
  name: string;
  drop?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  convert_to?: "float" | "int" | "str";
  values?: string[];
  weights?: Array<number | null>;
  low?: string;
  high?: string;
  mean?: string;
  std?: string;
  p?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  datetime_start?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  datetime_end?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  datetime_unit?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  dt_min?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  dt_max?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  reference_column_name?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  timedelta_unit?: "D" | "h" | "m" | "s";
  // biome-ignore lint/style/useNamingConvention: api schema
  uuid_format?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  person_locale?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  person_sex?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  person_age_range?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  person_city?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  person_with_synthetic_personas?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  subcategory_parent?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  subcategory_mapping?: Record<string, string[]>;
  // biome-ignore lint/style/useNamingConvention: api schema
  conditional_params?: Record<string, CategoryConditionalParams>;
};

/** One selectable option of a `Score`. */
export type ScoreOption = {
  value: string;
  description: string;
};

/** Named score with its options; element type of `LlmConfig.scores`. */
export type Score = {
  name: string;
  description: string;
  options: ScoreOption[];
};

/** Allowed values of `LlmMcpProviderConfig.provider_type`. */
export type McpProviderType = "stdio" | "streamable_http";

/** Key/value pair; element type of `LlmMcpProviderConfig.env`. */
export type McpEnvVar = {
  key: string;
  value: string;
};

/** MCP provider entry; element type of `ToolProfileConfig.mcp_providers`. */
export type LlmMcpProviderConfig = {
  id: string;
  name: string;
  // biome-ignore lint/style/useNamingConvention: ui schema
  provider_type: McpProviderType;
  command?: string;
  args?: string[];
  env?: McpEnvVar[];
  endpoint?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  api_key?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  api_key_env?: string;
};

/** Tool configuration keyed by `tool_alias`, listing providers as strings. */
export type LlmToolConfig = {
  id: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  tool_alias: string;
  providers: string[];
  // biome-ignore lint/style/useNamingConvention: api schema
  allow_tools?: string[];
  // biome-ignore lint/style/useNamingConvention: api schema
  max_tool_call_turns?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  timeout_sec?: string;
};

/** Config for a tool_config node (member of `NodeConfig`). */
export type ToolProfileConfig = {
  id: string;
  kind: "tool_config";
  name: string;
  // biome-ignore lint/style/useNamingConvention: ui schema
  mcp_providers: LlmMcpProviderConfig[];
  // biome-ignore lint/style/useNamingConvention: ui schema
  fetched_tools_by_provider?: Record<string, string[]>;
  // biome-ignore lint/style/useNamingConvention: api schema
  allow_tools?: string[];
  // biome-ignore lint/style/useNamingConvention: api schema
  max_tool_call_turns?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  timeout_sec?: string;
};

/** Image-context toggle and column name; type of `LlmConfig.image_context`. */
export type LlmImageContextConfig = {
  enabled: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  column_name: string;
};

/** Allowed values of `LlmConfig.with_trace`. */
export type LlmTraceType = "none" | "last_message" | "all_messages";

/** Config for an llm node (member of `NodeConfig`). */
export type LlmConfig = {
  id: string;
  kind: "llm";
  // ui-only
  advancedOpen?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  llm_type: LlmType;
  name: string;
  drop?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  model_alias: string;
  prompt: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  system_prompt: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  code_lang?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  output_format?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  tool_alias?: string;
  scores?: Score[];
  // ui-only, serialized into multi_modal_context for DataDesigner
  // biome-ignore lint/style/useNamingConvention: ui schema
  image_context?: LlmImageContextConfig;
  // biome-ignore lint/style/useNamingConvention: api schema
  with_trace?: LlmTraceType;
  // biome-ignore lint/style/useNamingConvention: api schema
  extract_reasoning_content?: boolean;
};

/** Config for a model_provider node (member of `NodeConfig`). */
export type ModelProviderConfig = {
  id: string;
  kind: "model_provider";
  name: string;
  endpoint: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  provider_type: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  api_key_env?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  api_key?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  extra_headers?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  extra_body?: string;
};

/**
 * Config for a model_config node (member of `NodeConfig`). The inference
 * parameters are stored as strings.
 */
export type ModelConfig = {
  id: string;
  kind: "model_config";
  name: string;
  model: string;
  provider: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  inference_temperature?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  inference_top_p?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  inference_max_tokens?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  inference_timeout?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  inference_extra_body?: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  skip_health_check?: boolean;
};

/** Config for an expression node (member of `NodeConfig`). */
export type ExpressionConfig = {
  id: string;
  kind: "expression";
  name: string;
  drop?: boolean;
  expr: string;
  dtype: ExpressionDtype;
};

/** Config for a validator node (member of `NodeConfig`). */
export type ValidatorConfig = {
  id: string;
  kind: "validator";
  // ui-only
  advancedOpen?: boolean;
  name: string;
  drop?: boolean;
  // biome-ignore lint/style/useNamingConvention: api schema
  target_columns: string[];
  // ui-only
  validator_type: ValidatorType;
  // biome-ignore lint/style/useNamingConvention: api schema
  code_lang: ValidatorCodeLang;
  // ui-only (used for OXC validators)
  oxc_validation_mode: OxcValidationMode;
  // ui-only (used for OXC validators)
  oxc_code_shape: OxcCodeShape;
  // ui ergonomics (serialized to int in payload)
  batch_size: string;
};

/**
 * Config for a markdown note (member of `NodeConfig`).
 * NOTE(review): the config discriminator here is "markdown_note", while
 * `RecipeNodeData.kind` lists "note" — confirm the two are meant to differ.
 */
export type MarkdownNoteConfig = {
  id: string;
  kind: "markdown_note";
  name: string;
  markdown: string;
  // ui-only
  note_color?: string;
  // ui-only (0-100 as string for slider/input ergonomics)
  note_opacity?: string;
};

/**
 * Config for a seed node (member of `NodeConfig`). Fields for all
 * `seed_source_type` variants (hf, local, unstructured) share this one shape.
 */
export type SeedConfig = {
  id: string;
  kind: "seed";
  // ui-only
  advancedOpen?: boolean;
  name: string;
  drop?: boolean;
  // ui-only: explicit per-column drop for structured seed sources (hf/local)
  seed_drop_columns?: string[];
  seed_source_type: SeedSourceType;
  // ui-only (serialized in seed_config)
  hf_repo_id: string;
  hf_subset?: string;
  hf_split?: string;
  hf_path: string;
  hf_token?: string;
  hf_endpoint?: string;
  local_file_name?: string;
  unstructured_file_name?: string;
  // ui-only
  seed_preview_rows?: Record<string, unknown>[];
  // ui-only (string for input ergonomics)
  unstructured_chunk_size?: string;
  // ui-only (string for input ergonomics)
  unstructured_chunk_overlap?: string;
  seed_splits?: string[];
  // ui-only
  // biome-ignore lint/style/useNamingConvention: ui schema
  seed_globs_by_split?: Record<string, string>;
  seed_columns?: string[];
  sampling_strategy: SeedSamplingStrategy;
  selection_type: SeedSelectionType;
  selection_start?: string;
  selection_end?: string;
  selection_index?: string;
  selection_num_partitions?: string;
};

/** Processor entry of type "schema_transform", holding a named template. */
export type SchemaTransformProcessorConfig = {
  id: string;
  // biome-ignore lint/style/useNamingConvention: api schema
  processor_type: "schema_transform";
  name: string;
  template: string;
};

/** All processor config shapes; currently only the schema transform. */
export type RecipeProcessorConfig = SchemaTransformProcessorConfig;

/** Union of every per-node config shape, discriminated by `kind`. */
export type NodeConfig =
  | SamplerConfig
  | LlmConfig
  | ValidatorConfig
  | ExpressionConfig
  | MarkdownNoteConfig
  | SeedConfig
  | ModelProviderConfig
  | ModelConfig
  | ToolProfileConfig;


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/config-factories.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  ExpressionConfig,
  LlmConfig,
  LlmType,
  MarkdownNoteConfig,
  ModelConfig,
  ModelProviderConfig,
  NodeConfig,
  SeedConfig,
  SeedSourceType,
  SamplerConfig,
  SamplerType,
  ToolProfileConfig,
  ValidatorCodeLang,
  ValidatorType,
  ValidatorConfig,
} from "../types";
import { nextName } from "./naming";

/**
 * Build the default config for a new sampler node.
 *
 * The name comes from `nextName(existing, samplerType)`, so the requested
 * sampler type doubles as the name prefix. Each sampler type gets its own
 * set of default fields; "person", "person_from_faker" and any unrecognized
 * value all yield a "person_from_faker" config.
 */
export function makeSamplerConfig(
  id: string,
  samplerType: SamplerType,
  existing: NodeConfig[],
): SamplerConfig {
  const name = nextName(existing, samplerType);
  switch (samplerType) {
    case "category":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "category",
        name,
        drop: false,
        values: [],
        weights: [],
      };
    case "subcategory":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "subcategory",
        name,
        drop: false,
        // biome-ignore lint/style/useNamingConvention: api schema
        subcategory_parent: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        subcategory_mapping: {},
      };
    case "uniform":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "uniform",
        name,
        drop: false,
        low: "0",
        high: "1",
      };
    case "gaussian":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "gaussian",
        name,
        drop: false,
        mean: "0",
        std: "1",
      };
    case "bernoulli":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "bernoulli",
        name,
        drop: false,
        p: "0.5",
      };
    case "datetime":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "datetime",
        name,
        drop: false,
        // biome-ignore lint/style/useNamingConvention: api schema
        datetime_start: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        datetime_end: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        datetime_unit: "day",
      };
    case "timedelta":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "timedelta",
        name,
        drop: false,
        // biome-ignore lint/style/useNamingConvention: api schema
        dt_min: "0",
        // biome-ignore lint/style/useNamingConvention: api schema
        dt_max: "1",
        // biome-ignore lint/style/useNamingConvention: api schema
        reference_column_name: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        timedelta_unit: "D",
      };
    case "uuid":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "uuid",
        name,
        drop: false,
        // biome-ignore lint/style/useNamingConvention: api schema
        uuid_format: "",
      };
    default:
      // Covers "person", "person_from_faker" and any other value: all of
      // them are stored as a "person_from_faker" sampler.
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "person_from_faker",
        name,
        drop: false,
        // biome-ignore lint/style/useNamingConvention: api schema
        person_locale: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        person_sex: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        person_age_range: "",
        // biome-ignore lint/style/useNamingConvention: api schema
        person_city: "",
      };
  }
}

/**
 * Build the default config for a new llm node of the given `llmType`.
 *
 * The name prefix is "llm_structured", "llm_code" or "llm_judge" for those
 * types and "llm_text" for anything else. Type-specific defaults:
 * judge nodes get a scoring prompt and an empty `scores` list, code nodes
 * get `code_lang` "python", structured nodes get a sample `output_format`.
 */
export function makeLlmConfig(
  id: string,
  llmType: LlmType,
  existing: NodeConfig[],
): LlmConfig {
  let namePrefix: string;
  switch (llmType) {
    case "structured":
      namePrefix = "llm_structured";
      break;
    case "code":
      namePrefix = "llm_code";
      break;
    case "judge":
      namePrefix = "llm_judge";
      break;
    default:
      namePrefix = "llm_text";
  }
  const name = nextName(existing, namePrefix);
  const isJudge = llmType === "judge";
  return {
    id,
    kind: "llm",
    // biome-ignore lint/style/useNamingConvention: api schema
    llm_type: llmType,
    name,
    drop: false,
    // biome-ignore lint/style/useNamingConvention: api schema
    model_alias: "",
    prompt: isJudge
      ? "Evaluate the content using the scoring criteria below."
      : "Write a response.",
    // biome-ignore lint/style/useNamingConvention: api schema
    system_prompt: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    code_lang: llmType === "code" ? "python" : undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    output_format:
      llmType === "structured" ? '{\n  "field": "string"\n}' : undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_alias: "",
    // biome-ignore lint/style/useNamingConvention: ui schema
    image_context: {
      enabled: false,
      // biome-ignore lint/style/useNamingConvention: api schema
      column_name: "",
    },
    // biome-ignore lint/style/useNamingConvention: api schema
    with_trace: "none",
    // biome-ignore lint/style/useNamingConvention: api schema
    extract_reasoning_content: false,
    scores: isJudge ? [] : undefined,
  };
}

/**
 * Creates the default configuration for a new model-provider node.
 *
 * The provider type starts as "openai"; endpoint, key and extra header/body
 * fields start empty. The name comes from `nextName(existing, "provider")`.
 */
export function makeModelProviderConfig(
  id: string,
  existing: NodeConfig[],
): ModelProviderConfig {
  const name = nextName(existing, "provider");
  return {
    id,
    kind: "model_provider",
    name,
    endpoint: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    provider_type: "openai",
    // biome-ignore lint/style/useNamingConvention: api schema
    api_key_env: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    api_key: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    extra_headers: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    extra_body: "",
  };
}

/**
 * Creates the default configuration for a new model-config node.
 *
 * Inference settings are stored as strings; only the temperature has a
 * non-empty default ("0.7"). The name comes from `nextName(existing, "model")`.
 */
export function makeModelConfig(
  id: string,
  existing: NodeConfig[],
): ModelConfig {
  const name = nextName(existing, "model");
  return {
    id,
    kind: "model_config",
    name,
    model: "",
    provider: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_temperature: "0.7",
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_max_tokens: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_top_p: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_timeout: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_extra_body: "",
    // biome-ignore lint/style/useNamingConvention: api schema
    skip_health_check: false,
  };
}

/**
 * Creates the default configuration for a new tool-profile node.
 *
 * Provider, fetched-tool and allow lists start empty; the tool-call turn
 * limit defaults to the string "5" and the timeout is left blank. The name
 * comes from `nextName(existing, "tools")`.
 */
export function makeToolProfileConfig(
  id: string,
  existing: NodeConfig[],
): ToolProfileConfig {
  const name = nextName(existing, "tools");
  return {
    id,
    kind: "tool_config",
    name,
    // biome-ignore lint/style/useNamingConvention: ui schema
    mcp_providers: [],
    // biome-ignore lint/style/useNamingConvention: ui schema
    fetched_tools_by_provider: {},
    // biome-ignore lint/style/useNamingConvention: api schema
    allow_tools: [],
    // biome-ignore lint/style/useNamingConvention: api schema
    max_tool_call_turns: "5",
    // biome-ignore lint/style/useNamingConvention: api schema
    timeout_sec: "",
  };
}

/**
 * Creates the default configuration for a new expression node: an empty
 * expression with dtype "str". The name comes from
 * `nextName(existing, "expr")`.
 */
export function makeExpressionConfig(
  id: string,
  existing: NodeConfig[],
): ExpressionConfig {
  const name = nextName(existing, "expr");
  return {
    id,
    kind: "expression",
    name,
    drop: false,
    expr: "",
    dtype: "str",
  };
}

/**
 * Creates the default configuration for a new validator node.
 *
 * The name prefix is `validator_oxc` for the "oxc" validator type,
 * `validator_sql` for a "code" validator whose language starts with "sql:",
 * and `validator_python` in every other case.
 *
 * @param id Identifier stored on the config.
 * @param validatorType Validator flavour to create.
 * @param codeLang Language stored in `code_lang`.
 * @param existing Configs passed to `nextName` when picking the name.
 */
export function makeValidatorConfig(
  id: string,
  validatorType: ValidatorType,
  codeLang: ValidatorCodeLang,
  existing: NodeConfig[],
): ValidatorConfig {
  let namePrefix = "validator_python";
  if (validatorType === "oxc") {
    namePrefix = "validator_oxc";
  } else if (validatorType === "code" && codeLang.startsWith("sql:")) {
    namePrefix = "validator_sql";
  }
  const name = nextName(existing, namePrefix);

  return {
    id,
    kind: "validator",
    name,
    drop: false,
    // biome-ignore lint/style/useNamingConvention: api schema
    target_columns: [],
    validator_type: validatorType,
    // biome-ignore lint/style/useNamingConvention: api schema
    code_lang: codeLang,
    oxc_validation_mode: "syntax",
    oxc_code_shape: "auto",
    batch_size: "10",
  };
}

/**
 * Creates the default configuration for a new markdown note node, with
 * placeholder markdown, colour "#FDE68A" and opacity "35". The name comes
 * from `nextName(existing, "note")`.
 */
export function makeMarkdownNoteConfig(
  id: string,
  existing: NodeConfig[],
): MarkdownNoteConfig {
  const name = nextName(existing, "note");
  return {
    id,
    kind: "markdown_note",
    name,
    markdown: "## Note\n\nAdd markdown here.",
    note_color: "#FDE68A",
    note_opacity: "35",
  };
}

/**
 * Creates the default configuration for a new seed node.
 *
 * @param id Identifier stored on the config.
 * @param existing Configs passed to `nextName(existing, "seed")`.
 * @param seedSourceType Source kind stored in `seed_source_type`;
 *   defaults to "hf".
 */
export function makeSeedConfig(
  id: string,
  existing: NodeConfig[],
  seedSourceType: SeedSourceType = "hf",
): SeedConfig {
  const name = nextName(existing, "seed");
  return {
    id,
    kind: "seed",
    name,
    drop: false,
    seed_drop_columns: [],
    seed_source_type: seedSourceType,
    // Hugging Face source fields.
    hf_repo_id: "",
    hf_subset: "",
    hf_split: "",
    hf_path: "",
    hf_token: "",
    hf_endpoint: "https://huggingface.co",
    // File-based source fields.
    local_file_name: "",
    unstructured_file_name: "",
    seed_preview_rows: [],
    unstructured_chunk_size: "1200",
    unstructured_chunk_overlap: "200",
    seed_splits: [],
    seed_globs_by_split: {},
    seed_columns: [],
    // Sampling / selection defaults.
    sampling_strategy: "ordered",
    selection_type: "none",
    selection_start: "0",
    selection_end: "10",
    selection_index: "0",
    selection_num_partitions: "1",
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/config-labels.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  ExpressionDtype,
  LlmType,
  SamplerType,
} from "../types";

// Display label for each sampler type. Note that `person` and
// `person_from_faker` intentionally map to the same label text.
const SAMPLER_LABELS: Record<SamplerType, string> = {
  category: "Category",
  subcategory: "Subcategory",
  uniform: "Random number",
  gaussian: "Bell-curve number",
  bernoulli: "Yes/no value",
  datetime: "Date and time",
  timedelta: "Time offset",
  uuid: "Unique ID",
  person: "Synthetic person",
  person_from_faker: "Synthetic person",
};

// Display label for each LLM node type.
const LLM_LABELS: Record<LlmType, string> = {
  text: "AI text",
  structured: "AI structured data",
  code: "AI code",
  judge: "AI scorer",
};

// Display label for each expression output dtype.
const EXPRESSION_LABELS: Record<ExpressionDtype, string> = {
  str: "Text",
  int: "Int",
  float: "Float",
  bool: "Bool",
};

/**
 * Returns the display label for a sampler type, falling back to
 * "Generated field" when the lookup table has no entry for it.
 */
export function labelForSampler(type: SamplerType): string {
  const label = SAMPLER_LABELS[type];
  return label ?? "Generated field";
}

/**
 * Returns the display label for an LLM node type, falling back to "AI"
 * when the lookup table has no entry for it.
 */
export function labelForLlm(type: LlmType): string {
  const label = LLM_LABELS[type];
  return label ?? "AI";
}

/**
 * Returns the display label for an expression dtype, falling back to
 * "Formula" when the lookup table has no entry for it.
 */
export function labelForExpression(type: ExpressionDtype): string {
  const label = EXPRESSION_LABELS[type];
  return label ?? "Formula";
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/config-type-guards.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  ExpressionConfig,
  LlmConfig,
  NodeConfig,
  SamplerConfig,
  ValidatorConfig,
} from "../types";

/** Type guard: true when `config` is present and its kind is "sampler". */
export function isSamplerConfig(
  config: NodeConfig | null | undefined,
): config is SamplerConfig {
  return config?.kind === "sampler";
}

/**
 * Type guard: true when `config` is a sampler whose `sampler_type` is
 * "category".
 */
export function isCategoryConfig(
  config: NodeConfig | null | undefined,
): config is SamplerConfig {
  if (!config || config.kind !== "sampler") {
    return false;
  }
  return config.sampler_type === "category";
}

/**
 * Type guard: true when `config` is a sampler whose `sampler_type` is
 * "subcategory".
 */
export function isSubcategoryConfig(
  config: NodeConfig | null | undefined,
): config is SamplerConfig {
  if (!config || config.kind !== "sampler") {
    return false;
  }
  return config.sampler_type === "subcategory";
}

/** Type guard: true when `config` is present and its kind is "llm". */
export function isLlmConfig(
  config: NodeConfig | null | undefined,
): config is LlmConfig {
  return config?.kind === "llm";
}

/** Type guard: true when `config` is present and its kind is "expression". */
export function isExpressionConfig(
  config: NodeConfig | null | undefined,
): config is ExpressionConfig {
  return config?.kind === "expression";
}

/** Type guard: true when `config` is present and its kind is "validator". */
export function isValidatorConfig(
  config: NodeConfig | null | undefined,
): config is ValidatorConfig {
  return config?.kind === "validator";
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph/derive-display-graph.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge, Node, XYPosition } from "@xyflow/react";
import type { RecipeGraphAuxNodeData } from "../../components/recipe-graph-aux-node";
import { DEFAULT_NODE_HEIGHT, DEFAULT_NODE_WIDTH } from "../../constants";
import type { RecipeNode, LayoutDirection, NodeConfig } from "../../types";
import {
  getDefaultDataSourceHandle,
  getDefaultDataTargetHandle,
  getDefaultSemanticSourceHandle,
  getDefaultSemanticTargetHandle,
  HANDLE_IDS,
  isDataSourceHandle,
  isDataTargetHandle,
  isSemanticSourceHandle,
  isSemanticTargetHandle,
  normalizeRecipeHandleId,
} from "../handles";
import { readNodeHeight, readNodeWidth } from "../rf-node-dimensions";
import { isSemanticRelation } from "./relations";

// Input to deriveDisplayGraph.
type DisplayGraphInput = {
  // Recipe nodes and the edges between them.
  nodes: RecipeNode[];
  edges: Edge[];
  // Node configs; looked up by node id and by edge source/target id.
  configs: Record<string, NodeConfig>;
  // Graph-wide direction; a node's own data.layoutDirection takes precedence.
  layoutDirection: LayoutDirection;
  // Stored positions keyed by aux node id; override the computed placement.
  auxNodePositions: Record<string, XYPosition>;
  // Keyed by LLM config id; aux nodes are only derived when the entry is truthy.
  llmAuxVisibility: Record<string, boolean>;
  // Optional execution state; when omitted nothing is running/done/active/locked.
  runtime?: {
    runningNodeId: string | null;
    doneNodeIds: Set<string>;
    activeEdgeIds: Set<string>;
    executionLocked: boolean;
  };
};

// Result of deriveDisplayGraph: recipe nodes followed by derived aux nodes,
// and the original edges followed by derived aux edges (all normalized).
export type DisplayGraph = {
  nodes: Array<Node<RecipeNode["data"] | RecipeGraphAuxNodeData>>;
  edges: Edge[];
};

/** True when either endpoint id of the edge starts with the "aux-" prefix. */
function isAuxEdge(edge: Edge): boolean {
  const endpoints = [edge.source, edge.target];
  return endpoints.some((endpointId) => endpointId.startsWith("aux-"));
}

/**
 * Produces the edge object that is actually rendered for `edge`.
 *
 * - Aux edges are returned as "canvas" edges with a "smoothstep" path; their
 *   handles are left untouched.
 * - Other edges get their type ("semantic" or "canvas") and their source /
 *   target handles resolved, falling back to layout-direction defaults when
 *   the stored handle does not fit the lane.
 * - Every returned edge carries `data.active` and `animated`, both set from
 *   the computed active flag.
 */
function normalizeEdge(
  edge: Edge,
  configs: Record<string, NodeConfig>,
  layoutDirection: LayoutDirection,
  activeEdgeIds: Set<string>,
  runningNodeId: string | null,
  doneNodeIds: Set<string>,
): Edge {
  // A non-aux edge pointing at the running node counts as active even when
  // it is not listed in activeEdgeIds.
  const isActiveByRuntimeTarget =
    Boolean(runningNodeId) &&
    edge.target === runningNodeId &&
    !isAuxEdge(edge);
  const isActiveEdge = activeEdgeIds.has(edge.id) || isActiveByRuntimeTarget;
  const isAux = isAuxEdge(edge);
  if (isAux) {
    return {
      ...edge,
      type: "canvas",
      // Overrides any `path` already present in edge.data.
      data: { ...(edge.data ?? {}), path: "smoothstep", active: isActiveEdge },
      animated: isActiveEdge,
    };
  }

  // An active edge that leaves the running node towards an already-done node
  // is displayed flipped (done node -> running node) on the default data
  // handles.
  const isActiveReversedRuntimeEdge =
    Boolean(runningNodeId) &&
    isActiveEdge &&
    edge.source === runningNodeId &&
    doneNodeIds.has(edge.target);
  const displayEdge = isActiveReversedRuntimeEdge
    ? {
        ...edge,
        source: edge.target,
        target: edge.source,
        sourceHandle: getDefaultDataSourceHandle(layoutDirection),
        targetHandle: getDefaultDataTargetHandle(layoutDirection),
      }
    : edge;

  const source = configs[displayEdge.source];
  const target = configs[displayEdge.target];
  // Semantic when already typed as such, or when both endpoint configs exist
  // and isSemanticRelation says so.
  const semantic =
    displayEdge.type === "semantic" ||
    (Boolean(source && target) && isSemanticRelation(source, target));
  const sourceHandleNormalized = normalizeRecipeHandleId(displayEdge.sourceHandle);
  const targetHandleNormalized = normalizeRecipeHandleId(displayEdge.targetHandle);
  // For semantic edges an LLM endpoint defaults to a data handle, any other
  // endpoint to a semantic handle.
  const semanticSourceDefault =
    source?.kind === "llm"
      ? getDefaultDataSourceHandle(layoutDirection)
      : getDefaultSemanticSourceHandle(layoutDirection);
  const semanticTargetDefault =
    target?.kind === "llm"
      ? getDefaultDataTargetHandle(layoutDirection)
      : getDefaultSemanticTargetHandle(layoutDirection);
  let sourceHandle = getDefaultDataSourceHandle(layoutDirection);
  let targetHandle = getDefaultDataTargetHandle(layoutDirection);

  if (semantic) {
    // Semantic edges may keep either a semantic or a data handle.
    sourceHandle =
      isSemanticSourceHandle(sourceHandleNormalized) ||
      isDataSourceHandle(sourceHandleNormalized)
        ? sourceHandleNormalized ?? semanticSourceDefault
        : semanticSourceDefault;
    targetHandle =
      isSemanticTargetHandle(targetHandleNormalized) ||
      isDataTargetHandle(targetHandleNormalized)
        ? targetHandleNormalized ?? semanticTargetDefault
        : semanticTargetDefault;
    // LLM nodes only expose data lane handles; coerce legacy semantic handles.
    if (source?.kind === "llm" && isSemanticSourceHandle(sourceHandle)) {
      sourceHandle = semanticSourceDefault;
    }
    if (target?.kind === "llm" && isSemanticTargetHandle(targetHandle)) {
      targetHandle = semanticTargetDefault;
    }
  } else {
    // Data edges only keep data handles; anything else falls back to the
    // layout-direction default.
    sourceHandle = isDataSourceHandle(sourceHandleNormalized)
      ? sourceHandleNormalized ?? getDefaultDataSourceHandle(layoutDirection)
      : getDefaultDataSourceHandle(layoutDirection);
    targetHandle = isDataTargetHandle(targetHandleNormalized)
      ? targetHandleNormalized ?? getDefaultDataTargetHandle(layoutDirection)
      : getDefaultDataTargetHandle(layoutDirection);
  }

  return {
    ...displayEdge,
    type: semantic ? "semantic" : "canvas",
    // Only non-semantic edges get an explicit "smoothstep" path.
    data: semantic
      ? { ...(displayEdge.data ?? {}), active: isActiveEdge }
      : { ...(displayEdge.data ?? {}), path: "smoothstep", active: isActiveEdge },
    sourceHandle,
    targetHandle,
    animated: isActiveEdge,
  };
}

// One auxiliary node to derive for an LLM node. `key` is appended to the
// parent node id to form the aux node id; `data` becomes the aux node's data.
type AuxNodeItem = {
  key: string;
  data: RecipeGraphAuxNodeData;
};

// Rectangle used for overlap checks: (x, y) is the corner from which
// `width` and `height` extend in the positive direction.
type Rect = {
  x: number;
  y: number;
  width: number;
  height: number;
};

/** Builds a Rect whose origin is `position` and whose size is width x height. */
function toRect(
  position: XYPosition,
  width: number,
  height: number,
): Rect {
  const { x, y } = position;
  return { x, y, width, height };
}

/**
 * Reports whether two rectangles overlap once a margin of `pad` is required
 * between them. Rectangles separated by exactly `pad` do not intersect.
 */
function intersects(a: Rect, b: Rect, pad = 18): boolean {
  const separatedOnX =
    a.x + a.width + pad <= b.x || b.x + b.width + pad <= a.x;
  const separatedOnY =
    a.y + a.height + pad <= b.y || b.y + b.height + pad <= a.y;
  return !(separatedOnX || separatedOnY);
}

/**
 * Searches for a position near `preferred` where a width x height box does
 * not intersect any rectangle in `occupied`.
 *
 * Candidates are visited ring by ring (ring 0 is `preferred` itself, then
 * the perimeter cells of growing squares) on a 24px grid, up to ring 10.
 * The first free candidate wins; if none is free, `preferred` is returned.
 */
function findNonOverlappingPosition(
  preferred: XYPosition,
  width: number,
  height: number,
  occupied: Rect[],
): XYPosition {
  const gridStep = 24;
  const lastRing = 10;

  for (let radius = 0; radius <= lastRing; radius += 1) {
    for (let col = -radius; col <= radius; col += 1) {
      for (let row = -radius; row <= radius; row += 1) {
        // Only cells on the ring's perimeter are new; inner cells were
        // already tried by smaller rings.
        const onPerimeter =
          radius === 0 || Math.max(Math.abs(col), Math.abs(row)) === radius;
        if (!onPerimeter) {
          continue;
        }
        const candidate = {
          x: preferred.x + col * gridStep,
          y: preferred.y + row * gridStep,
        };
        const candidateRect = toRect(candidate, width, height);
        const collides = occupied.some((taken) =>
          intersects(candidateRect, taken),
        );
        if (!collides) {
          return candidate;
        }
      }
    }
  }
  return preferred;
}

// Side of a node on which a handle sits.
type HandleSide = "left" | "right" | "top" | "bottom";

// Data-lane target handle id to use for each side of a node.
const SIDE_TO_TARGET_HANDLE: Record<HandleSide, string> = {
  left: HANDLE_IDS.dataIn,
  right: HANDLE_IDS.dataInRight,
  top: HANDLE_IDS.dataInTop,
  bottom: HANDLE_IDS.dataInBottom,
};

/**
 * Resolves which side of a node a target handle id sits on.
 *
 * When the normalized id is empty the side follows the layout direction
 * ("top" for "TB", otherwise "left"). Right/bottom/top data or semantic
 * input handles map to their side; any other id maps to "left".
 */
function getTargetSide(
  handleId: string | null | undefined,
  direction: LayoutDirection,
): HandleSide {
  const handle = normalizeRecipeHandleId(handleId);
  if (!handle) {
    return direction === "TB" ? "top" : "left";
  }
  switch (handle) {
    case HANDLE_IDS.dataInRight:
    case HANDLE_IDS.semanticInRight:
      return "right";
    case HANDLE_IDS.dataInBottom:
    case HANDLE_IDS.semanticInBottom:
      return "bottom";
    case HANDLE_IDS.dataInTop:
    case HANDLE_IDS.semanticInTop:
      return "top";
    default:
      return "left";
  }
}

/**
 * Resolves which side of a node a source handle id sits on.
 *
 * When the normalized id is empty the side follows the layout direction
 * ("bottom" for "TB", otherwise "right"). Left/top/bottom data or semantic
 * output handles map to their side; any other id maps to "right".
 */
function getSourceSide(
  handleId: string | null | undefined,
  direction: LayoutDirection,
): HandleSide {
  const handle = normalizeRecipeHandleId(handleId);
  if (!handle) {
    return direction === "TB" ? "bottom" : "right";
  }
  switch (handle) {
    case HANDLE_IDS.dataOutLeft:
    case HANDLE_IDS.semanticOutLeft:
      return "left";
    case HANDLE_IDS.dataOutTop:
    case HANDLE_IDS.semanticOutTop:
      return "top";
    case HANDLE_IDS.dataOutBottom:
    case HANDLE_IDS.semanticOutBottom:
      return "bottom";
    default:
      return "right";
  }
}

/**
 * Chooses the target handle on an LLM node that its aux nodes connect to.
 *
 * Sides already used by the node's non-aux edges (incoming or outgoing) are
 * treated as occupied. The first free side in a direction-dependent priority
 * order is used; if every side is occupied a direction-dependent fallback
 * side is used instead.
 */
function pickAuxTargetHandle(
  llmId: string,
  direction: LayoutDirection,
  edges: Edge[],
): string {
  const usedSides = new Set<HandleSide>();
  for (const edge of edges) {
    if (!isAuxEdge(edge)) {
      if (edge.target === llmId) {
        usedSides.add(getTargetSide(edge.targetHandle, direction));
      }
      if (edge.source === llmId) {
        usedSides.add(getSourceSide(edge.sourceHandle, direction));
      }
    }
  }

  const isLeftToRight = direction === "LR";
  const preferredOrder: HandleSide[] = isLeftToRight
    ? ["left", "right", "bottom", "top"]
    : ["top", "bottom", "right", "left"];
  const freeSide = preferredOrder.find((side) => !usedSides.has(side));
  if (freeSide !== undefined) {
    return SIDE_TO_TARGET_HANDLE[freeSide];
  }

  const fallbackSide: HandleSide = isLeftToRight ? "bottom" : "right";
  return SIDE_TO_TARGET_HANDLE[fallbackSide];
}

/**
 * Maps a data-lane target handle id back to the side it sits on. Any id
 * other than the right/top/bottom data inputs is reported as "left".
 */
function getHandleSideFromTargetHandle(targetHandle: string): HandleSide {
  switch (targetHandle) {
    case HANDLE_IDS.dataInRight:
      return "right";
    case HANDLE_IDS.dataInTop:
      return "top";
    case HANDLE_IDS.dataInBottom:
      return "bottom";
    default:
      return "left";
  }
}

/**
 * Chooses the aux node's source handle so that it faces the LLM node.
 *
 * The offset from the aux box centre to the LLM box centre is compared per
 * axis: when the horizontal offset is at least as large as the vertical one
 * the right/left handle is used, otherwise the bottom/top handle. A zero
 * offset counts as positive (right / bottom).
 */
function pickAuxSourceHandle(
  auxPosition: XYPosition,
  auxWidth: number,
  auxHeight: number,
  llmPosition: XYPosition,
  llmWidth: number,
  llmHeight: number,
): string {
  const auxCenterX = auxPosition.x + auxWidth / 2;
  const auxCenterY = auxPosition.y + auxHeight / 2;
  const llmCenterX = llmPosition.x + llmWidth / 2;
  const llmCenterY = llmPosition.y + llmHeight / 2;

  const offsetX = llmCenterX - auxCenterX;
  const offsetY = llmCenterY - auxCenterY;
  const horizontalDominates = Math.abs(offsetX) >= Math.abs(offsetY);

  if (!horizontalDominates) {
    return offsetY >= 0
      ? HANDLE_IDS.llmInputOutBottom
      : HANDLE_IDS.llmInputOutTop;
  }
  return offsetX >= 0
    ? HANDLE_IDS.llmInputOutRight
    : HANDLE_IDS.llmInputOutLeft;
}

// Arguments for appendAuxNodeAndEdge.
type AppendAuxNodeAndEdgeInput = {
  // Output arrays; the new aux node / edge are pushed onto these.
  auxNodes: Node<RecipeGraphAuxNodeData>[];
  auxEdges: Edge[];
  // The aux item together with its node id and box size.
  entry: {
    item: AuxNodeItem;
    auxId: string;
    width: number;
    height: number;
  };
  // Final position of the aux node.
  position: XYPosition;
  // The node the aux node attaches to, plus the size used for its box.
  parentNode: Node<RecipeNode["data"] | RecipeGraphAuxNodeData>;
  parentWidth: number;
  parentHeight: number;
  // Handle on the parent node that the aux edge targets.
  auxTargetHandle: string;
};

/**
 * Pushes one aux node onto `auxNodes` and the edge linking it to its parent
 * onto `auxEdges`.
 *
 * The node is draggable/selectable/focusable but not connectable. The edge
 * runs from the aux node to the parent, is neither selectable nor focusable,
 * and its source handle is chosen with pickAuxSourceHandle from the relative
 * placement of the two boxes.
 */
function appendAuxNodeAndEdge(input: AppendAuxNodeAndEdgeInput): void {
  const {
    auxNodes,
    auxEdges,
    entry,
    position,
    parentNode,
    parentWidth,
    parentHeight,
    auxTargetHandle,
  } = input;
  const { auxId, width, height, item } = entry;

  const sourceHandle = pickAuxSourceHandle(
    position,
    width,
    height,
    parentNode.position,
    parentWidth,
    parentHeight,
  );

  auxNodes.push({
    id: auxId,
    type: "aux",
    data: item.data,
    position,
    width,
    height,
    style: { width, height },
    draggable: true,
    selectable: true,
    focusable: true,
    connectable: false,
  });

  auxEdges.push({
    id: `e-${auxId}-${parentNode.id}`,
    source: auxId,
    sourceHandle,
    target: parentNode.id,
    targetHandle: auxTargetHandle,
    type: "canvas",
    data: { path: "auto" },
    selectable: false,
    focusable: false,
  });
}

/**
 * Derives the graph that is rendered from the stored recipe graph.
 *
 * 1. Every recipe node gets `runtimeState` and `executionLocked` added to its
 *    data, plus a default width when it has none.
 * 2. For each LLM node whose aux visibility flag is set, auxiliary nodes are
 *    derived for its non-blank system prompt, non-blank prompt and (for
 *    judges) each score, placed next to the node and linked to it by an edge.
 * 3. All edges (original and aux) are passed through normalizeEdge.
 */
export function deriveDisplayGraph({
  nodes,
  edges,
  configs,
  layoutDirection,
  auxNodePositions,
  llmAuxVisibility,
  runtime,
}: DisplayGraphInput): DisplayGraph {
  // Without runtime info nothing is locked, running, done or active.
  const executionLocked = runtime?.executionLocked ?? false;
  const runningNodeId = runtime?.runningNodeId ?? null;
  const doneNodeIds = runtime?.doneNodeIds ?? new Set<string>();
  const activeEdgeIds = runtime?.activeEdgeIds ?? new Set<string>();
  const displayNodes = nodes.map((node) => {
    // A node "has width" when it carries a numeric width, a numeric style
    // width, or a style width string that parses to a finite number.
    const hasWidth =
      typeof node.width === "number" ||
      typeof node.style?.width === "number" ||
      (typeof node.style?.width === "string" &&
        Number.isFinite(Number.parseFloat(node.style.width)));
    // "running" takes precedence over "done".
    const runtimeState: "idle" | "running" | "done" =
      node.id === runningNodeId
        ? "running"
        : doneNodeIds.has(node.id)
          ? "done"
          : "idle";
    if (hasWidth) {
      return {
        ...node,
        data: {
          ...node.data,
          runtimeState,
          executionLocked,
        },
      };
    }
    // No usable width: same as above, plus the default width in the style.
    return {
      ...node,
      data: {
        ...node.data,
        runtimeState,
        executionLocked,
      },
      style: { ...node.style, width: DEFAULT_NODE_WIDTH },
    };
  });
  const auxNodes: Node<RecipeGraphAuxNodeData>[] = [];
  const auxEdges: Edge[] = [];
  // Boxes that aux nodes must avoid; starts with every recipe node and grows
  // as aux nodes are placed.
  const occupiedRects: Rect[] = displayNodes.map((node) =>
    toRect(
      node.position,
      readNodeWidth(node) ?? DEFAULT_NODE_WIDTH,
      readNodeHeight(node) ?? DEFAULT_NODE_HEIGHT,
    ),
  );

  for (const node of displayNodes) {
    const config = configs[node.id];
    // Only LLM nodes with the aux visibility flag set get aux nodes.
    if (!(config && config.kind === "llm")) {
      continue;
    }
    if (!llmAuxVisibility[config.id]) {
      continue;
    }
    // A per-node layout direction overrides the graph-wide one.
    const llmDirection = node.data.layoutDirection ?? layoutDirection;
    const auxTargetHandle = pickAuxTargetHandle(node.id, llmDirection, edges);
    const auxTargetSide = getHandleSideFromTargetHandle(auxTargetHandle);
    const items: AuxNodeItem[] = [];

    // Blank (whitespace-only) prompts produce no aux node.
    if (config.system_prompt.trim()) {
      items.push({
        key: "system",
        data: {
          kind: "llm-prompt-input",
          llmId: config.id,
          field: "system_prompt",
          title: "System Prompt",
          executionLocked,
        },
      });
    }

    if (config.prompt.trim()) {
      items.push({
        key: "prompt",
        data: {
          kind: "llm-prompt-input",
          llmId: config.id,
          field: "prompt",
          title: "Prompt",
          executionLocked,
        },
      });
    }

    // Judges additionally get one aux node per score entry.
    if (config.llm_type === "judge") {
      (config.scores ?? []).forEach((_score, scoreIndex) => {
        items.push({
          key: `score-${scoreIndex}`,
          data: {
            kind: "llm-judge-score",
            llmId: config.id,
            scoreIndex,
            executionLocked,
          },
        });
      });
    }

    if (items.length === 0) {
      continue;
    }

    const parentWidth = readNodeWidth(node) ?? DEFAULT_NODE_WIDTH;
    const parentHeight = readNodeHeight(node) ?? DEFAULT_NODE_HEIGHT;
    // Every aux node uses the default node size and the id
    // `aux-<parent id>-<item key>`.
    const itemsWithLayout = items.map((item) => {
      const auxId = `aux-${node.id}-${item.key}`;
      return {
        item,
        auxId,
        width: DEFAULT_NODE_WIDTH,
        height: DEFAULT_NODE_HEIGHT,
      };
    });

    const gap = 24;
    const sideOffset = 48;
    // Aux nodes attached to the top/bottom are laid out in a row; those
    // attached to the left/right are laid out in a column.
    const stackHorizontal =
      auxTargetSide === "top" || auxTargetSide === "bottom";

    if (stackHorizontal) {
      // Centre the row horizontally on the parent node.
      const totalWidth =
        itemsWithLayout.reduce((sum, entry) => sum + entry.width, 0) +
        (itemsWithLayout.length - 1) * gap;
      const startX = node.position.x + (parentWidth - totalWidth) / 2;
      let xCursor = startX;

      for (const entry of itemsWithLayout) {
        const preferredPosition = {
          x: xCursor,
          y:
            auxTargetSide === "top"
              ? node.position.y - entry.height - sideOffset
              : node.position.y + parentHeight + sideOffset,
        };
        const defaultPosition = findNonOverlappingPosition(
          preferredPosition,
          entry.width,
          entry.height,
          occupiedRects,
        );
        // A stored position for this aux node wins over the computed one.
        const position = auxNodePositions[entry.auxId] ?? defaultPosition;
        xCursor += entry.width + gap;

        occupiedRects.push(toRect(position, entry.width, entry.height));
        appendAuxNodeAndEdge({
          auxNodes,
          auxEdges,
          entry,
          position,
          parentNode: node,
          parentWidth,
          parentHeight,
          auxTargetHandle,
        });
      }
      continue;
    }

    // Column layout: centre the column vertically on the parent node and
    // place it to the right or left of it.
    const totalHeight =
      itemsWithLayout.reduce((sum, entry) => sum + entry.height, 0) +
      (itemsWithLayout.length - 1) * gap;
    const maxWidth = Math.max(...itemsWithLayout.map((entry) => entry.width));
    const baseX =
      auxTargetSide === "right"
        ? node.position.x + parentWidth + sideOffset
        : node.position.x - maxWidth - sideOffset;
    let yCursor = node.position.y + (parentHeight - totalHeight) / 2;

    for (const entry of itemsWithLayout) {
      const preferredPosition = {
        x: baseX + (maxWidth - entry.width),
        y: yCursor,
      };
      const defaultPosition = findNonOverlappingPosition(
        preferredPosition,
        entry.width,
        entry.height,
        occupiedRects,
      );
      // A stored position for this aux node wins over the computed one.
      const position = auxNodePositions[entry.auxId] ?? defaultPosition;
      yCursor += entry.height + gap;

      occupiedRects.push(toRect(position, entry.width, entry.height));
      appendAuxNodeAndEdge({
        auxNodes,
        auxEdges,
        entry,
        position,
        parentNode: node,
        parentWidth,
        parentHeight,
        auxTargetHandle,
      });
    }
  }

  return {
    nodes: [...displayNodes, ...auxNodes],
    // NOTE(review): aux edges are created with data.path "auto" but
    // normalizeEdge rewrites aux edges to path "smoothstep" - confirm which
    // one is intended.
    edges: [...edges, ...auxEdges].map((edge) =>
      normalizeEdge(
        edge,
        configs,
        layoutDirection,
        activeEdgeIds,
        runningNodeId,
        doneNodeIds,
      ),
    ),
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph/fit-view.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Node } from "@xyflow/react";

/**
 * True when the node has type "builder" and its data is an object whose
 * `kind` is "note".
 */
function isMarkdownNoteNode(node: Node): boolean {
  const { type, data } = node;
  const hasObjectData = Boolean(data) && typeof data === "object";
  if (type !== "builder" || !hasObjectData) {
    return false;
  }
  return (data as { kind?: string }).kind === "note";
}

/**
 * Returns `{ id }` entries for the nodes a fit-view should frame: all nodes
 * except markdown notes, or every node when the graph holds nothing but
 * notes (or is empty).
 */
export function getFitNodeIdsIgnoringNotes(nodes: Node[]): Array<{ id: string }> {
  const contentNodes = nodes.filter((candidate) => !isMarkdownNoteNode(candidate));
  const nodesToFit = contentNodes.length === 0 ? nodes : contentNodes;
  return nodesToFit.map(({ id }) => ({ id }));
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph/recipe-graph-connection.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type Connection, type Edge, addEdge } from "@xyflow/react";
import type { LayoutDirection, NodeConfig, SamplerConfig } from "../../types";
import {
  HANDLE_IDS,
  isDataSourceHandle,
  isDataTargetHandle,
  isSemanticSourceHandle,
  isSemanticTargetHandle,
  normalizeRecipeHandleId,
} from "../handles";
import { isSemanticRelation } from "./relations";
import {
  isCategoryConfig,
  isExpressionConfig,
  isSubcategoryConfig,
} from "../index";
import {
  VALIDATOR_OXC_CODE_LANGS,
  VALIDATOR_SQL_CODE_LANGS,
} from "../validators/code-lang";

/**
 * Ensures `ref` appears in `template`.
 *
 * Returns the template unchanged when it already contains `ref`; otherwise
 * appends `ref` on a new line, or returns just `ref` when the template is
 * blank.
 */
function buildTemplateWithRef(template: string, ref: string): string {
  const alreadyReferenced = template.includes(ref);
  if (alreadyReferenced) {
    return template;
  }
  const isBlank = template.trim().length === 0;
  return isBlank ? ref : `${template}\n${ref}`;
}

/**
 * Returns a copy of `subcategory` that points at `parent`.
 *
 * `subcategory_parent` is always set to the parent's name. When the parent
 * is a category sampler, the copy's `subcategory_mapping` additionally gets
 * an empty list for every parent value that has no truthy entry yet;
 * existing entries are kept as they are.
 */
function syncSubcategoryMapping(
  subcategory: SamplerConfig,
  parent: NodeConfig,
): SamplerConfig {
  if (isCategoryConfig(parent)) {
    const mapping: Record<string, string[]> = {
      ...(subcategory.subcategory_mapping ?? {}),
    };
    const parentValues = parent.values ?? [];
    for (const parentValue of parentValues) {
      if (mapping[parentValue]) {
        continue;
      }
      mapping[parentValue] = [];
    }
    return {
      ...subcategory,
      // biome-ignore lint/style/useNamingConvention: api schema
      subcategory_parent: parent.name,
      // biome-ignore lint/style/useNamingConvention: api schema
      subcategory_mapping: mapping,
    };
  }

  // Non-category parent: only the parent reference is updated.
  return {
    ...subcategory,
    // biome-ignore lint/style/useNamingConvention: api schema
    subcategory_parent: parent.name,
  };
}

/**
 * True for the config kinds "model_provider", "model_config" and
 * "tool_config".
 */
function isModelInfraNode(config: NodeConfig): boolean {
  switch (config.kind) {
    case "model_provider":
    case "model_config":
    case "tool_config":
      return true;
    default:
      return false;
  }
}

/**
 * True when the connection's source handle is a semantic or data source
 * handle and its target handle is a semantic or data target handle.
 */
function isSemanticLane(connection: Connection): boolean {
  const { sourceHandle, targetHandle } = connection;
  const sourceFits =
    isSemanticSourceHandle(sourceHandle) || isDataSourceHandle(sourceHandle);
  if (!sourceFits) {
    return false;
  }
  return (
    isSemanticTargetHandle(targetHandle) || isDataTargetHandle(targetHandle)
  );
}

/**
 * True when the connection runs from a data source handle to a data target
 * handle.
 */
function isDataLane(connection: Connection): boolean {
  const { sourceHandle, targetHandle } = connection;
  if (!isDataSourceHandle(sourceHandle)) {
    return false;
  }
  return isDataTargetHandle(targetHandle);
}

// Relation kinds recognised by getSingleRefRelation for a source/target pair
// of node configs, and used by isCompetingIncomingEdge to identify edges of
// the same relation on a target.
type SingleRefRelation =
  | "provider"
  | "model_alias"
  | "tool_alias"
  | "reference_column_name"
  | "subcategory_parent"
  | "validator_target_columns";

/**
 * Classifies the relation a source -> target connection represents, or
 * returns null when the pair matches none of the known relations:
 *
 * - model_provider -> model_config: "provider"
 * - model_config -> llm: "model_alias"
 * - tool_config -> llm: "tool_alias"
 * - datetime sampler -> timedelta sampler: "reference_column_name"
 * - category sampler -> subcategory sampler: "subcategory_parent"
 * - code llm -> validator: "validator_target_columns"
 */
function getSingleRefRelation(
  source: NodeConfig,
  target: NodeConfig,
): SingleRefRelation | null {
  switch (target.kind) {
    case "model_config":
      if (source.kind === "model_provider") {
        return "provider";
      }
      break;
    case "llm":
      if (source.kind === "model_config") {
        return "model_alias";
      }
      if (source.kind === "tool_config") {
        return "tool_alias";
      }
      break;
    case "sampler":
      if (
        target.sampler_type === "timedelta" &&
        source.kind === "sampler" &&
        source.sampler_type === "datetime"
      ) {
        return "reference_column_name";
      }
      break;
    default:
      break;
  }

  if (isCategoryConfig(source) && isSubcategoryConfig(target)) {
    return "subcategory_parent";
  }

  const sourceIsCodeLlm = source.kind === "llm" && source.llm_type === "code";
  if (sourceIsCodeLlm && target.kind === "validator") {
    return "validator_target_columns";
  }
  return null;
}

/**
 * True when `edge` points at `targetId` and its source config is of the kind
 * that provides `relation`, i.e. the edge occupies the same relation on that
 * target. Edges whose source has no config never compete.
 */
function isCompetingIncomingEdge(
  edge: Edge,
  targetId: string,
  relation: SingleRefRelation,
  configs: Record<string, NodeConfig>,
): boolean {
  if (edge.target !== targetId) {
    return false;
  }
  const edgeSource = configs[edge.source];
  if (!edgeSource) {
    return false;
  }

  switch (relation) {
    case "provider":
      return edgeSource.kind === "model_provider";
    case "model_alias":
      return edgeSource.kind === "model_config";
    case "tool_alias":
      return edgeSource.kind === "tool_config";
    case "subcategory_parent":
      return isCategoryConfig(edgeSource);
    case "validator_target_columns":
      return edgeSource.kind === "llm" && edgeSource.llm_type === "code";
    default:
      // Remaining relation ("reference_column_name"): datetime samplers.
      return (
        edgeSource.kind === "sampler" && edgeSource.sampler_type === "datetime"
      );
  }
}

/**
 * True for the pairs model_provider -> model_config, model_config -> llm and
 * tool_config -> llm.
 */
function isModelSemanticRelation(source: NodeConfig, target: NodeConfig): boolean {
  if (target.kind === "model_config") {
    return source.kind === "model_provider";
  }
  if (target.kind === "llm") {
    return source.kind === "model_config" || source.kind === "tool_config";
  }
  return false;
}

/**
 * Reports whether `codeLang` (trimmed) is a language the validator accepts.
 *
 * Blank input is never accepted. An "oxc" validator accepts only the OXC
 * language list; any other validator accepts "python" or a language from the
 * SQL list.
 */
function canApplyCodeLangToValidator(
  validator: Extract<NodeConfig, { kind: "validator" }>,
  codeLang: string,
): boolean {
  type ValidatorLang = typeof validator.code_lang;

  const lang = codeLang.trim();
  if (lang === "") {
    return false;
  }
  if (validator.validator_type === "oxc") {
    return VALIDATOR_OXC_CODE_LANGS.includes(lang as ValidatorLang);
  }
  return (
    lang === "python" ||
    VALIDATOR_SQL_CODE_LANGS.includes(lang as ValidatorLang)
  );
}

/**
 * Counts the edges attached to `nodeId` on the given lane ("source" or
 * "target") whose normalized handle id on that lane equals `handleId`.
 */
function countHandleUsage(
  edges: Edge[],
  nodeId: string,
  handleId: string,
  lane: "source" | "target",
): number {
  const readsSource = lane === "source";
  let matches = 0;
  for (const edge of edges) {
    const endpointId = readsSource ? edge.source : edge.target;
    if (endpointId !== nodeId) {
      continue;
    }
    const rawHandle = readsSource ? edge.sourceHandle : edge.targetHandle;
    if (normalizeRecipeHandleId(rawHandle) === handleId) {
      matches += 1;
    }
  }
  return matches;
}

/**
 * Picks the candidate handle with the lowest usage count.
 *
 * Ties go to the earliest candidate, except that the (normalized) requested
 * handle wins a tie against candidates listed before it. Returns the first
 * candidate when no candidate improves on it.
 *
 * @param candidates Handle ids to choose from, in preference order.
 * @param requested Handle the caller asked for, if any.
 * @param usageFor Returns the current usage count of a handle id.
 */
function pickLeastUsedHandle(
  candidates: string[],
  requested: string | null,
  usageFor: (handleId: string) => number,
): string {
  const preferred = requested ? normalizeRecipeHandleId(requested) : null;
  let winner = candidates[0];
  let lowestUsage = Number.POSITIVE_INFINITY;

  for (const handle of candidates) {
    const handleUsage = usageFor(handle);
    const isStrictlyBetter = handleUsage < lowestUsage;
    const isPreferredTie = handleUsage === lowestUsage && preferred === handle;
    if (isStrictlyBetter || isPreferredTie) {
      winner = handle;
      lowestUsage = handleUsage;
    }
  }

  return winner;
}

/**
 * For model-wiring relations, overrides the connection's handles with the
 * least-used handle from a layout-dependent candidate list. Any other
 * relation is returned untouched.
 *
 * Usage counts are taken from `edges`; ties favour the handle already present
 * on the connection (see pickLeastUsedHandle).
 */
function chooseModelSemanticHandles(
  connection: Connection,
  source: NodeConfig,
  target: NodeConfig,
  edges: Edge[],
  layoutDirection: LayoutDirection,
): Connection {
  if (!isModelSemanticRelation(source, target)) {
    return connection;
  }

  const isTopBottom = layoutDirection === "TB";

  // model_config -> llm gets exactly one source handle; every other model
  // relation may use either semantic output, ordered by layout direction.
  let sourceCandidates: string[];
  if (source.kind === "model_config" && target.kind === "llm") {
    sourceCandidates = [
      isTopBottom ? HANDLE_IDS.semanticOut : HANDLE_IDS.semanticOutBottom,
    ];
  } else if (isTopBottom) {
    sourceCandidates = [HANDLE_IDS.semanticOut, HANDLE_IDS.semanticOutBottom];
  } else {
    sourceCandidates = [HANDLE_IDS.semanticOutBottom, HANDLE_IDS.semanticOut];
  }

  // model_config targets use semantic inputs; llm targets use data inputs.
  let targetCandidates: string[];
  if (target.kind !== "model_config") {
    targetCandidates = [
      HANDLE_IDS.dataInTop,
      HANDLE_IDS.dataInBottom,
      HANDLE_IDS.dataIn,
      HANDLE_IDS.dataInRight,
    ];
  } else if (isTopBottom) {
    targetCandidates = [HANDLE_IDS.semanticIn, HANDLE_IDS.semanticInTop];
  } else {
    targetCandidates = [HANDLE_IDS.semanticInTop, HANDLE_IDS.semanticIn];
  }

  const usageOnSource = (handleId: string): number =>
    countHandleUsage(edges, source.id, handleId, "source");
  const usageOnTarget = (handleId: string): number =>
    countHandleUsage(edges, target.id, handleId, "target");

  const sourceHandle = pickLeastUsedHandle(
    sourceCandidates,
    connection.sourceHandle ?? null,
    usageOnSource,
  );
  const targetHandle = pickLeastUsedHandle(
    targetCandidates,
    connection.targetHandle ?? null,
    usageOnTarget,
  );

  return { ...connection, sourceHandle, targetHandle };
}

/**
 * Canonicalizes a validator -> code-llm connection by flipping it so the llm
 * becomes the source and the validator the target, attached through the
 * default data-out / data-in handles. Any other pair is returned as-is.
 */
function normalizeValidatorSemanticConnection(
  connection: Connection,
  source: NodeConfig,
  target: NodeConfig,
): Connection {
  const drawnFromValidatorToCodeLlm =
    source.kind === "validator" &&
    target.kind === "llm" &&
    target.llm_type === "code";
  if (!drawnFromValidatorToCodeLlm) {
    return connection;
  }
  return {
    ...connection,
    source: target.id,
    target: source.id,
    sourceHandle: HANDLE_IDS.dataOut,
    targetHandle: HANDLE_IDS.dataIn,
  };
}

/**
 * Validates a prospective connection against the known node configs.
 *
 * Rejected: missing endpoints, self-loops, endpoints without a config.
 * Semantic relations must use a semantic lane. Non-semantic connections may
 * not touch a model-infra node and must use a data lane.
 */
export function isValidRecipeConnection(
  connection: Connection,
  configs: Record<string, NodeConfig>,
): boolean {
  const { source: sourceId, target: targetId } = connection;
  if (!sourceId || !targetId || sourceId === targetId) {
    return false;
  }

  const source = configs[sourceId];
  const target = configs[targetId];
  if (!source || !target) {
    return false;
  }

  if (isSemanticRelation(source, target)) {
    return isSemanticLane(connection);
  }

  const touchesModelInfra = isModelInfraNode(source) || isModelInfraNode(target);
  return touchesModelInfra ? false : isDataLane(connection);
}

/**
 * Applies a user-drawn connection to the graph.
 *
 * Returns the next edge list and, when the connection implies a config
 * change on the target node, a new `configs` record with that node replaced.
 * Neither `edges` nor `configs` is mutated in this function; every rejected
 * case returns the original `edges` array and no `configs`.
 *
 * @param connection - Connection as drawn by the user.
 * @param configs - Node configs keyed by node id.
 * @param edges - Current edges of the graph.
 * @param layoutDirection - Layout direction used when choosing handles for
 *   model relations; defaults to "LR".
 */
export function applyRecipeConnection(
  connection: Connection,
  configs: Record<string, NodeConfig>,
  edges: Edge[],
  layoutDirection: LayoutDirection = "LR",
): { edges: Edge[]; configs?: Record<string, NodeConfig> } {
  // Validation runs against the connection exactly as drawn.
  if (!isValidRecipeConnection(connection, configs)) {
    return { edges };
  }
  const initialSource = connection.source
    ? configs[connection.source]
    : null;
  const initialTarget = connection.target
    ? configs[connection.target]
    : null;
  if (!(initialSource && initialTarget)) {
    return { edges };
  }
  // A validator -> code-llm connection is flipped here, so source/target are
  // re-resolved from the normalized connection below.
  const normalizedConnection = normalizeValidatorSemanticConnection(
    connection,
    initialSource,
    initialTarget,
  );
  const source = normalizedConnection.source
    ? configs[normalizedConnection.source]
    : null;
  const target = normalizedConnection.target
    ? configs[normalizedConnection.target]
    : null;
  if (!(source && target)) {
    return { edges };
  }

  const semanticRelation = isSemanticRelation(source, target);
  const singleRefRelation = getSingleRefRelation(source, target);
  // A subcategory that already names a different parent is not re-parented.
  if (
    singleRefRelation === "subcategory_parent" &&
    isSubcategoryConfig(target)
  ) {
    const currentParent = target.subcategory_parent?.trim() ?? "";
    if (currentParent && currentParent !== source.name) {
      return { edges };
    }
  }
  // For single-reference relations, drop the incoming edges that
  // isCompetingIncomingEdge reports before adding the new one.
  const nextBaseEdges = singleRefRelation
    ? edges.filter(
        (edge) =>
          !isCompetingIncomingEdge(edge, target.id, singleRefRelation, configs),
      )
    : edges;
  // Handle selection counts usage on the already-filtered edge list.
  const resolvedConnection = chooseModelSemanticHandles(
    normalizedConnection,
    source,
    target,
    nextBaseEdges,
    layoutDirection,
  );
  // NOTE(review): addEdge is presumably the @xyflow/react helper — its import
  // is outside this view.
  const nextEdges = addEdge(
    { ...resolvedConnection, type: semanticRelation ? "semantic" : "canvas" },
    nextBaseEdges,
  );
  // From here on: at most one target-config update, chosen by the first
  // matching source/target pairing.
  if (source.kind === "model_provider" && target.kind === "model_config") {
    const next = { ...target, provider: source.name };
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  if (source.kind === "model_config" && target.kind === "llm") {
    const next = { ...target, model_alias: source.name };
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  if (source.kind === "tool_config" && target.kind === "llm") {
    const next = { ...target, tool_alias: source.name };
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  // datetime sampler -> timedelta sampler: record the reference column.
  if (
    source.kind === "sampler" &&
    source.sampler_type === "datetime" &&
    target.kind === "sampler" &&
    target.sampler_type === "timedelta"
  ) {
    const next = {
      ...target,
      // biome-ignore lint/style/useNamingConvention: api schema
      reference_column_name: source.name,
    };
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  // code llm -> validator: the validator targets the llm column and adopts
  // its code language when the validator type accepts it.
  if (
    source.kind === "llm" &&
    source.llm_type === "code" &&
    target.kind === "validator"
  ) {
    const nextCodeLang = (source.code_lang ?? "").trim();
    const canUseCodeLangForTarget = canApplyCodeLangToValidator(
      target,
      nextCodeLang,
    );
    const next = {
      ...target,
      // biome-ignore lint/style/useNamingConvention: api schema
      target_columns: [source.name],
      // biome-ignore lint/style/useNamingConvention: api schema
      code_lang:
        (
          canUseCodeLangForTarget ? nextCodeLang : target.code_lang
        ) as typeof target.code_lang,
    };
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  // Data edge into an expression: add a `{{ name }}` reference to its template
  // via buildTemplateWithRef.
  if (
    isExpressionConfig(target) &&
    !semanticRelation &&
    source.kind !== "seed" &&
    source.kind !== "model_provider" &&
    source.kind !== "model_config" &&
    source.kind !== "validator"
  ) {
    const ref = `{{ ${source.name} }}`;
    const next = {
      ...target,
      expr: buildTemplateWithRef(target.expr ?? "", ref),
    };
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  // category -> subcategory: delegate the mapping update to
  // syncSubcategoryMapping.
  if (isSubcategoryConfig(target) && isCategoryConfig(source)) {
    const next = syncSubcategoryMapping(target, source);
    return { edges: nextEdges, configs: { ...configs, [target.id]: next } };
  }
  // No config change implied; only the edge list changes.
  return { edges: nextEdges };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph/relations.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig } from "../../types";

/**
 * Reports whether an edge between the two nodes expresses a semantic
 * dependency rather than data flow. The recognized pairings are:
 * model_provider -> model_config, model_config -> llm, tool_config -> llm,
 * and a code llm paired with a validator in either direction.
 */
export function isSemanticRelation(
  source: NodeConfig,
  target: NodeConfig,
): boolean {
  switch (target.kind) {
    case "model_config":
      return source.kind === "model_provider";
    case "llm":
      if (source.kind === "model_config" || source.kind === "tool_config") {
        return true;
      }
      return source.kind === "validator" && target.llm_type === "code";
    case "validator":
      return source.kind === "llm" && source.llm_type === "code";
    default:
      return false;
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph/runtime-visual-state.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge } from "@xyflow/react";
import type {
  RecipeExecutionBatch,
  RecipeExecutionRecord,
  RecipeExecutionStatus,
} from "../../execution-types";
import type { NodeConfig } from "../../types";
import { extractRefs } from "../refs";

// Execution statuses that pickLatestActiveExecution treats as still active.
const ACTIVE_STATUSES: ReadonlySet<RecipeExecutionStatus> = new Set([
  "pending",
  "running",
  "active",
  "cancelling",
]);
// Max age (60 s) for a "pending" execution that shows no live signal; older
// ones are skipped by pickLatestActiveExecution.
const FRESH_PENDING_WINDOW_MS = 60_000;

// Node kinds that collectUpstreamDoneNodeIds marks as done when they sit
// upstream of the running node.
const DONE_UPSTREAM_KINDS: ReadonlySet<NodeConfig["kind"]> = new Set([
  "sampler",
  "seed",
  "expression",
  "llm",
  "model_config",
  "model_provider",
  "tool_config",
]);

/** Visual state derived from the active execution by deriveGraphRuntimeVisualState. */
export type GraphRuntimeVisualState = {
  // True whenever an active execution was supplied.
  executionLocked: boolean;
  // Node whose name matches the execution's current column, if any.
  runningNodeId: string | null;
  // Nodes considered finished (completed columns, referenced nodes, upstream nodes).
  doneNodeIds: Set<string>;
  // Edges highlighted because they attach to the running node.
  activeEdgeIds: Set<string>;
  // Batch info, only populated when the batch total is a number greater than 1.
  batch: RecipeExecutionBatch | null;
};

/** True when either endpoint id of the edge carries the "aux-" prefix. */
function isAuxEdge(edge: Edge): boolean {
  const endpointIds = [edge.source, edge.target];
  return endpointIds.some((endpointId) => endpointId.startsWith("aux-"));
}

/**
 * Gathers the trimmed reference names found in a node's templates.
 *
 * llm nodes contribute `prompt`, `system_prompt` and, when it is a string,
 * `output_format`; expression nodes contribute `expr`. Any other kind (or a
 * null config) yields an empty set.
 */
function collectTemplateRefs(config: NodeConfig | null): Set<string> {
  const names = new Set<string>();
  if (!config) {
    return names;
  }

  const templates: string[] = [];
  if (config.kind === "llm") {
    templates.push(config.prompt ?? "", config.system_prompt ?? "");
    if (typeof config.output_format === "string") {
      templates.push(config.output_format);
    }
  } else if (config.kind === "expression") {
    templates.push(config.expr ?? "");
  }

  for (const template of templates) {
    for (const ref of extractRefs(template)) {
      names.add(ref.trim());
    }
  }
  return names;
}

/**
 * Detects an edge that leaves the running node but points at a node the
 * running node references in its templates (i.e. drawn "backwards" relative
 * to the reference).
 */
function isReversedRuntimeReferenceEdge(input: {
  edge: Edge;
  runningNodeId: string;
  runningTemplateRefs: Set<string>;
  configs: Record<string, NodeConfig>;
}): boolean {
  if (input.edge.source !== input.runningNodeId) {
    return false;
  }
  const referencedName = input.configs[input.edge.target]?.name?.trim() ?? "";
  if (!referencedName) {
    return false;
  }
  return input.runningTemplateRefs.has(referencedName);
}

/**
 * Reports whether the execution record carries any evidence of progress:
 * a last event id, a current column, progress / column progress, or a truthy
 * batch marker.
 */
function hasLiveExecutionSignal(execution: RecipeExecutionRecord): boolean {
  const hasProgressField =
    execution.lastEventId !== null ||
    execution.current_column !== null ||
    execution.progress !== null ||
    execution.column_progress !== null;
  if (hasProgressField) {
    return true;
  }
  // NOTE(review): `??` only falls back to `total` when `idx` is null/undefined,
  // so `idx === 0` yields false regardless of `total` — confirm this is intended.
  const batchMarker = execution.batch?.idx ?? execution.batch?.total;
  return Boolean(batchMarker);
}

/**
 * Returns the first execution in `executions` that should be treated as
 * currently active, or null when none qualifies.
 *
 * An execution qualifies when its status is active, it has a job id, it has
 * not finished, and either it shows a live signal or it is a "pending"
 * execution no older than FRESH_PENDING_WINDOW_MS.
 */
export function pickLatestActiveExecution(
  executions: RecipeExecutionRecord[],
): RecipeExecutionRecord | null {
  const now = Date.now();

  const qualifies = (execution: RecipeExecutionRecord): boolean => {
    const structurallyActive =
      ACTIVE_STATUSES.has(execution.status) &&
      Boolean(execution.jobId) &&
      execution.finishedAt === null;
    if (!structurallyActive) {
      return false;
    }
    if (hasLiveExecutionSignal(execution)) {
      return true;
    }
    // Without a live signal only freshly created pending executions count.
    if (execution.status !== "pending") {
      return false;
    }
    const ageMs = Math.max(0, now - execution.createdAt);
    return ageMs <= FRESH_PENDING_WINDOW_MS;
  };

  return executions.find(qualifies) ?? null;
}

/**
 * Derives what the graph should highlight for the active execution.
 *
 * Without an active execution the result is an unlocked, empty state.
 * Otherwise node names (trimmed) are matched against the execution's
 * completed columns and current column to find done / running nodes; the
 * running node's template references and upstream nodes are also marked
 * done, and edges attached to the running node are marked active.
 */
export function deriveGraphRuntimeVisualState(input: {
  activeExecution: RecipeExecutionRecord | null;
  configs: Record<string, NodeConfig>;
  edges: Edge[];
}): GraphRuntimeVisualState {
  const { activeExecution, configs, edges } = input;
  const doneNodeIds = new Set<string>();
  const activeEdgeIds = new Set<string>();

  if (!activeExecution) {
    return {
      executionLocked: false,
      runningNodeId: null,
      doneNodeIds,
      activeEdgeIds,
      batch: null,
    };
  }

  // Index nodes by trimmed name; unnamed nodes are not addressable.
  const idByName = new Map<string, string>();
  for (const config of Object.values(configs)) {
    const key = config.name.trim();
    if (key) {
      idByName.set(key, config.id);
    }
  }

  for (const column of activeExecution.completed_columns) {
    const completedId = idByName.get(column.trim());
    if (completedId) {
      doneNodeIds.add(completedId);
    }
  }

  const currentColumn = activeExecution.current_column;
  const runningNodeId = currentColumn
    ? idByName.get(currentColumn.trim()) ?? null
    : null;

  if (runningNodeId) {
    // The running node is never shown as done.
    doneNodeIds.delete(runningNodeId);

    const runningTemplateRefs = collectTemplateRefs(
      configs[runningNodeId] ?? null,
    );
    for (const refName of runningTemplateRefs) {
      const referencedId = idByName.get(refName);
      if (referencedId && referencedId !== runningNodeId) {
        doneNodeIds.add(referencedId);
      }
    }

    const upstreamIds = collectUpstreamDoneNodeIds({
      rootNodeId: runningNodeId,
      edges,
      configs,
    });
    for (const upstreamId of upstreamIds) {
      doneNodeIds.add(upstreamId);
    }

    for (const edge of edges) {
      if (isAuxEdge(edge)) {
        continue;
      }
      const feedsRunningNode = edge.target === runningNodeId;
      if (
        feedsRunningNode ||
        isReversedRuntimeReferenceEdge({
          edge,
          runningNodeId,
          runningTemplateRefs,
          configs,
        })
      ) {
        activeEdgeIds.add(edge.id);
      }
    }
  }

  // Batch info is only surfaced for multi-batch runs.
  const reportedBatch = activeExecution.batch;
  const batch =
    reportedBatch &&
    typeof reportedBatch.total === "number" &&
    reportedBatch.total > 1
      ? reportedBatch
      : null;

  return {
    executionLocked: true,
    runningNodeId,
    doneNodeIds,
    activeEdgeIds,
    batch,
  };
}

/**
 * Walks the graph backwards (target -> source) from `rootNodeId`, ignoring
 * aux edges, and returns every reached source node whose kind is listed in
 * DONE_UPSTREAM_KINDS.
 */
function collectUpstreamDoneNodeIds(input: {
  rootNodeId: string;
  edges: Edge[];
  configs: Record<string, NodeConfig>;
}): Set<string> {
  const { rootNodeId, edges, configs } = input;

  // target id -> ids of nodes with an edge into it
  const parentsOf = new Map<string, string[]>();
  for (const edge of edges) {
    if (isAuxEdge(edge)) {
      continue;
    }
    const parents = parentsOf.get(edge.target);
    if (parents) {
      parents.push(edge.source);
    } else {
      parentsOf.set(edge.target, [edge.source]);
    }
  }

  const seen = new Set<string>();
  const upstreamDone = new Set<string>();
  const pending = [rootNodeId];
  // Index-based breadth-first walk; `pending` grows while it is traversed.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const nodeId = pending[cursor];
    if (!nodeId || seen.has(nodeId)) {
      continue;
    }
    seen.add(nodeId);
    for (const parentId of parentsOf.get(nodeId) ?? []) {
      if (!seen.has(parentId)) {
        pending.push(parentId);
      }
      const parentConfig = configs[parentId];
      if (parentConfig && DONE_UPSTREAM_KINDS.has(parentConfig.kind)) {
        upstreamDone.add(parentId);
      }
    }
  }

  return upstreamDone;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph-warnings.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge } from "@xyflow/react";
import { INFRA_NODE_KINDS, type NodeConfig } from "../types";

/** A single issue reported by getGraphWarnings. */
export type GraphWarning = {
  // Id of the node the warning is about; absent on graph-wide warnings.
  nodeId?: string;
  // Name of the node the warning is about; absent on graph-wide warnings.
  nodeName?: string;
  // Set to true on warnings that are not tied to a particular node.
  global?: boolean;
  // Human-readable description of the issue.
  message: string;
  severity: "error" | "warning";
};

/**
 * Emits one global warning when the graph contains an llm block but no seed,
 * sampler or expression block.
 */
function checkDataSourceRequired(allConfigs: NodeConfig[]): GraphWarning[] {
  let sawLlm = false;
  let sawDataSource = false;
  for (const { kind } of allConfigs) {
    if (kind === "llm") {
      sawLlm = true;
    } else if (kind === "seed" || kind === "sampler" || kind === "expression") {
      sawDataSource = true;
    }
  }

  if (!sawLlm || sawDataSource) {
    return [];
  }
  const warning: GraphWarning = {
    global: true,
    message:
      "Add a data source (seed, sampler, or expression) before LLM blocks can generate data.",
    severity: "warning",
  };
  return [warning];
}

/** Flags every llm block whose `model_alias` is missing or blank. */
function checkLlmModelAlias(allConfigs: NodeConfig[]): GraphWarning[] {
  return allConfigs
    .filter((config) => config.kind === "llm" && !config.model_alias?.trim())
    .map(
      (config): GraphWarning => ({
        nodeId: config.id,
        nodeName: config.name,
        message: "Needs a model preset.",
        severity: "error",
      }),
    );
}

/** Flags every model_config block whose `provider` is missing or blank. */
function checkModelConfigProvider(allConfigs: NodeConfig[]): GraphWarning[] {
  const found: GraphWarning[] = [];
  for (const config of allConfigs) {
    if (config.kind !== "model_config") {
      continue;
    }
    if (config.provider?.trim()) {
      continue;
    }
    found.push({
      nodeId: config.id,
      nodeName: config.name,
      message: "Needs a provider connection.",
      severity: "error",
    });
  }
  return found;
}

/**
 * Flags subcategory samplers that either have no parent set (blank after
 * trimming) or name a parent that is not the name of any category sampler.
 * The lookup uses the stored parent value as-is.
 */
function checkSubcategoryParent(allConfigs: NodeConfig[]): GraphWarning[] {
  const categoryNames = new Set<string>();
  for (const candidate of allConfigs) {
    if (candidate.kind === "sampler" && candidate.sampler_type === "category") {
      categoryNames.add(candidate.name);
    }
  }

  const found: GraphWarning[] = [];
  for (const config of allConfigs) {
    if (config.kind === "sampler" && config.sampler_type === "subcategory") {
      let message: string | null = null;
      if (!config.subcategory_parent?.trim()) {
        message = "Needs a parent category block.";
      } else if (!categoryNames.has(config.subcategory_parent)) {
        message = `Parent category "${config.subcategory_parent}" not found.`;
      }
      if (message !== null) {
        found.push({
          nodeId: config.id,
          nodeName: config.name,
          message,
          severity: "error",
        });
      }
    }
  }
  return found;
}

/** Warns about validator blocks with no (or an empty) `target_columns` list. */
function checkValidatorTargets(allConfigs: NodeConfig[]): GraphWarning[] {
  return allConfigs
    .filter(
      (config) =>
        config.kind === "validator" &&
        (config.target_columns?.length ?? 0) === 0,
    )
    .map(
      (config): GraphWarning => ({
        nodeId: config.id,
        nodeName: config.name,
        message: "Needs at least one target column.",
        severity: "warning",
      }),
    );
}

/**
 * Warns about every block that is not an endpoint of any edge. Markdown
 * notes are exempt.
 */
function checkDisconnectedNodes(
  allConfigs: NodeConfig[],
  edges: Edge[],
): GraphWarning[] {
  const endpointIds = new Set<string>();
  for (const { source, target } of edges) {
    endpointIds.add(source).add(target);
  }

  return allConfigs
    .filter(
      (config) =>
        config.kind !== "markdown_note" && !endpointIds.has(config.id),
    )
    .map(
      (config): GraphWarning => ({
        nodeId: config.id,
        nodeName: config.name,
        message: "This block has no connections.",
        severity: "warning",
      }),
    );
}

/**
 * Warns about llm blocks that are connected, but only to infrastructure
 * nodes (kinds in INFRA_NODE_KINDS) — i.e. they take no part in the data
 * pipeline.
 *
 * Fully disconnected llm blocks are skipped here because
 * checkDisconnectedNodes already reports them.
 *
 * @param allConfigs - Every node config in the graph.
 * @param edges - Every edge in the graph.
 * @returns One warning per llm block lacking a non-infra connection.
 */
function checkLlmMissingDataInput(
  allConfigs: NodeConfig[],
  edges: Edge[],
): GraphWarning[] {
  const configById = new Map(allConfigs.map((c) => [c.id, c]));

  /** LLM IDs that have at least one non-infra pipeline edge. */
  const llmWithPipelineEdge = new Set<string>();
  /**
   * IDs of all nodes touched by any edge. Collected in the same pass so the
   * per-LLM "has any edge" lookup below is O(1) instead of rescanning `edges`.
   */
  const nodesWithAnyEdge = new Set<string>();
  for (const edge of edges) {
    nodesWithAnyEdge.add(edge.source);
    nodesWithAnyEdge.add(edge.target);

    const sourceConfig = configById.get(edge.source);
    const targetConfig = configById.get(edge.target);

    if (
      sourceConfig?.kind === "llm" &&
      targetConfig &&
      !INFRA_NODE_KINDS.has(targetConfig.kind)
    ) {
      llmWithPipelineEdge.add(sourceConfig.id);
    }
    if (
      targetConfig?.kind === "llm" &&
      sourceConfig &&
      !INFRA_NODE_KINDS.has(sourceConfig.kind)
    ) {
      llmWithPipelineEdge.add(targetConfig.id);
    }
  }

  const warnings: GraphWarning[] = [];
  for (const config of allConfigs) {
    if (config.kind !== "llm") {
      continue;
    }
    if (llmWithPipelineEdge.has(config.id)) {
      continue;
    }
    if (!nodesWithAnyEdge.has(config.id)) {
      continue; // already caught by checkDisconnectedNodes
    }

    warnings.push({
      nodeId: config.id,
      nodeName: config.name,
      message: "No data-pipeline connection — connect it to a source or downstream step.",
      severity: "warning",
    });
  }
  return warnings;
}

/**
 * Runs every graph check and returns their warnings concatenated in a fixed
 * order: data source, llm model alias, model-config provider, subcategory
 * parent, validator targets, disconnected nodes, llm data input.
 */
export function getGraphWarnings(
  configs: Record<string, NodeConfig>,
  edges: Edge[] = [],
): GraphWarning[] {
  const allConfigs = Object.values(configs);
  const collected: GraphWarning[] = [];
  collected.push(...checkDataSourceRequired(allConfigs));
  collected.push(...checkLlmModelAlias(allConfigs));
  collected.push(...checkModelConfigProvider(allConfigs));
  collected.push(...checkSubcategoryParent(allConfigs));
  collected.push(...checkValidatorTargets(allConfigs));
  collected.push(...checkDisconnectedNodes(allConfigs, edges));
  collected.push(...checkLlmMissingDataInput(allConfigs, edges));
  return collected;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/graph.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export {
  applyRecipeConnection,
  isValidRecipeConnection,
} from "./graph/recipe-graph-connection";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/handle-layout.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Utility class string for regular node handles (includes pointer events and
// hover styling).
export const NODE_HANDLE_CLASS =
  "pointer-events-auto !size-2.5 !border-border/80 !bg-muted shadow-sm hover:!border-primary/70 hover:!bg-primary/20";

// Utility class string for aux handles: slightly smaller, no hover or
// pointer-events classes.
export const AUX_HANDLE_CLASS =
  "!size-2 !border-border/80 !bg-muted/80 shadow-sm";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/handles.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Connection } from "@xyflow/react";
import type { LayoutDirection } from "../types";

// Canonical string ids for every handle a recipe node can expose, grouped by
// lane. Declared `as const` so RecipeHandleId below is the union of the
// literal values.
export const HANDLE_IDS = {
  // data flow lanes
  dataIn: "data-in",
  dataInTop: "data-in-top",
  dataInRight: "data-in-right",
  dataInBottom: "data-in-bottom",
  dataOut: "data-out",
  dataOutLeft: "data-out-left",
  dataOutTop: "data-out-top",
  dataOutBottom: "data-out-bottom",
  // semantic dependency lanes
  semanticIn: "semantic-in",
  semanticInTop: "semantic-in-top",
  semanticInRight: "semantic-in-right",
  semanticInBottom: "semantic-in-bottom",
  semanticInLeft: "semantic-in-left",
  semanticOut: "semantic-out",
  semanticOutLeft: "semantic-out-left",
  semanticOutTop: "semantic-out-top",
  semanticOutBottom: "semantic-out-bottom",
  semanticOutRight: "semantic-out-right",
  // llm prompt/scorer lanes
  llmInputOutLeft: "llm-input-out-left",
  llmInputOutRight: "llm-input-out-right",
  llmInputOutTop: "llm-input-out-top",
  llmInputOutBottom: "llm-input-out-bottom",
} as const;

// Union of all handle id string literals defined in HANDLE_IDS.
export type RecipeHandleId = (typeof HANDLE_IDS)[keyof typeof HANDLE_IDS];

// Aliases rewritten by normalizeRecipeHandleId: "semantic-in-left" maps to
// "semantic-in" and "semantic-out-right" maps to "semantic-out".
const LEGACY_HANDLE_ALIAS_MAP: Record<string, string> = {
  [HANDLE_IDS.semanticInLeft]: HANDLE_IDS.semanticIn,
  [HANDLE_IDS.semanticOutRight]: HANDLE_IDS.semanticOut,
};

// --- Membership sets by lane and role (used by the is*Handle predicates) ---

// All data-lane input handles.
const DATA_TARGET_HANDLES = new Set<string>([
  HANDLE_IDS.dataIn,
  HANDLE_IDS.dataInTop,
  HANDLE_IDS.dataInRight,
  HANDLE_IDS.dataInBottom,
]);

// All data-lane output handles.
const DATA_SOURCE_HANDLES = new Set<string>([
  HANDLE_IDS.dataOut,
  HANDLE_IDS.dataOutLeft,
  HANDLE_IDS.dataOutTop,
  HANDLE_IDS.dataOutBottom,
]);

// All semantic-lane input handles (includes the legacy "left" alias).
const SEMANTIC_TARGET_HANDLES = new Set<string>([
  HANDLE_IDS.semanticIn,
  HANDLE_IDS.semanticInTop,
  HANDLE_IDS.semanticInRight,
  HANDLE_IDS.semanticInBottom,
  HANDLE_IDS.semanticInLeft,
]);

// All semantic-lane output handles (includes the legacy "right" alias).
const SEMANTIC_SOURCE_HANDLES = new Set<string>([
  HANDLE_IDS.semanticOut,
  HANDLE_IDS.semanticOutLeft,
  HANDLE_IDS.semanticOutTop,
  HANDLE_IDS.semanticOutBottom,
  HANDLE_IDS.semanticOutRight,
]);

// --- Horizontal / vertical splits (used by remapRecipeEdgeHandlesForLayout
// to replace handles that do not fit the layout direction) ---

const DATA_TARGET_HORIZONTAL_HANDLES = new Set<string>([
  HANDLE_IDS.dataIn,
  HANDLE_IDS.dataInRight,
]);

const DATA_TARGET_VERTICAL_HANDLES = new Set<string>([
  HANDLE_IDS.dataInTop,
  HANDLE_IDS.dataInBottom,
]);

const DATA_SOURCE_HORIZONTAL_HANDLES = new Set<string>([
  HANDLE_IDS.dataOut,
  HANDLE_IDS.dataOutLeft,
]);

const DATA_SOURCE_VERTICAL_HANDLES = new Set<string>([
  HANDLE_IDS.dataOutTop,
  HANDLE_IDS.dataOutBottom,
]);

const SEMANTIC_TARGET_HORIZONTAL_HANDLES = new Set<string>([
  HANDLE_IDS.semanticIn,
  HANDLE_IDS.semanticInRight,
  HANDLE_IDS.semanticInLeft,
]);

const SEMANTIC_TARGET_VERTICAL_HANDLES = new Set<string>([
  HANDLE_IDS.semanticInTop,
  HANDLE_IDS.semanticInBottom,
]);

const SEMANTIC_SOURCE_HORIZONTAL_HANDLES = new Set<string>([
  HANDLE_IDS.semanticOut,
  HANDLE_IDS.semanticOutLeft,
  HANDLE_IDS.semanticOutRight,
]);

const SEMANTIC_SOURCE_VERTICAL_HANDLES = new Set<string>([
  HANDLE_IDS.semanticOutTop,
  HANDLE_IDS.semanticOutBottom,
]);

/**
 * Maps a handle id to its canonical form.
 *
 * Empty, null and undefined ids become null; ids listed in
 * LEGACY_HANDLE_ALIAS_MAP are replaced by their alias target; everything else
 * is returned unchanged.
 */
export function normalizeRecipeHandleId(
  handleId: string | null | undefined,
): string | null {
  if (handleId === null || handleId === undefined || handleId === "") {
    return null;
  }
  const aliasTarget = LEGACY_HANDLE_ALIAS_MAP[handleId];
  return aliasTarget ?? handleId;
}

/**
 * Returns a copy of the connection whose source and target handle ids have
 * been passed through normalizeRecipeHandleId.
 */
export function normalizeRecipeConnectionHandles(
  connection: Connection,
): Connection {
  const sourceHandle = normalizeRecipeHandleId(connection.sourceHandle);
  const targetHandle = normalizeRecipeHandleId(connection.targetHandle);
  return { ...connection, sourceHandle, targetHandle };
}

/**
 * Checks whether the handle id, once normalized, is a member of `handles`.
 * Empty, null and undefined ids are never members.
 */
function isKnownHandle(
  handleId: string | null | undefined,
  handles: Set<string>,
): boolean {
  const canonical = normalizeRecipeHandleId(handleId);
  return canonical !== null && handles.has(canonical);
}

/**
 * Keeps a handle if it suits the layout direction, otherwise substitutes
 * `defaultHandle`.
 *
 * In "LR" layouts handles from `verticalHandles` are replaced; in any other
 * direction handles from `horizontalHandles` are replaced. A missing handle
 * always resolves to `defaultHandle`. The returned id is normalized.
 */
function remapHandleForDirection(
  handleId: string | null | undefined,
  direction: LayoutDirection,
  horizontalHandles: Set<string>,
  verticalHandles: Set<string>,
  defaultHandle: string,
): string {
  const canonical = normalizeRecipeHandleId(handleId);
  if (!canonical) {
    return defaultHandle;
  }
  const misfitHandles = direction === "LR" ? verticalHandles : horizontalHandles;
  return misfitHandles.has(canonical) ? defaultHandle : canonical;
}

/** True when the (normalized) id is one of the data-lane input handles. */
export function isDataTargetHandle(
  handleId: string | null | undefined,
): boolean {
  const recognized = isKnownHandle(handleId, DATA_TARGET_HANDLES);
  return recognized;
}

/** True when the (normalized) id is one of the data-lane output handles. */
export function isDataSourceHandle(
  handleId: string | null | undefined,
): boolean {
  const recognized = isKnownHandle(handleId, DATA_SOURCE_HANDLES);
  return recognized;
}

/** True when the (normalized) id is one of the semantic-lane input handles. */
export function isSemanticTargetHandle(
  handleId: string | null | undefined,
): boolean {
  const recognized = isKnownHandle(handleId, SEMANTIC_TARGET_HANDLES);
  return recognized;
}

/** True when the (normalized) id is one of the semantic-lane output handles. */
export function isSemanticSourceHandle(
  handleId: string | null | undefined,
): boolean {
  const recognized = isKnownHandle(handleId, SEMANTIC_SOURCE_HANDLES);
  return recognized;
}

/** Default data input handle: the top one for "TB" layouts, otherwise "data-in". */
export function getDefaultDataTargetHandle(direction: LayoutDirection): string {
  if (direction === "TB") {
    return HANDLE_IDS.dataInTop;
  }
  return HANDLE_IDS.dataIn;
}

/** Default data output handle: the bottom one for "TB" layouts, otherwise "data-out". */
export function getDefaultDataSourceHandle(direction: LayoutDirection): string {
  if (direction === "TB") {
    return HANDLE_IDS.dataOutBottom;
  }
  return HANDLE_IDS.dataOut;
}

/** Default semantic input handle: the top one for "TB" layouts, otherwise "semantic-in". */
export function getDefaultSemanticTargetHandle(
  direction: LayoutDirection,
): string {
  if (direction === "TB") {
    return HANDLE_IDS.semanticInTop;
  }
  return HANDLE_IDS.semanticIn;
}

/** Default semantic output handle: the bottom one for "TB" layouts, otherwise "semantic-out". */
export function getDefaultSemanticSourceHandle(
  direction: LayoutDirection,
): string {
  if (direction === "TB") {
    return HANDLE_IDS.semanticOutBottom;
  }
  return HANDLE_IDS.semanticOut;
}

// The subset of an edge that remapRecipeEdgeHandlesForLayout reads: both
// handle ids plus the edge type ("semantic" marks a semantic edge).
type RecipeEdgeHandles = {
  sourceHandle?: string | null;
  targetHandle?: string | null;
  type?: string | null;
};

/**
 * Computes the handle ids an edge should use under the given layout
 * direction.
 *
 * An edge counts as semantic when its type is "semantic" or both of its
 * handles are semantic handles. Each end of a semantic edge is remapped with
 * the semantic handle sets unless that end currently sits on a data handle,
 * in which case the data sets are used. Non-semantic edges always use the
 * data sets.
 */
export function remapRecipeEdgeHandlesForLayout(
  edge: RecipeEdgeHandles,
  direction: LayoutDirection,
): { sourceHandle: string; targetHandle: string } {
  const { sourceHandle: currentSource, targetHandle: currentTarget } = edge;
  const isSemanticEdge =
    edge.type === "semantic" ||
    (isSemanticSourceHandle(currentSource) &&
      isSemanticTargetHandle(currentTarget));

  const sourceOnDataLane = !isSemanticEdge || isDataSourceHandle(currentSource);
  const targetOnDataLane = !isSemanticEdge || isDataTargetHandle(currentTarget);

  const sourceHandle = sourceOnDataLane
    ? remapHandleForDirection(
        currentSource,
        direction,
        DATA_SOURCE_HORIZONTAL_HANDLES,
        DATA_SOURCE_VERTICAL_HANDLES,
        getDefaultDataSourceHandle(direction),
      )
    : remapHandleForDirection(
        currentSource,
        direction,
        SEMANTIC_SOURCE_HORIZONTAL_HANDLES,
        SEMANTIC_SOURCE_VERTICAL_HANDLES,
        getDefaultSemanticSourceHandle(direction),
      );

  const targetHandle = targetOnDataLane
    ? remapHandleForDirection(
        currentTarget,
        direction,
        DATA_TARGET_HORIZONTAL_HANDLES,
        DATA_TARGET_VERTICAL_HANDLES,
        getDefaultDataTargetHandle(direction),
      )
    : remapHandleForDirection(
        currentTarget,
        direction,
        SEMANTIC_TARGET_HORIZONTAL_HANDLES,
        SEMANTIC_TARGET_VERTICAL_HANDLES,
        getDefaultSemanticTargetHandle(direction),
      );

  return { sourceHandle, targetHandle };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/image-preview.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// 200 KiB byte limit for image previews.
export const MAX_IMAGE_PREVIEW_BYTES = 200 * 1024;

// Loosely-typed view of an object payload; the fields are `unknown` and are
// checked at runtime before use.
type PreviewImagePayload = {
  type?: unknown;
  mime?: unknown;
  data?: unknown;
};

// Generic string-keyed object with unknown values.
type UnknownRecord = Record<string, unknown>;

// Result of resolving a value into a preview: either a usable image `src`, or
// a marker that the estimated decoded size exceeded the byte limit.
export type ImagePreviewResult =
  | { kind: "ready"; src: string }
  | { kind: "too_large"; estimatedBytes: number };

/** Strips every whitespace character from the string. */
function normalizeBase64(value: string): string {
  return value.split(/\s+/).join("");
}

/**
 * Estimates the decoded byte length of a base64 string: whitespace is
 * removed, then floor(length * 3 / 4) minus the number of trailing "="
 * padding characters (at most 2), never below zero.
 */
function estimateBase64Bytes(base64: string): number {
  const compact = normalizeBase64(base64);
  let padding = 0;
  if (compact.endsWith("==")) {
    padding = 2;
  } else if (compact.endsWith("=")) {
    padding = 1;
  }
  const rawEstimate = Math.floor((compact.length * 3) / 4) - padding;
  return Math.max(0, rawEstimate);
}

/**
 * Guesses an image MIME type from the leading characters of a base64 string
 * (whitespace ignored). Returns null when no known prefix matches.
 */
function inferMimeFromBase64(base64: string): string | null {
  // Ordered [base64 prefix, MIME type] pairs; the first match wins.
  const signatures: ReadonlyArray<readonly [string, string]> = [
    ["iVBORw0KGgo", "image/png"],
    ["/9j/", "image/jpeg"],
    ["R0lGOD", "image/gif"],
    ["UklGR", "image/webp"],
  ];
  const compact = normalizeBase64(base64);
  for (const [prefix, mime] of signatures) {
    if (compact.startsWith(prefix)) {
      return mime;
    }
  }
  return null;
}

/**
 * Heuristic for "this string is a bare base64-encoded image": at least 64
 * characters after whitespace removal, only base64 alphabet characters, and
 * a prefix that inferMimeFromBase64 recognizes.
 */
function isLikelyRawBase64Image(value: string): boolean {
  const compact = normalizeBase64(value);
  const longEnough = compact.length >= 64;
  if (!longEnough) {
    return false;
  }
  const base64Alphabet = /^[A-Za-z0-9+/=]+$/;
  if (!base64Alphabet.test(compact)) {
    return false;
  }
  return inferMimeFromBase64(compact) !== null;
}

/** Builds a `data:<mime>;base64,<payload>` URL from a whitespace-stripped payload. */
function toDataUrlFromBase64(base64: string, mime: string): string {
  const payload = normalizeBase64(base64);
  return "data:" + mime + ";base64," + payload;
}

/** Type guard: true for non-null, non-array objects. */
function isRecord(value: unknown): value is UnknownRecord {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}

/**
 * Type guard: true for a non-empty array whose every item is an integer in
 * the range 0..255.
 */
function isByteArray(value: unknown): value is number[] {
  if (!Array.isArray(value) || value.length === 0) {
    return false;
  }
  const isByte = (item: unknown): boolean =>
    typeof item === "number" &&
    Number.isInteger(item) &&
    item >= 0 &&
    item <= 255;
  return value.every(isByte);
}

/**
 * Encodes byte values as base64 via `btoa`. The binary string is assembled
 * in chunks of 0x8000 bytes so each `String.fromCharCode` call receives a
 * bounded number of arguments.
 */
function byteArrayToBase64(bytes: number[]): string {
  const chunkSize = 0x8000;
  const pieces: string[] = [];
  let offset = 0;
  while (offset < bytes.length) {
    pieces.push(String.fromCharCode(...bytes.slice(offset, offset + chunkSize)));
    offset += chunkSize;
  }
  return btoa(pieces.join(""));
}

/**
 * Resolves `value` as an image preview when it is a string; any non-string
 * value yields null.
 */
function resolveStringCandidate(
  value: unknown,
  maxBytes: number,
): ImagePreviewResult | null {
  return typeof value === "string"
    ? resolveImagePreviewFromString(value, maxBytes)
    : null;
}

/**
 * Resolve an image preview from a string.
 *
 * Recognized forms, checked in order on the trimmed value:
 *  1. `http://` / `https://` URLs — returned as-is.
 *  2. `data:image/...` URLs — returned as-is; when a `base64,` marker is
 *     present the payload size is estimated first and `too_large` is
 *     returned if it exceeds `maxBytes`.
 *  3. Bare base64 that looks like an image — size-checked, then wrapped in
 *     a data URL with the inferred MIME type.
 *
 * Returns `null` for empty strings and anything not matching the above.
 */
function resolveImagePreviewFromString(
  value: string,
  maxBytes: number,
): ImagePreviewResult | null {
  const text = value.trim();
  if (text.length === 0) {
    return null;
  }

  const isRemoteUrl = ["http://", "https://"].some((scheme) =>
    text.startsWith(scheme),
  );
  if (isRemoteUrl) {
    return { kind: "ready", src: text };
  }

  if (text.startsWith("data:image/")) {
    const base64Marker = "base64,";
    const markerAt = text.indexOf(base64Marker);
    if (markerAt >= 0) {
      const estimatedBytes = estimateBase64Bytes(
        text.slice(markerAt + base64Marker.length),
      );
      if (estimatedBytes > maxBytes) {
        return { kind: "too_large", estimatedBytes };
      }
    }
    // Data URLs without a base64 marker are passed through without a size check.
    return { kind: "ready", src: text };
  }

  if (!isLikelyRawBase64Image(text)) {
    return null;
  }
  const estimatedBytes = estimateBase64Bytes(text);
  if (estimatedBytes > maxBytes) {
    return { kind: "too_large", estimatedBytes };
  }
  const detectedMime = inferMimeFromBase64(text) ?? "image/png";
  return { kind: "ready", src: toDataUrlFromBase64(text, detectedMime) };
}

/**
 * Resolve an image preview from an object-shaped value.
 *
 * Lookup order:
 *  1. `{ type: "image", data: string, mime? }` payloads (mime defaults to
 *     `image/jpeg`).
 *  2. `image_url` as a string, then `image_url.url`.
 *  3. String values under `url`, `data`, `bytes`, `base64`, `base64_image`,
 *     `image`, `path` (in that order).
 *  4. `bytes` as an array of byte values, base64-encoded and re-checked.
 *  5. A nested object under `image`, resolved recursively.
 *
 * Returns `null` when `value` is not a plain object or nothing matches.
 */
function resolveImagePayloadObject(value: unknown, maxBytes: number): ImagePreviewResult | null {
  if (!isRecord(value)) {
    return null;
  }

  const typed = value as PreviewImagePayload;
  if (typed.type === "image" && typeof typed.data === "string") {
    const estimatedBytes = estimateBase64Bytes(typed.data);
    if (estimatedBytes > maxBytes) {
      return { kind: "too_large", estimatedBytes };
    }
    const mime = typeof typed.mime === "string" ? typed.mime : "image/jpeg";
    return { kind: "ready", src: toDataUrlFromBase64(typed.data, mime) };
  }

  const imageUrlField = value.image_url;
  const fromImageUrl = resolveStringCandidate(imageUrlField, maxBytes);
  if (fromImageUrl !== null) {
    return fromImageUrl;
  }
  if (isRecord(imageUrlField)) {
    const fromNestedUrl = resolveStringCandidate(imageUrlField.url, maxBytes);
    if (fromNestedUrl !== null) {
      return fromNestedUrl;
    }
  }

  const stringFields = [
    "url",
    "data",
    "bytes",
    "base64",
    "base64_image",
    "image",
    "path",
  ];
  for (const field of stringFields) {
    const fromField = resolveStringCandidate(value[field], maxBytes);
    if (fromField !== null) {
      return fromField;
    }
  }

  const rawBytes = value.bytes;
  if (isByteArray(rawBytes)) {
    const fromBytes = resolveStringCandidate(byteArrayToBase64(rawBytes), maxBytes);
    if (fromBytes !== null) {
      return fromBytes;
    }
  }

  const nestedImage = value.image;
  return isRecord(nestedImage)
    ? resolveImagePayloadObject(nestedImage, maxBytes)
    : null;
}

/**
 * Resolve an image preview for an arbitrary cell value.
 *
 * Object-shaped payloads are tried first; otherwise string values are
 * resolved directly. `maxBytes` caps the estimated decoded size of base64
 * payloads (defaults to `MAX_IMAGE_PREVIEW_BYTES`).
 *
 * @returns A preview result, or `null` when the value is not image-like.
 */
export function resolveImagePreview(
  value: unknown,
  maxBytes = MAX_IMAGE_PREVIEW_BYTES,
): ImagePreviewResult | null {
  const fromObject = resolveImagePayloadObject(value, maxBytes);
  if (fromObject) {
    return fromObject;
  }
  return typeof value === "string"
    ? resolveImagePreviewFromString(value, maxBytes)
    : null;
}

/**
 * True when `resolveImagePreview` recognizes the value as image-like.
 * The size limit is disabled, so oversized payloads still count.
 */
export function isLikelyImageValue(value: unknown): boolean {
  const preview = resolveImagePreview(value, Number.POSITIVE_INFINITY);
  return preview !== null;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/edges.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge } from "@xyflow/react";
import type { LayoutDirection, NodeConfig } from "../../types";
import {
  getDefaultDataSourceHandle,
  getDefaultDataTargetHandle,
  getDefaultSemanticSourceHandle,
  getDefaultSemanticTargetHandle,
  isDataSourceHandle,
  isDataTargetHandle,
  isSemanticSourceHandle,
  isSemanticTargetHandle,
  normalizeRecipeHandleId,
} from "../handles";
import { extractRefs } from "./helpers";

/**
 * Decide whether an edge between two node configs is a "semantic" link.
 *
 * Semantic pairs are: model_provider -> model_config, model_config -> llm,
 * tool_config -> llm, code llm -> validator, and validator -> code llm.
 */
function isSemanticConnection(source: NodeConfig, target: NodeConfig): boolean {
  switch (source.kind) {
    case "model_provider":
      return target.kind === "model_config";
    case "model_config":
    case "tool_config":
      return target.kind === "llm";
    case "llm":
      return source.llm_type === "code" && target.kind === "validator";
    case "validator":
      return target.kind === "llm" && target.llm_type === "code";
    default:
      return false;
  }
}

/**
 * Build the graph edges for an imported recipe.
 *
 * When `uiEdges` is provided and at least one of them resolves to known
 * nodes, only those edges are returned. Otherwise edges are inferred from
 * the configs themselves: template references in LLM prompts and expression
 * bodies, sampler parent/reference columns, model provider and alias links,
 * tool aliases, and validator target columns.
 *
 * Each (source, target) pair is emitted at most once; the first occurrence
 * wins. Edges are typed "semantic" or "canvas" via `isSemanticConnection`,
 * and requested handles are kept only when they are valid for that edge
 * type, falling back to layout-direction defaults otherwise.
 *
 * @param configs Parsed node configs.
 * @param nameToId Maps node names to node ids; names missing here are skipped.
 * @param uiEdges Edges saved with the UI state, or `null`.
 * @param layoutDirection Layout direction used to pick default handles.
 */
export function buildEdges(
  configs: NodeConfig[],
  nameToId: Map<string, string>,
  uiEdges:
    | Array<{
        from: string;
        to: string;
        type?: string;
        sourceHandle?: string;
        targetHandle?: string;
      }>
    | null,
  layoutDirection: LayoutDirection,
): Edge[] {
  const result: Edge[] = [];
  const emittedKeys = new Set<string>();
  const configLookup = new Map(configs.map((config) => [config.name, config]));

  const connect = (
    fromName: string,
    toName: string,
    requestedSourceHandle?: string,
    requestedTargetHandle?: string,
  ): void => {
    const sourceId = nameToId.get(fromName);
    const targetId = nameToId.get(toName);
    if (!sourceId || !targetId) {
      return;
    }
    const pairKey = `${sourceId}-${targetId}`;
    if (emittedKeys.has(pairKey)) {
      return;
    }
    emittedKeys.add(pairKey);

    const fromConfig = configLookup.get(fromName);
    const toConfig = configLookup.get(toName);
    const semantic = Boolean(
      fromConfig && toConfig && isSemanticConnection(fromConfig, toConfig),
    );

    const requestedSource = normalizeRecipeHandleId(requestedSourceHandle);
    const requestedTarget = normalizeRecipeHandleId(requestedTargetHandle);
    const dataSourceDefault = getDefaultDataSourceHandle(layoutDirection);
    const dataTargetDefault = getDefaultDataTargetHandle(layoutDirection);

    let sourceHandle = dataSourceDefault;
    let targetHandle = dataTargetDefault;

    if (semantic) {
      // LLM endpoints of a semantic edge default to their data handles.
      const sourceFallback =
        fromConfig?.kind === "llm"
          ? dataSourceDefault
          : getDefaultSemanticSourceHandle(layoutDirection);
      const targetFallback =
        toConfig?.kind === "llm"
          ? dataTargetDefault
          : getDefaultSemanticTargetHandle(layoutDirection);

      sourceHandle = sourceFallback;
      if (
        isSemanticSourceHandle(requestedSource) ||
        isDataSourceHandle(requestedSource)
      ) {
        sourceHandle = requestedSource ?? sourceFallback;
      }

      targetHandle = targetFallback;
      if (
        isSemanticTargetHandle(requestedTarget) ||
        isDataTargetHandle(requestedTarget)
      ) {
        targetHandle = requestedTarget ?? targetFallback;
      }
    } else {
      if (isDataSourceHandle(requestedSource)) {
        sourceHandle = requestedSource ?? dataSourceDefault;
      }
      if (isDataTargetHandle(requestedTarget)) {
        targetHandle = requestedTarget ?? dataTargetDefault;
      }
    }

    result.push({
      id: `e-${pairKey}`,
      source: sourceId,
      target: targetId,
      type: semantic ? "semantic" : "canvas",
      sourceHandle,
      targetHandle,
    });
  };

  if (uiEdges && uiEdges.length > 0) {
    for (const { from, to, sourceHandle, targetHandle } of uiEdges) {
      connect(from, to, sourceHandle, targetHandle);
    }
    // Only trust the saved edges when at least one of them resolved.
    if (result.length > 0) {
      return result;
    }
  }

  for (const config of configs) {
    switch (config.kind) {
      case "llm": {
        const referenced = [
          ...extractRefs(config.prompt ?? ""),
          ...extractRefs(config.system_prompt ?? ""),
        ];
        for (const ref of referenced) {
          connect(ref, config.name);
        }
        if (config.model_alias) {
          connect(config.model_alias, config.name);
        }
        if (config.tool_alias) {
          connect(config.tool_alias, config.name);
        }
        break;
      }
      case "expression": {
        for (const ref of extractRefs(config.expr)) {
          connect(ref, config.name);
        }
        break;
      }
      case "sampler": {
        if (config.sampler_type === "subcategory" && config.subcategory_parent) {
          connect(config.subcategory_parent, config.name);
        }
        if (config.sampler_type === "timedelta" && config.reference_column_name) {
          connect(config.reference_column_name, config.name);
        }
        break;
      }
      case "model_config": {
        if (config.provider) {
          connect(config.provider, config.name);
        }
        break;
      }
      case "validator": {
        for (const targetColumn of config.target_columns ?? []) {
          if (targetColumn.trim()) {
            connect(targetColumn, config.name);
          }
        }
        break;
      }
      default:
        break;
    }
  }

  return result;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/helpers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { extractRefs as extractJinjaRefs } from "../refs";

/** Type guard: true for non-null objects that are not arrays. */
export function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}

/** Return `value` when it is a string (including the empty string), otherwise `null`. */
export function readString(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  return value;
}

/**
 * Coerce a number-or-string value to a string.
 * Finite numbers are stringified, strings pass through unchanged, and
 * everything else (including NaN and infinities) becomes "".
 */
export function readNumberString(value: unknown): string {
  switch (typeof value) {
    case "string":
      return value;
    case "number":
      return Number.isFinite(value) ? String(value) : "";
    default:
      return "";
  }
}

/**
 * Parse a JSON string without throwing.
 *
 * @returns `{ data }` on success, or `{ data: null, error }` where `error`
 *          is the parser's message (or "Invalid JSON." when the thrown
 *          value is not an `Error`).
 */
export function parseJson(
  input: string,
): { data: unknown | null; error?: string } {
  let data: unknown;
  try {
    data = JSON.parse(input);
  } catch (error) {
    const message = error instanceof Error ? error.message : "Invalid JSON.";
    return { data: null, error: message };
  }
  return { data };
}

/**
 * Normalize an output-format value to text.
 * Strings pass through, plain objects are pretty-printed as JSON with
 * two-space indentation, and anything else becomes "".
 */
export function normalizeOutputFormat(value: unknown): string {
  if (typeof value === "string") {
    return value;
  }
  return isRecord(value) ? JSON.stringify(value, null, 2) : "";
}

/** Return the references found in a template; delegates to `extractRefs` from "../refs". */
export function extractRefs(template: string): string[] {
  const refs = extractJinjaRefs(template);
  return refs;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/importer.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  LlmConfig,
  LlmMcpProviderConfig,
  LlmToolConfig,
  MarkdownNoteConfig,
  NodeConfig,
  RecipeProcessorConfig,
  SeedConfig,
  SamplerConfig,
  SeedSourceType,
  ToolProfileConfig,
  ValidatorConfig,
} from "../../types";
import { buildEdges } from "./edges";
import { isRecord, parseJson, readString } from "./helpers";
import {
  parseColumn,
  parseModelConfig,
  parseModelProvider,
} from "./parsers";
import { parseSeedConfig } from "./parsers/seed-config-parser";
import { buildNodes, parseUi } from "./ui";
import type { ImportResult } from "./types";

// Loosely-typed view of the recipe portion of an import payload.
// Every field is `unknown` because it comes straight from parsed JSON and
// is validated by the individual parsers before use.
type RecipeInput = {
  columns?: unknown;
  model_configs?: unknown;
  model_providers?: unknown;
  mcp_providers?: unknown;
  tool_configs?: unknown;
  processors?: unknown;
  seed_config?: unknown;
};

// Loosely-typed view of the optional `ui` portion of an import payload.
// Fields are `unknown` (raw parsed JSON) and are checked individually in
// `importRecipePayload` before being used.
type UiInput = {
  nodes?: unknown;
  edges?: unknown;
  seed_source_type?: unknown;
  seed_columns?: unknown;
  seed_drop_columns?: unknown;
  seed_preview_rows?: unknown;
  local_file_name?: unknown;
  unstructured_file_name?: unknown;
  unstructured_chunk_size?: unknown;
  unstructured_chunk_overlap?: unknown;
  advanced_open_by_node?: unknown;
};

// Normalized markdown-note entry produced by `parseUiMarkdownNoteNodes`.
// `name` is already trimmed; the optional styling fields are left
// undefined when the payload does not provide them.
type UiMarkdownNoteNode = {
  name: string;
  markdown: string;
  note_color?: string;
  note_opacity?: string;
};

/**
 * Read a value that may be a string or a finite number as a string.
 * Returns `undefined` for anything else, including NaN and infinities.
 */
function readStringNumber(value: unknown): string | undefined {
  if (typeof value === "string") {
    return value;
  }
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  return isFiniteNumber ? String(value) : undefined;
}

/**
 * Extract schema-transform processors from the recipe's `processors` list.
 *
 * An entry is kept when its `processor_type` is "schema_transform" or its
 * `template` is an object. Object templates are serialized to pretty JSON;
 * when no usable template exists a placeholder template is used. Ids and
 * default names are derived from the entry's 1-based position in the input.
 */
function parseProcessors(input: unknown): RecipeProcessorConfig[] {
  const result: RecipeProcessorConfig[] = [];
  if (!Array.isArray(input)) {
    return result;
  }
  for (let position = 0; position < input.length; position += 1) {
    const entry: unknown = input[position];
    if (!isRecord(entry)) {
      continue;
    }
    const rawTemplate = entry.template;
    const declaredType = readString(entry.processor_type);
    if (declaredType !== "schema_transform" && !isRecord(rawTemplate)) {
      continue;
    }

    let template = "{\n  \"text\": \"{{ column_name }}\"\n}";
    if (typeof rawTemplate === "string") {
      template = rawTemplate;
    } else if (isRecord(rawTemplate)) {
      template = JSON.stringify(rawTemplate, null, 2);
    }

    const ordinal = position + 1;
    result.push({
      id: `p${ordinal}`,
      // biome-ignore lint/style/useNamingConvention: api schema
      processor_type: "schema_transform",
      name: readString(entry.name) ?? `schema_transform_${ordinal}`,
      template,
    });
  }
  return result;
}

/**
 * Collect the column names listed by `drop_columns` processors named
 * "drop_seed_columns".
 *
 * Names are trimmed, blanks and non-strings are ignored, and duplicates
 * are removed while keeping first-seen order.
 */
function parseSeedDropColumns(input: unknown): string[] {
  if (!Array.isArray(input)) {
    return [];
  }
  const collected = new Set<string>();
  for (const processor of input) {
    if (!isRecord(processor)) {
      continue;
    }
    const isSeedDropProcessor =
      readString(processor.processor_type) === "drop_columns" &&
      readString(processor.name) === "drop_seed_columns";
    if (!isSeedDropProcessor) {
      continue;
    }
    const rawNames: unknown[] = Array.isArray(processor.column_names)
      ? processor.column_names
      : [];
    for (const rawName of rawNames) {
      const cleaned = typeof rawName === "string" ? rawName.trim() : "";
      if (cleaned) {
        collected.add(cleaned);
      }
    }
  }
  return Array.from(collected);
}

/**
 * Parse the recipe's `mcp_providers` list into a map keyed by provider name.
 *
 * Entries that are not objects or lack a non-blank `name` are skipped.
 * `provider_type` is "stdio" only when given exactly; any other value
 * becomes "streamable_http". `args` and `env` values are stringified, and
 * missing string fields default to "". Ids use the entry's 1-based
 * position in the input. A later entry with the same name replaces an
 * earlier one.
 */
function parseMcpProviders(
  input: unknown,
): Map<string, LlmMcpProviderConfig> {
  const byName = new Map<string, LlmMcpProviderConfig>();
  if (!Array.isArray(input)) {
    return byName;
  }
  for (let position = 0; position < input.length; position += 1) {
    const entry: unknown = input[position];
    if (!isRecord(entry)) {
      continue;
    }
    const name = readString(entry.name)?.trim();
    if (!name) {
      continue;
    }
    const isStdio = readString(entry.provider_type) === "stdio";
    const args = Array.isArray(entry.args)
      ? entry.args.map((arg) => String(arg))
      : [];
    const env = isRecord(entry.env)
      ? Object.entries(entry.env).map(([envKey, envValue]) => ({
          key: String(envKey),
          value: String(envValue),
        }))
      : [];
    byName.set(name, {
      id: `mcp-${position + 1}`,
      name,
      // biome-ignore lint/style/useNamingConvention: ui schema
      provider_type: isStdio ? "stdio" : "streamable_http",
      command: readString(entry.command) ?? "",
      args,
      env,
      endpoint: readString(entry.endpoint) ?? "",
      // biome-ignore lint/style/useNamingConvention: api schema
      api_key: readString(entry.api_key) ?? "",
      // biome-ignore lint/style/useNamingConvention: api schema
      api_key_env: readString(entry.api_key_env) ?? "",
    });
  }
  return byName;
}

/**
 * Parse the recipe's `tool_configs` list into a map keyed by tool alias.
 *
 * Entries that are not objects or lack a non-blank `tool_alias` are
 * skipped. `providers` and `allow_tools` are stringified, trimmed and
 * stripped of blanks. `max_tool_call_turns` defaults to "5" and
 * `timeout_sec` to "" when null or undefined. Ids use the entry's 1-based
 * position in the input.
 */
function parseToolConfigs(input: unknown): Map<string, LlmToolConfig> {
  const byAlias = new Map<string, LlmToolConfig>();
  if (!Array.isArray(input)) {
    return byAlias;
  }

  const toTrimmedList = (raw: unknown): string[] =>
    Array.isArray(raw)
      ? raw.map((value) => String(value).trim()).filter(Boolean)
      : [];
  const stringOrDefault = (raw: unknown, fallback: string): string =>
    raw === null || raw === undefined ? fallback : String(raw);

  for (let position = 0; position < input.length; position += 1) {
    const entry: unknown = input[position];
    if (!isRecord(entry)) {
      continue;
    }
    const alias = readString(entry.tool_alias)?.trim();
    if (!alias) {
      continue;
    }
    byAlias.set(alias, {
      id: `tool-${position + 1}`,
      // biome-ignore lint/style/useNamingConvention: api schema
      tool_alias: alias,
      providers: toTrimmedList(entry.providers),
      // biome-ignore lint/style/useNamingConvention: api schema
      allow_tools: toTrimmedList(entry.allow_tools),
      // biome-ignore lint/style/useNamingConvention: api schema
      max_tool_call_turns: stringOrDefault(entry.max_tool_call_turns, "5"),
      // biome-ignore lint/style/useNamingConvention: api schema
      timeout_sec: stringOrDefault(entry.timeout_sec, ""),
    });
  }
  return byAlias;
}

/**
 * Create an independent copy of an MCP provider config.
 *
 * Scalar fields are copied by spread, `args` gets a fresh array, and each
 * `env` pair is copied as well so that editing a pair on the clone can
 * never leak back into the source config (or into other clones of it).
 * Missing `args` / `env` become empty arrays.
 *
 * @param config Provider config to copy.
 * @returns A new config object sharing no mutable state with `config`.
 */
function cloneMcpProvider(config: LlmMcpProviderConfig): LlmMcpProviderConfig {
  return {
    ...config,
    args: [...(config.args ?? [])],
    // Copy each pair object: spreading only the array would keep the
    // {key, value} objects aliased between the original and the clone.
    env: (config.env ?? []).map((pair) => ({ ...pair })),
  };
}

/**
 * Extract markdown-note nodes from the UI `nodes` list.
 *
 * A node is kept when its `node_type` (or `type`) is "markdown_note" and
 * its `name` (or `id`) is a non-blank string. The name is trimmed; missing
 * markdown defaults to "" and missing styling fields stay undefined.
 */
function parseUiMarkdownNoteNodes(input: unknown): UiMarkdownNoteNode[] {
  const notes: UiMarkdownNoteNode[] = [];
  if (!Array.isArray(input)) {
    return notes;
  }
  for (const candidate of input) {
    if (!isRecord(candidate)) {
      continue;
    }
    const kind = readString(candidate.node_type) ?? readString(candidate.type);
    if (kind !== "markdown_note") {
      continue;
    }
    const rawName = readString(candidate.name) ?? readString(candidate.id);
    const trimmedName = rawName?.trim();
    if (!trimmedName) {
      continue;
    }
    notes.push({
      name: trimmedName,
      markdown: readString(candidate.markdown) ?? "",
      note_color: readString(candidate.note_color) ?? undefined,
      note_opacity: readStringNumber(candidate.note_opacity) ?? undefined,
    });
  }
  return notes;
}

/**
 * Extract the per-provider tool lists stored on `tool_config` UI nodes.
 *
 * Returns a map from trimmed node name to `{ providerName: tools[] }`.
 * Nodes without an object-valued `tools_by_provider` are skipped. Provider
 * names are trimmed; tool names are stringified, trimmed, de-duplicated,
 * and providers left with no tools are dropped.
 */
function parseUiToolProfileNodes(input: unknown): Map<string, Record<string, string[]>> {
  const profiles = new Map<string, Record<string, string[]>>();
  if (!Array.isArray(input)) {
    return profiles;
  }
  for (const candidate of input) {
    if (!isRecord(candidate)) {
      continue;
    }
    const kind = readString(candidate.node_type) ?? readString(candidate.type);
    if (kind !== "tool_config") {
      continue;
    }
    const profileName = (readString(candidate.name) ?? readString(candidate.id))?.trim();
    if (!profileName) {
      continue;
    }
    const rawByProvider = candidate.tools_by_provider;
    if (!isRecord(rawByProvider)) {
      continue;
    }

    const providerEntries: Array<[string, string[]]> = [];
    for (const [rawProvider, rawTools] of Object.entries(rawByProvider)) {
      const providerName = rawProvider.trim();
      if (!providerName || !Array.isArray(rawTools)) {
        continue;
      }
      const uniqueTools = Array.from(
        new Set(rawTools.map((tool) => String(tool).trim()).filter(Boolean)),
      );
      if (uniqueTools.length > 0) {
        providerEntries.push([providerName, uniqueTools]);
      }
    }
    profiles.set(profileName, Object.fromEntries(providerEntries));
  }
  return profiles;
}

/**
 * Read the UI's `advanced_open_by_node` map.
 * Keys are trimmed; entries with a blank key or a non-boolean value are
 * dropped. Non-object input yields an empty map.
 */
function parseAdvancedOpenByNode(input: unknown): Record<string, boolean> {
  const flags: Record<string, boolean> = {};
  if (!isRecord(input)) {
    return flags;
  }
  for (const rawName of Object.keys(input)) {
    const flag = input[rawName];
    const name = rawName.trim();
    if (name && typeof flag === "boolean") {
      flags[name] = flag;
    }
  }
  return flags;
}

// Node config variants that `applyAdvancedOpen` writes an `advancedOpen` flag onto.
type AdvancedOpenConfig = LlmConfig | SamplerConfig | SeedConfig | ValidatorConfig;

/** Type guard: true for llm, sampler, seed and validator configs. */
function isAdvancedOpenConfig(config: NodeConfig): config is AdvancedOpenConfig {
  switch (config.kind) {
    case "llm":
    case "sampler":
    case "seed":
    case "validator":
      return true;
    default:
      return false;
  }
}

/**
 * Set `config.advancedOpen` from the UI's per-node map.
 * Only configs accepted by `isAdvancedOpenConfig` are touched; the flag is
 * true only when the map holds exactly `true` for the config's name.
 */
function applyAdvancedOpen(
  config: NodeConfig,
  advancedOpenByNode: Record<string, boolean>,
): void {
  if (isAdvancedOpenConfig(config)) {
    const requested = advancedOpenByNode[config.name];
    config.advancedOpen = requested === true;
  }
}

/**
 * Build the `tool_config` node for a tool configuration.
 *
 * The entry registered under the same alias in `toolConfigsByAlias` is
 * used when present, otherwise `toolConfig` itself. Provider names are
 * resolved against `mcpProvidersByName` (unknown names are dropped) and
 * each resolved provider is cloned. Fetched tool lists come from the UI
 * state keyed by the profile name, defaulting to an empty object.
 */
function buildToolProfileConfig(
  toolConfig: LlmToolConfig,
  toolConfigsByAlias: Map<string, LlmToolConfig>,
  mcpProvidersByName: Map<string, LlmMcpProviderConfig>,
  fetchedToolsByProfileName: Map<string, Record<string, string[]>>,
  id: string,
): ToolProfileConfig {
  const resolved = toolConfigsByAlias.get(toolConfig.tool_alias) ?? toolConfig;
  const alias = resolved.tool_alias;

  const providerCopies: LlmMcpProviderConfig[] = [];
  for (const providerName of resolved.providers) {
    const provider = mcpProvidersByName.get(providerName);
    if (provider) {
      providerCopies.push(cloneMcpProvider(provider));
    }
  }

  return {
    id,
    kind: "tool_config",
    name: alias,
    // biome-ignore lint/style/useNamingConvention: ui schema
    mcp_providers: providerCopies,
    // biome-ignore lint/style/useNamingConvention: ui schema
    fetched_tools_by_provider: fetchedToolsByProfileName.get(alias) ?? {},
    // biome-ignore lint/style/useNamingConvention: api schema
    allow_tools: [...(resolved.allow_tools ?? [])],
    // biome-ignore lint/style/useNamingConvention: api schema
    max_tool_call_turns: resolved.max_tool_call_turns ?? "5",
    // biome-ignore lint/style/useNamingConvention: api schema
    timeout_sec: resolved.timeout_sec ?? "",
  };
}

/**
 * Parse a serialized recipe payload into an editor snapshot.
 *
 * The input must be a JSON object, either a bare recipe or a wrapper of the
 * form `{ recipe, ui }`. Node configs are created in a fixed order
 * (markdown notes, seed, model providers, model configs, tool profiles,
 * columns) and receive sequential ids `n1`, `n2`, ... in that order.
 *
 * @param input Raw JSON text.
 * @returns `{ errors, snapshot: null }` when parsing or validation fails,
 *          otherwise `{ errors: [], snapshot }`.
 */
export function importRecipePayload(input: string): ImportResult {
  const parsed = parseJson(input);
  if (!parsed.data || !isRecord(parsed.data)) {
    return {
      errors: [parsed.error ?? "Invalid JSON payload."],
      snapshot: null,
    };
  }

  // Accept both `{ recipe: {...}, ui: {...} }` and a bare recipe object.
  const recipe = (isRecord(parsed.data.recipe)
    ? parsed.data.recipe
    : parsed.data) as RecipeInput;
  const ui = isRecord(parsed.data.ui) ? (parsed.data.ui as UiInput) : null;

  if (!Array.isArray(recipe.columns)) {
    return { errors: ["Recipe must include columns."], snapshot: null };
  }

  const errors: string[] = [];
  const configs: NodeConfig[] = [];
  const processors = parseProcessors(recipe.processors);
  const mcpProvidersByName = parseMcpProviders(recipe.mcp_providers);
  const toolConfigsByAlias = parseToolConfigs(recipe.tool_configs);
  // Node name -> node id; also used to detect duplicate names across all node kinds.
  const nameToId = new Map<string, string>();

  // Counter behind the `n<N>` node ids. It advances for every candidate
  // node, including ones that are later rejected.
  let nextId = 1;
  const uiSeedSourceTypeRaw = readString(ui?.seed_source_type);
  const uiSeedSourceType: SeedSourceType | undefined =
    uiSeedSourceTypeRaw === "hf" ||
    uiSeedSourceTypeRaw === "local" ||
    uiSeedSourceTypeRaw === "unstructured"
      ? uiSeedSourceTypeRaw
      : undefined;
  const uiSeedColumns = Array.isArray(ui?.seed_columns)
    ? ui.seed_columns
        .map((value) => (typeof value === "string" ? value.trim() : ""))
        .filter(Boolean)
    : undefined;
  const uiSeedDropColumns = Array.isArray(ui?.seed_drop_columns)
    ? ui.seed_drop_columns
        .map((value) => (typeof value === "string" ? value.trim() : ""))
        .filter(Boolean)
    : undefined;
  const payloadSeedDropColumns = parseSeedDropColumns(recipe.processors);
  const uiSeedPreviewRows = Array.isArray(ui?.seed_preview_rows)
    ? ui.seed_preview_rows
        .filter((row): row is Record<string, unknown> => isRecord(row))
        .map((row) => ({ ...row }))
    : undefined;
  const uiLocalFileName = readString(ui?.local_file_name) ?? undefined;
  const uiUnstructuredFileName =
    readString(ui?.unstructured_file_name) ?? undefined;
  const uiUnstructuredChunkSize = readStringNumber(ui?.unstructured_chunk_size);
  const uiUnstructuredChunkOverlap = readStringNumber(
    ui?.unstructured_chunk_overlap,
  );
  const uiAdvancedOpenByNode = parseAdvancedOpenByNode(ui?.advanced_open_by_node);
  const uiMarkdownNotes = parseUiMarkdownNoteNodes(ui?.nodes);
  const uiToolProfilesByName = parseUiToolProfileNodes(ui?.nodes);

  // 1) Markdown notes (UI-only nodes), with default color and opacity.
  for (const note of uiMarkdownNotes) {
    const id = `n${nextId}`;
    nextId += 1;
    const config: MarkdownNoteConfig = {
      id,
      kind: "markdown_note",
      name: note.name,
      markdown: note.markdown,
      note_color: note.note_color ?? "#FDE68A",
      note_opacity: note.note_opacity ?? "35",
    };
    if (nameToId.has(config.name)) {
      errors.push(`Duplicate column name: ${config.name}.`);
      continue;
    }
    nameToId.set(config.name, config.id);
    configs.push(config);
  }

  // 2) Seed config. UI-provided drop columns take precedence over the ones
  //    derived from the recipe's processors when the UI list is non-empty.
  if (recipe.seed_config) {
    const id = `n${nextId}`;
    nextId += 1;
    const seedConfig = parseSeedConfig(recipe.seed_config, id, {
      preferredSourceType: uiSeedSourceType,
      seed_columns: uiSeedColumns,
      seed_drop_columns:
        uiSeedDropColumns && uiSeedDropColumns.length > 0
          ? uiSeedDropColumns
          : payloadSeedDropColumns,
      seed_preview_rows: uiSeedPreviewRows,
      local_file_name: uiLocalFileName,
      unstructured_file_name: uiUnstructuredFileName,
      unstructured_chunk_size: uiUnstructuredChunkSize,
      unstructured_chunk_overlap: uiUnstructuredChunkOverlap,
    });
    if (seedConfig) {
      applyAdvancedOpen(seedConfig, uiAdvancedOpenByNode);
      if (nameToId.has(seedConfig.name)) {
        errors.push(`Duplicate column name: ${seedConfig.name}.`);
      } else {
        nameToId.set(seedConfig.name, seedConfig.id);
      }
      // Unlike the other node kinds, the seed config is pushed even when its
      // name is a duplicate; the recorded error still aborts the import below.
      configs.push(seedConfig);
    }
  }

  // 3) Model providers.
  if (Array.isArray(recipe.model_providers)) {
    recipe.model_providers.forEach((provider, index) => {
      if (!isRecord(provider)) {
        errors.push(`Model provider ${index + 1}: invalid object.`);
        return;
      }
      const name = readString(provider.name);
      if (!name) {
        errors.push(`Model provider ${index + 1}: missing name.`);
        return;
      }
      const id = `n${nextId}`;
      nextId += 1;
      const config = parseModelProvider(provider, name, id);
      if (nameToId.has(config.name)) {
        errors.push(`Duplicate column name: ${config.name}.`);
        return;
      }
      nameToId.set(config.name, config.id);
      configs.push(config);
    });
  }

  // 4) Model configs, named by `alias` with `name` as a fallback.
  if (Array.isArray(recipe.model_configs)) {
    recipe.model_configs.forEach((model, index) => {
      if (!isRecord(model)) {
        errors.push(`Model config ${index + 1}: invalid object.`);
        return;
      }
      const name = readString(model.alias) ?? readString(model.name);
      if (!name) {
        errors.push(`Model config ${index + 1}: missing alias.`);
        return;
      }
      const id = `n${nextId}`;
      nextId += 1;
      const config = parseModelConfig(model, name, id);
      if (nameToId.has(config.name)) {
        errors.push(`Duplicate column name: ${config.name}.`);
        return;
      }
      nameToId.set(config.name, config.id);
      configs.push(config);
    });
  }

  // 5) Tool profiles, one per parsed tool config.
  for (const toolConfig of toolConfigsByAlias.values()) {
    const id = `n${nextId}`;
    nextId += 1;
    const config = buildToolProfileConfig(
      toolConfig,
      toolConfigsByAlias,
      mcpProvidersByName,
      uiToolProfilesByName,
      id,
    );
    if (nameToId.has(config.name)) {
      errors.push(`Duplicate column name: ${config.name}.`);
      continue;
    }
    nameToId.set(config.name, config.id);
    configs.push(config);
  }

  // 6) Columns. `parseColumn` receives `errors` and may return a falsy
  //    value, in which case the column is skipped.
  recipe.columns.forEach((column, index) => {
    if (!isRecord(column)) {
      errors.push(`Column ${index + 1}: invalid object.`);
      return;
    }
    const id = `n${nextId}`;
    nextId += 1;
    const config = parseColumn(column, id, errors);
    if (!config) {
      return;
    }
    applyAdvancedOpen(config, uiAdvancedOpenByNode);
    if (nameToId.has(config.name)) {
      errors.push(`Duplicate column name: ${config.name}.`);
      return;
    }
    nameToId.set(config.name, config.id);
    configs.push(config);
  });

  // Any accumulated error aborts the import; no partial snapshot is returned.
  if (errors.length > 0) {
    return { errors, snapshot: null };
  }

  const { layouts, auxNodes, edges: uiEdges, layoutDirection } = parseUi(ui);
  // Fall back to "LR" when the UI state carries no layout direction.
  const resolvedLayoutDirection = layoutDirection ?? "LR";
  const nodes = buildNodes(configs, layouts);
  const edges = buildEdges(
    configs,
    nameToId,
    uiEdges,
    resolvedLayoutDirection,
  );
  // Aux node positions are keyed `aux-<llm node id>-<key>`; entries whose
  // `llm` name does not resolve to a node id are dropped.
  const auxNodePositions = Object.fromEntries(
    auxNodes.flatMap((item) => {
      const llmId = nameToId.get(item.llm);
      if (!llmId) {
        return [];
      }
      return [[`aux-${llmId}-${item.key}`, { x: item.x, y: item.y }]];
    }),
  );

  // Largest y among the built nodes (never below 0); used to derive `nextY`.
  const maxY = nodes.reduce(
    (acc, node) => Math.max(acc, node.position.y),
    0,
  );

  return {
    errors: [],
    snapshot: {
      configs: Object.fromEntries(configs.map((config) => [config.id, config])),
      nodes,
      edges,
      auxNodePositions,
      processors,
      layoutDirection: resolvedLayoutDirection,
      nextId,
      nextY: maxY + 140,
    },
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { importRecipePayload } from "./importer";
export type { RecipeSnapshot, ImportResult } from "./types";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers/expression-parser.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  ExpressionConfig,
  ExpressionDtype,
} from "../../../types";
import { readString } from "../helpers";

// Dtype values accepted by `parseExpression`; any other value falls back to "str".
const EXPRESSION_DTYPES: ExpressionDtype[] = ["str", "int", "float", "bool"];

/**
 * Build an expression node config from a raw column object.
 *
 * `dtype` is kept only when it is one of `EXPRESSION_DTYPES`, otherwise it
 * becomes "str". `drop` is true only for a literal `true`, and a missing
 * or non-string `expr` becomes "".
 */
export function parseExpression(
  column: Record<string, unknown>,
  name: string,
  id: string,
): ExpressionConfig {
  const rawDtype = readString(column.dtype);
  let dtype: ExpressionDtype = "str";
  if (EXPRESSION_DTYPES.includes(rawDtype as ExpressionDtype)) {
    dtype = rawDtype as ExpressionDtype;
  }
  const expr = readString(column.expr) ?? "";
  const drop = column.drop === true;
  return { id, kind: "expression", name, drop, expr, dtype };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers/llm-parser.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  LlmConfig,
  Score,
  ScoreOption,
} from "../../../types";
import {
  isRecord,
  normalizeOutputFormat,
  readString,
} from "../helpers";

/**
 * Normalize a raw `with_trace` value.
 * Only "last_message" and "all_messages" are accepted; everything else,
 * including non-strings, maps to "none".
 */
function parseTraceMode(value: unknown): LlmConfig["with_trace"] {
  const mode = readString(value);
  switch (mode) {
    case "last_message":
    case "all_messages":
      return mode;
    default:
      return "none";
  }
}

/**
 * Build an LLM node config from an imported column record.
 *
 * `column_type` selects the LLM flavour ("text" when absent or unrecognised).
 * Judge scores are collected only for "llm-judge" columns; other flavours get
 * `scores: undefined`. Image context is enabled only when the first record in
 * `multi_modal_context` has modality "image" and a non-empty column name.
 */
export function parseLlm(
  column: Record<string, unknown>,
  name: string,
  id: string,
): LlmConfig {
  const columnType = readString(column.column_type) ?? "llm-text";

  let llmType: LlmConfig["llm_type"];
  switch (columnType) {
    case "llm-structured":
      llmType = "structured";
      break;
    case "llm-code":
      llmType = "code";
      break;
    case "llm-judge":
      llmType = "judge";
      break;
    default:
      llmType = "text";
  }

  // Judge columns always carry a (possibly empty) score list.
  let scores: Score[] | undefined;
  if (llmType === "judge") {
    scores = [];
    if (Array.isArray(column.scores)) {
      for (const rawScore of column.scores) {
        if (!isRecord(rawScore)) {
          continue;
        }
        const optionSource = isRecord(rawScore.options) ? rawScore.options : {};
        const options: ScoreOption[] = Object.entries(optionSource).map(
          ([key, value]) => ({
            value: String(key),
            // Non-string descriptions are kept as their JSON text.
            description:
              typeof value === "string" ? value : JSON.stringify(value),
          }),
        );
        scores.push({
          name: readString(rawScore.name) ?? "",
          description: readString(rawScore.description) ?? "",
          options,
        });
      }
    }
  }

  let imageContext: LlmConfig["image_context"] = {
    enabled: false,
    // biome-ignore lint/style/useNamingConvention: api schema
    column_name: "",
  };
  const multiModal = column.multi_modal_context;
  if (Array.isArray(multiModal)) {
    // Only the first record entry is inspected.
    const firstRecord = multiModal.find((entry) => isRecord(entry));
    if (firstRecord && isRecord(firstRecord)) {
      const modality = readString(firstRecord.modality);
      const imageColumn = readString(firstRecord.column_name) ?? "";
      if (modality === "image" && imageColumn) {
        imageContext = {
          enabled: true,
          // biome-ignore lint/style/useNamingConvention: api schema
          column_name: imageColumn,
        };
      }
    }
  }

  const traceMode = parseTraceMode(column.with_trace);
  const wantsReasoning = column.extract_reasoning_content === true;

  return {
    id,
    kind: "llm",
    // biome-ignore lint/style/useNamingConvention: api schema
    llm_type: llmType,
    name,
    drop: column.drop === true,
    // biome-ignore lint/style/useNamingConvention: api schema
    model_alias: readString(column.model_alias) ?? "",
    prompt: readString(column.prompt) ?? "",
    // biome-ignore lint/style/useNamingConvention: api schema
    system_prompt: readString(column.system_prompt) ?? "",
    // biome-ignore lint/style/useNamingConvention: api schema
    code_lang: readString(column.code_lang) ?? "",
    // biome-ignore lint/style/useNamingConvention: api schema
    output_format: normalizeOutputFormat(column.output_format),
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_alias: readString(column.tool_alias) ?? "",
    // biome-ignore lint/style/useNamingConvention: api schema
    with_trace: traceMode,
    // biome-ignore lint/style/useNamingConvention: api schema
    extract_reasoning_content: wantsReasoning,
    scores,
    // biome-ignore lint/style/useNamingConvention: ui schema
    image_context: imageContext,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers/model-parser.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  ModelConfig,
  ModelProviderConfig,
} from "../../../types";
import {
  isRecord,
  readNumberString,
  readString,
} from "../helpers";

/**
 * Build a model-provider node config from an imported provider record.
 *
 * String fields default to "" (`provider_type` to "openai"). The
 * `extra_headers` and `extra_body` records are stored as 2-space indented
 * JSON text, or "" when the payload value is not a record.
 */
export function parseModelProvider(
  provider: Record<string, unknown>,
  name: string,
  id: string,
): ModelProviderConfig {
  const recordToJson = (value: unknown): string =>
    isRecord(value) ? JSON.stringify(value, null, 2) : "";
  const text = (value: unknown, fallback = ""): string =>
    readString(value) ?? fallback;

  return {
    id,
    kind: "model_provider",
    name,
    endpoint: text(provider.endpoint),
    // biome-ignore lint/style/useNamingConvention: api schema
    provider_type: text(provider.provider_type, "openai"),
    // biome-ignore lint/style/useNamingConvention: api schema
    api_key_env: text(provider.api_key_env),
    // biome-ignore lint/style/useNamingConvention: api schema
    api_key: text(provider.api_key),
    // biome-ignore lint/style/useNamingConvention: api schema
    extra_headers: recordToJson(provider.extra_headers),
    // biome-ignore lint/style/useNamingConvention: api schema
    extra_body: recordToJson(provider.extra_body),
  };
}

/**
 * Build a model-config node config from an imported model record.
 *
 * Inference settings are read from `inference_parameters` when it is a
 * record (otherwise treated as empty) and passed through readNumberString.
 * `skip_health_check` is true only for a literal boolean `true`.
 */
export function parseModelConfig(
  model: Record<string, unknown>,
  name: string,
  id: string,
): ModelConfig {
  const rawInference = model.inference_parameters;
  const inference: Record<string, unknown> = isRecord(rawInference)
    ? (rawInference as Record<string, unknown>)
    : {};
  const extraBody = inference.extra_body;

  return {
    id,
    kind: "model_config",
    name,
    model: readString(model.model) ?? "",
    provider: readString(model.provider) ?? "",
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_temperature: readNumberString(inference.temperature),
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_top_p: readNumberString(inference.top_p),
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_max_tokens: readNumberString(inference.max_tokens),
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_timeout: readNumberString(inference.timeout),
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_extra_body: isRecord(extraBody)
      ? JSON.stringify(extraBody, null, 2)
      : "",
    // biome-ignore lint/style/useNamingConvention: api schema
    skip_health_check: model.skip_health_check === true,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers/sampler-parser.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  SamplerConfig,
  SamplerType,
} from "../../../types";
import {
  isRecord,
  readNumberString,
  readString,
} from "../helpers";

// Sampler types the importer accepts; parseSampler records an error and
// returns null for any `sampler_type` that is not listed here.
const SAMPLER_TYPES: SamplerType[] = [
  "category",
  "subcategory",
  "uniform",
  "gaussian",
  "bernoulli",
  "datetime",
  "timedelta",
  "uuid",
  "person",
  "person_from_faker",
];

// Unit codes accepted for timedelta samplers; parseSampler falls back to "D"
// for anything else.
const TIMEDELTA_UNITS = new Set(["D", "h", "m", "s"]);

/**
 * Extract the per-condition category overrides from `conditional_params`.
 *
 * An entry is kept only when it is a record whose `sampler_type` is
 * "category" and which has at least one string value. Non-numeric weights are
 * replaced by null; a missing weights array stays undefined.
 *
 * @returns the kept entries keyed by condition, or undefined when
 *   `conditional_params` is not a record or no entry qualifies
 */
function parseCategoryConditionalParams(
  column: Record<string, unknown>,
): SamplerConfig["conditional_params"] {
  const source = column.conditional_params;
  if (!isRecord(source)) {
    return undefined;
  }

  const kept: NonNullable<SamplerConfig["conditional_params"]> = {};
  for (const [condition, rawParams] of Object.entries(source)) {
    if (
      !isRecord(rawParams) ||
      readString(rawParams.sampler_type) !== "category"
    ) {
      continue;
    }

    const rawValues = rawParams.values;
    const values = Array.isArray(rawValues)
      ? rawValues.filter((item) => typeof item === "string")
      : [];
    if (values.length === 0) {
      continue;
    }

    const rawWeights = rawParams.weights;
    const weights = Array.isArray(rawWeights)
      ? rawWeights.map((item) => (typeof item === "number" ? item : null))
      : undefined;

    kept[condition] = {
      // biome-ignore lint/style/useNamingConvention: api schema
      sampler_type: "category",
      values,
      weights,
    };
  }

  if (Object.keys(kept).length === 0) {
    return undefined;
  }
  return kept;
}

/**
 * Build a sampler node config from an imported column record.
 *
 * The shape of the result depends on `sampler_type`; each branch reads its own
 * fields from `column.params`. `convert_to` is kept only when it is "float",
 * "int" or "str".
 *
 * @param column raw column record from the import payload
 * @param name column name, also used in the error message
 * @param id node id to assign to the resulting config
 * @param errors mutable list that receives an entry for an unsupported type
 * @returns the sampler config, or null when `sampler_type` is missing or not
 *   in SAMPLER_TYPES
 */
export function parseSampler(
  column: Record<string, unknown>,
  name: string,
  id: string,
  errors: string[],
): SamplerConfig | null {
  const drop = column.drop === true;
  const samplerType = readString(column.sampler_type);
  if (!samplerType || !SAMPLER_TYPES.includes(samplerType as SamplerType)) {
    errors.push(`Sampler ${name}: unsupported sampler_type.`);
    return null;
  }

  const rawConvertTo = readString(column.convert_to);
  const normalizedConvertTo =
    rawConvertTo && ["float", "int", "str"].includes(rawConvertTo)
      ? (rawConvertTo as "float" | "int" | "str")
      : undefined;

  const rawParams = column.params;
  const params: Record<string, unknown> =
    typeof rawParams === "object" && rawParams
      ? (rawParams as Record<string, unknown>)
      : {};

  const isString = (item: unknown): item is string => typeof item === "string";

  switch (samplerType) {
    case "category": {
      const values = Array.isArray(params.values)
        ? params.values.filter(isString)
        : [];
      // Non-numeric weights are kept as null placeholders.
      const weights = Array.isArray(params.weights)
        ? params.weights.map((item) => (typeof item === "number" ? item : null))
        : [];
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "category",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        values,
        weights,
        // biome-ignore lint/style/useNamingConvention: api schema
        conditional_params: parseCategoryConditionalParams(column),
      };
    }

    case "subcategory": {
      const mapping: Record<string, string[]> = {};
      const rawMapping = params.values;
      if (rawMapping && typeof rawMapping === "object") {
        for (const [parentValue, children] of Object.entries(rawMapping)) {
          if (Array.isArray(children)) {
            mapping[parentValue] = children.filter(isString);
          }
        }
      }
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "subcategory",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        // biome-ignore lint/style/useNamingConvention: api schema
        subcategory_parent: readString(params.category) ?? "",
        // biome-ignore lint/style/useNamingConvention: api schema
        subcategory_mapping: mapping,
      };
    }

    case "uniform":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "uniform",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        low: readNumberString(params.low),
        high: readNumberString(params.high),
      };

    case "gaussian":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "gaussian",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        mean: readNumberString(params.mean),
        std: readNumberString(params.std),
      };

    case "bernoulli":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "bernoulli",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        p: readNumberString(params.p),
      };

    case "datetime":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "datetime",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        // biome-ignore lint/style/useNamingConvention: api schema
        datetime_start: readString(params.start) ?? "",
        // biome-ignore lint/style/useNamingConvention: api schema
        datetime_end: readString(params.end) ?? "",
        // biome-ignore lint/style/useNamingConvention: api schema
        datetime_unit: readString(params.unit) ?? "",
      };

    case "timedelta": {
      const requestedUnit = readString(params.unit);
      const timedeltaUnit =
        requestedUnit && TIMEDELTA_UNITS.has(requestedUnit)
          ? (requestedUnit as "D" | "h" | "m" | "s")
          : "D";
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "timedelta",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        // biome-ignore lint/style/useNamingConvention: api schema
        dt_min: readNumberString(params.dt_min),
        // biome-ignore lint/style/useNamingConvention: api schema
        dt_max: readNumberString(params.dt_max),
        // biome-ignore lint/style/useNamingConvention: api schema
        reference_column_name: readString(params.reference_column_name) ?? "",
        // biome-ignore lint/style/useNamingConvention: api schema
        timedelta_unit: timedeltaUnit,
      };
    }

    case "uuid":
      return {
        id,
        kind: "sampler",
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: "uuid",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        // biome-ignore lint/style/useNamingConvention: api schema
        uuid_format: readString(params.format) ?? "",
      };

    default: {
      // Remaining accepted types are the person samplers.
      const rawAgeRange = params.age_range;
      const isNumericPair =
        Array.isArray(rawAgeRange) &&
        rawAgeRange.length === 2 &&
        rawAgeRange.every((item) => typeof item === "number");
      // A [min, max] pair is rendered as "min-max"; otherwise use the string.
      const ageRange = isNumericPair
        ? `${(rawAgeRange as number[])[0]}-${(rawAgeRange as number[])[1]}`
        : readString(rawAgeRange) ?? "";

      const personConfig: SamplerConfig = {
        id,
        kind: "sampler",
        name,
        drop,
        // biome-ignore lint/style/useNamingConvention: api schema
        sampler_type: samplerType as SamplerType,
        // biome-ignore lint/style/useNamingConvention: api schema
        convert_to: normalizedConvertTo,
        // biome-ignore lint/style/useNamingConvention: api schema
        person_locale: readString(params.locale) ?? "",
        // biome-ignore lint/style/useNamingConvention: api schema
        person_sex: readString(params.sex) ?? "",
        // biome-ignore lint/style/useNamingConvention: api schema
        person_age_range: ageRange,
        // biome-ignore lint/style/useNamingConvention: api schema
        person_city: readString(params.city) ?? "",
      };

      if (samplerType !== "person") {
        return personConfig;
      }
      return {
        ...personConfig,
        // biome-ignore lint/style/useNamingConvention: api schema
        person_with_synthetic_personas: params.with_synthetic_personas === true,
      };
    }
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers/seed-config-parser.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  SeedConfig,
  SeedSamplingStrategy,
  SeedSelectionType,
  SeedSourceType,
} from "../../../types";
import { isRecord, readNumberString, readString } from "../helpers";

/** Map an imported sampling strategy to "shuffle" or, for anything else, "ordered". */
function normalizeSampling(value: unknown): SeedSamplingStrategy {
  return readString(value) === "shuffle" ? "shuffle" : "ordered";
}

/**
 * Create the baseline seed config for node `id`.
 *
 * parseSeedConfig spreads payload values and caller options over this object,
 * so every field here is the value used when the import supplies nothing.
 */
function makeDefaultSeedConfig(id: string): SeedConfig {
  const defaults: SeedConfig = {
    // Node identity.
    id,
    kind: "seed",
    name: "seed",
    drop: false,
    seed_drop_columns: [],
    // Source location.
    seed_source_type: "hf",
    hf_repo_id: "",
    hf_subset: "",
    hf_split: "",
    hf_path: "",
    hf_token: "",
    hf_endpoint: "https://huggingface.co",
    local_file_name: "",
    unstructured_file_name: "",
    seed_preview_rows: [],
    unstructured_chunk_size: "1200",
    unstructured_chunk_overlap: "200",
    seed_splits: [],
    seed_globs_by_split: {},
    seed_columns: [],
    // Sampling and row selection.
    sampling_strategy: "ordered",
    selection_type: "none",
    selection_start: "0",
    selection_end: "10",
    selection_index: "0",
    selection_num_partitions: "1",
  };
  return defaults;
}

/**
 * Derive an "owner/name" repo id from a seed path.
 *
 * Empty path segments are ignored. A leading "datasets" segment is skipped
 * when at least two more segments follow it; otherwise the first two segments
 * are used. Returns "" when fewer than two segments are available.
 */
function inferRepoIdFromSeedPath(path: string): string {
  const segments = path
    .trim()
    .split("/")
    .filter((segment) => segment.length > 0);
  if (segments.length < 2) {
    return "";
  }
  const start = segments.length >= 3 && segments[0] === "datasets" ? 1 : 0;
  return `${segments[start]}/${segments[start + 1]}`;
}

/**
 * Read the payload-derived seed fields from a raw `seed_config` value.
 *
 * Returns {} when the input is not a record. Otherwise every field below is
 * returned, using the same defaults as makeDefaultSeedConfig when the payload
 * has no usable value:
 * - `source.seed_type` of "hf", "local" or "unstructured" fills the matching
 *   source fields; any other type leaves the "hf" defaults in place.
 * - `selection_strategy` becomes "index_range" when numeric `start` and `end`
 *   are present, else "partition_block" when numeric `index` and
 *   `num_partitions` are present, else "none".
 */
function parseSeedSettings(seedConfigRaw: unknown): Partial<SeedConfig> {
  if (!isRecord(seedConfigRaw)) {
    return {};
  }

  const samplingStrategy = normalizeSampling(seedConfigRaw.sampling_strategy);

  let sourceType: SeedSourceType = "hf";
  let path = "";
  let token = "";
  let endpoint = "https://huggingface.co";
  let repoId = "";
  let localFileName = "";
  let unstructuredFileName = "";
  let chunkSize = "1200";
  let chunkOverlap = "200";

  const source = seedConfigRaw.source;
  if (isRecord(source)) {
    const seedType = readString(source.seed_type);
    const sourcePath = readString(source.path) ?? "";
    // Last "/"-separated segment of the path, used as the display file name.
    const fileName = sourcePath.split("/").pop() ?? sourcePath;
    switch (seedType) {
      case "hf":
        sourceType = "hf";
        path = sourcePath;
        token = readString(source.token) ?? "";
        endpoint = readString(source.endpoint) ?? endpoint;
        repoId = inferRepoIdFromSeedPath(path);
        break;
      case "local":
        sourceType = "local";
        path = sourcePath;
        localFileName = fileName;
        break;
      case "unstructured":
        sourceType = "unstructured";
        path = sourcePath;
        unstructuredFileName = fileName;
        chunkSize = readNumberString(source.chunk_size) || "1200";
        chunkOverlap = readNumberString(source.chunk_overlap) || "200";
        break;
      default:
        break;
    }
  }

  let selectionType: SeedSelectionType = "none";
  let rangeStart = "0";
  let rangeEnd = "10";
  let partitionIndex = "0";
  let partitionCount = "1";

  const selection = seedConfigRaw.selection_strategy;
  if (isRecord(selection)) {
    const { start, end, index, num_partitions: numPartitions } = selection;
    if (typeof start === "number" && typeof end === "number") {
      selectionType = "index_range";
      rangeStart = String(start);
      rangeEnd = String(end);
    } else if (typeof index === "number" && typeof numPartitions === "number") {
      selectionType = "partition_block";
      partitionIndex = String(index);
      partitionCount = String(numPartitions);
    }
  }

  return {
    seed_source_type: sourceType,
    hf_repo_id: repoId,
    hf_path: path,
    hf_token: token,
    hf_endpoint: endpoint,
    local_file_name: localFileName,
    unstructured_file_name: unstructuredFileName,
    unstructured_chunk_size: chunkSize,
    unstructured_chunk_overlap: chunkOverlap,
    sampling_strategy: samplingStrategy,
    selection_type: selectionType,
    selection_start: rangeStart,
    selection_end: rangeEnd,
    selection_index: partitionIndex,
    selection_num_partitions: partitionCount,
  };
}

/**
 * Build a seed node config from a raw `seed_config` payload value.
 *
 * Layers, lowest to highest precedence: makeDefaultSeedConfig, the fields
 * parsed from the payload, the resolved source type, then caller `options`.
 *
 * Source type resolution: a payload-parsed "hf" always wins; otherwise
 * `options.preferredSourceType` is used when set, then the parsed type, then
 * "hf".
 *
 * Array-valued options apply only when truthy; string-valued options apply
 * whenever they are not undefined (so "" is a valid override).
 *
 * @returns the merged config, or null when `seedConfigRaw` is falsy
 */
export function parseSeedConfig(
  seedConfigRaw: unknown,
  id: string,
  options?: {
    preferredSourceType?: SeedSourceType;
    seed_columns?: string[];
    seed_drop_columns?: string[];
    seed_preview_rows?: Record<string, unknown>[];
    local_file_name?: string;
    unstructured_file_name?: string;
    unstructured_chunk_size?: string;
    unstructured_chunk_overlap?: string;
  },
): SeedConfig | null {
  if (!seedConfigRaw) {
    return null;
  }

  const parsed = parseSeedSettings(seedConfigRaw);
  const sourceType: SeedSourceType =
    parsed.seed_source_type === "hf"
      ? "hf"
      : options?.preferredSourceType || parsed.seed_source_type || "hf";

  const overrides: Partial<SeedConfig> = {};
  if (options?.seed_columns) {
    overrides.seed_columns = options.seed_columns;
  }
  if (options?.seed_drop_columns) {
    overrides.seed_drop_columns = options.seed_drop_columns;
  }
  if (options?.seed_preview_rows) {
    overrides.seed_preview_rows = options.seed_preview_rows;
  }
  if (options?.local_file_name !== undefined) {
    overrides.local_file_name = options.local_file_name;
  }
  if (options?.unstructured_file_name !== undefined) {
    overrides.unstructured_file_name = options.unstructured_file_name;
  }
  if (options?.unstructured_chunk_size !== undefined) {
    overrides.unstructured_chunk_size = options.unstructured_chunk_size;
  }
  if (options?.unstructured_chunk_overlap !== undefined) {
    overrides.unstructured_chunk_overlap = options.unstructured_chunk_overlap;
  }

  return {
    ...makeDefaultSeedConfig(id),
    ...parsed, // payload-only fields override ui defaults
    seed_source_type: sourceType,
    ...overrides,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers/validator-parser.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ValidatorConfig } from "../../../types";
import { readNumberString } from "../helpers";
import { normalizeValidatorCodeLang } from "../../validators/code-lang";
import { normalizeOxcCodeShape } from "../../validators/oxc-code-shape";
import { normalizeOxcValidationMode } from "../../validators/oxc-mode";

// Prefix of `validator_params.validation_function` that parseValidator uses to
// recognise an OXC validator; parseOxcValidationMarker reads the
// ":"-separated fields that follow it.
const OXC_VALIDATION_FN_MARKER = "unsloth_oxc_validator";

/**
 * Split an OXC validation function marker into its fields.
 *
 * Expected form: `<marker>:<codeLang>:<mode>[:<codeShape>]`. Segments are
 * trimmed and empty ones dropped before positions are assigned. When the
 * marker prefix is absent or fewer than two segments remain, the defaults
 * (codeLang "", mode "syntax", codeShape "auto") are returned.
 */
function parseOxcValidationMarker(
  validationFunctionRaw: string,
): { codeLang: string; mode: string; codeShape: string } {
  const defaults = { codeLang: "", mode: "syntax", codeShape: "auto" };

  const prefix = `${OXC_VALIDATION_FN_MARKER}:`;
  if (!validationFunctionRaw.startsWith(prefix)) {
    return defaults;
  }

  const segments: string[] = [];
  for (const piece of validationFunctionRaw.slice(prefix.length).split(":")) {
    const trimmed = piece.trim();
    if (trimmed) {
      segments.push(trimmed);
    }
  }
  if (segments.length < 2) {
    return defaults;
  }

  const [codeLang, mode] = segments;
  return { codeLang, mode, codeShape: segments[2] ?? "auto" };
}

/**
 * Build a validator node config from an imported column record.
 *
 * The column is treated as an OXC validator when `validator_type` is
 * "local_callable" and `validator_params.validation_function` starts with
 * OXC_VALIDATION_FN_MARKER; language, mode and code shape then come from that
 * marker (language falls back to "javascript"). Any other column becomes a
 * "code" validator using `validator_params.code_lang`.
 *
 * `target_columns` keeps only non-empty trimmed strings, and `batch_size`
 * falls back to "10".
 */
export function parseValidator(
  column: Record<string, unknown>,
  name: string,
  id: string,
): ValidatorConfig {
  const targetColumns: string[] = [];
  if (Array.isArray(column.target_columns)) {
    for (const entry of column.target_columns) {
      if (typeof entry !== "string") {
        continue;
      }
      const trimmed = entry.trim();
      if (trimmed) {
        targetColumns.push(trimmed);
      }
    }
  }

  const rawParams = column.validator_params;
  const params: Record<string, unknown> =
    rawParams && typeof rawParams === "object"
      ? (rawParams as Record<string, unknown>)
      : {};

  const rawFunction = params.validation_function;
  const validationFunctionRaw =
    typeof rawFunction === "string" ? rawFunction.trim() : "";

  const validatorType = String(column.validator_type ?? "").trim();
  // `marker` is non-null exactly when this column is an OXC validator.
  const marker =
    validatorType === "local_callable" &&
    validationFunctionRaw.startsWith(OXC_VALIDATION_FN_MARKER)
      ? parseOxcValidationMarker(validationFunctionRaw)
      : null;

  return {
    id,
    kind: "validator",
    name,
    drop: column.drop === true,
    // biome-ignore lint/style/useNamingConvention: api schema
    target_columns: targetColumns,
    validator_type: marker ? "oxc" : "code",
    // biome-ignore lint/style/useNamingConvention: api schema
    code_lang: normalizeValidatorCodeLang(
      marker ? marker.codeLang || "javascript" : params.code_lang,
    ),
    oxc_validation_mode: marker
      ? normalizeOxcValidationMode(marker.mode)
      : "syntax",
    oxc_code_shape: marker ? normalizeOxcCodeShape(marker.codeShape) : "auto",
    batch_size: readNumberString(column.batch_size) || "10",
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/parsers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig } from "../../types";
import { readString } from "./helpers";
import { parseExpression } from "./parsers/expression-parser";
import { parseLlm } from "./parsers/llm-parser";
export { parseModelConfig, parseModelProvider } from "./parsers/model-parser";
import { parseSampler } from "./parsers/sampler-parser";
import { parseValidator } from "./parsers/validator-parser";

// Common signature for the per-column-type parsers registered in
// COLUMN_PARSERS: takes the raw column record, its name, the node id to
// assign, and the shared error list; returns the parsed config or null.
type ColumnParser = (
  column: Record<string, unknown>,
  name: string,
  id: string,
  errors: string[],
) => NodeConfig | null;

// Dispatch table from `column_type` to its parser. All four LLM column types
// share parseLlm, which re-reads `column_type` to pick the flavour. Parsers
// that do not report errors simply ignore the trailing `errors` argument.
const COLUMN_PARSERS: Record<string, ColumnParser> = {
  sampler: parseSampler,
  expression: parseExpression,
  "llm-text": parseLlm,
  "llm-structured": parseLlm,
  "llm-code": parseLlm,
  "llm-judge": parseLlm,
  validation: parseValidator,
};

/**
 * Parse one imported column record into a node config.
 *
 * Dispatches on `column_type` through COLUMN_PARSERS. On failure a message is
 * appended to `errors` and null is returned.
 *
 * @param column raw column record from the import payload
 * @param id node id to assign to the resulting config
 * @param errors mutable list that receives human-readable import errors
 * @returns the parsed config, or null when the column has no name, has an
 *   unsupported `column_type`, or the selected parser rejects it
 */
export function parseColumn(
  column: Record<string, unknown>,
  id: string,
  errors: string[],
): NodeConfig | null {
  const name = readString(column.name);
  if (!name) {
    errors.push("Column missing name.");
    return null;
  }
  const columnType = readString(column.column_type);
  // Only own keys of the dispatch table count. A plain bracket lookup also
  // resolves inherited Object.prototype members, so a payload with
  // column_type "constructor" would call Object(column) and hand the raw
  // column back as if it were a parsed config, and "__proto__" would throw.
  const parser =
    columnType &&
    Object.prototype.hasOwnProperty.call(COLUMN_PARSERS, columnType)
      ? COLUMN_PARSERS[columnType]
      : null;
  if (parser) {
    return parser(column, name, id, errors);
  }
  errors.push(`Column ${name}: unsupported column_type.`);
  return null;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge, XYPosition } from "@xyflow/react";
import type {
  LayoutDirection,
  RecipeNode,
  RecipeProcessorConfig,
  NodeConfig,
} from "../../types";

// Recipe state carried by ImportResult.snapshot: node configs plus the graph
// nodes and edges, auxiliary node positions, processors, layout direction and
// the nextId/nextY values.
// NOTE(review): `configs` is presumably keyed by node id, and nextId/nextY look
// like the counters used when the next node is added — confirm against the
// importer and the store.
export type RecipeSnapshot = {
  configs: Record<string, NodeConfig>;
  nodes: RecipeNode[];
  edges: Edge[];
  auxNodePositions: Record<string, XYPosition>;
  processors: RecipeProcessorConfig[];
  layoutDirection: LayoutDirection;
  nextId: number;
  nextY: number;
};

// Result of an import: the collected error messages and the resulting
// snapshot, which may be null.
// NOTE(review): presumably null means the import failed — confirm in the
// importer.
export type ImportResult = {
  errors: string[];
  snapshot: RecipeSnapshot | null;
};


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/import/ui.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipeNode, NodeConfig } from "../../types";
import { DEFAULT_NODE_WIDTH } from "../../constants";
import { nodeDataFromConfig } from "../index";
import { normalizeRecipeHandleId } from "../handles";
import { isRecord, readString } from "./helpers";

// Untrusted `ui` section of an import payload as consumed by parseUi. Every
// field is `unknown` and validated there. The layout direction is accepted
// under either the snake_case or the camelCase key; parseUi checks
// `layout_direction` first.
type UiInput = {
  nodes?: unknown;
  edges?: unknown;
  aux_nodes?: unknown;
  layout_direction?: unknown;
  layoutDirection?: unknown;
};

// A validated `aux_nodes` entry: parseUi keeps an entry only when `llm` and
// `key` are non-empty strings and `x`/`y` are numbers.
// NOTE(review): `llm` presumably names the owning LLM node and `key` the
// auxiliary slot on it — confirm against the exporter.
type ParsedAuxNode = {
  llm: string;
  key: string;
  x: number;
  y: number;
};

/**
 * Validate the `ui` section of an import payload.
 *
 * Malformed entries are skipped silently:
 * - nodes need a string `id` and numeric `x`/`y`; `width` is kept only when
 *   it is a positive number;
 * - edges need string `from`/`to`; handle ids are read from the snake_case
 *   key first, then the camelCase key, and passed through
 *   normalizeRecipeHandleId;
 * - aux nodes need string `llm`/`key` and numeric `x`/`y`.
 *
 * @returns node layouts keyed by the payload node id, the aux nodes, the
 *   edges (null when none were valid) and the layout direction (null unless
 *   it is exactly "TB" or "LR")
 */
export function parseUi(
  ui: UiInput | null,
): {
  layouts: Map<string, { x: number; y: number; width?: number }>;
  auxNodes: ParsedAuxNode[];
  edges: Array<{
    from: string;
    to: string;
    type?: string;
    sourceHandle?: string;
    targetHandle?: string;
  }> | null;
  layoutDirection: "LR" | "TB" | null;
} {
  const layouts = new Map<string, { x: number; y: number; width?: number }>();
  const auxNodes: ParsedAuxNode[] = [];
  const edges: Array<{
    from: string;
    to: string;
    type?: string;
    sourceHandle?: string;
    targetHandle?: string;
  }> = [];

  const numberOrNull = (value: unknown): number | null =>
    typeof value === "number" ? value : null;

  const rawNodes = ui && Array.isArray(ui.nodes) ? ui.nodes : [];
  for (const rawNode of rawNodes) {
    if (!isRecord(rawNode)) {
      continue;
    }
    const nodeId = readString(rawNode.id);
    const x = numberOrNull(rawNode.x);
    const y = numberOrNull(rawNode.y);
    const width = numberOrNull(rawNode.width);
    if (!nodeId || x === null || y === null) {
      continue;
    }
    const layout: { x: number; y: number; width?: number } = { x, y };
    if (width !== null && width > 0) {
      layout.width = width;
    }
    layouts.set(nodeId, layout);
  }

  const rawEdges = ui && Array.isArray(ui.edges) ? ui.edges : [];
  for (const rawEdge of rawEdges) {
    if (!isRecord(rawEdge)) {
      continue;
    }
    const from = readString(rawEdge.from);
    const to = readString(rawEdge.to);
    if (!(from && to)) {
      continue;
    }
    const sourceHandle = normalizeRecipeHandleId(
      readString(rawEdge.source_handle) ?? readString(rawEdge.sourceHandle),
    );
    const targetHandle = normalizeRecipeHandleId(
      readString(rawEdge.target_handle) ?? readString(rawEdge.targetHandle),
    );
    edges.push({
      from,
      to,
      type: readString(rawEdge.type) ?? undefined,
      sourceHandle: sourceHandle ?? undefined,
      targetHandle: targetHandle ?? undefined,
    });
  }

  const rawAuxNodes = ui && Array.isArray(ui.aux_nodes) ? ui.aux_nodes : [];
  for (const rawAux of rawAuxNodes) {
    if (!isRecord(rawAux)) {
      continue;
    }
    const llm = readString(rawAux.llm);
    const key = readString(rawAux.key);
    const x = numberOrNull(rawAux.x);
    const y = numberOrNull(rawAux.y);
    if (llm && key && x !== null && y !== null) {
      auxNodes.push({ llm, key, x, y });
    }
  }

  const directionRaw =
    readString(ui?.layout_direction) ?? readString(ui?.layoutDirection);
  let layoutDirection: "LR" | "TB" | null = null;
  if (directionRaw === "TB" || directionRaw === "LR") {
    layoutDirection = directionRaw;
  }

  return {
    layouts,
    auxNodes,
    edges: edges.length > 0 ? edges : null,
    layoutDirection,
  };
}

/**
 * Turn parsed configs into canvas nodes.
 *
 * Each node takes its position and width from the layout stored under the
 * config's *name*. Configs without a stored layout are stacked vertically at
 * x = 0, 140 px apart by index, with DEFAULT_NODE_WIDTH.
 */
export function buildNodes(
  configs: NodeConfig[],
  layouts: Map<string, { x: number; y: number; width?: number }>,
): RecipeNode[] {
  const nodes: RecipeNode[] = [];
  configs.forEach((config, index) => {
    const stored = layouts.get(config.name);
    const placement: { x: number; y: number; width?: number } = stored ?? {
      x: 0,
      y: index * 140,
    };
    const { x, y, width } = placement;
    nodes.push({
      id: config.id,
      type: "builder",
      position: { x, y },
      data: nodeDataFromConfig(config),
      style: { width: width ?? DEFAULT_NODE_WIDTH },
    });
  });
  return nodes;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export {
  makeExpressionConfig,
  makeLlmConfig,
  makeMarkdownNoteConfig,
  makeModelConfig,
  makeModelProviderConfig,
  makeSamplerConfig,
  makeSeedConfig,
  makeToolProfileConfig,
  makeValidatorConfig,
} from "./config-factories";
export {
  labelForExpression,
  labelForLlm,
  labelForSampler,
} from "./config-labels";
export {
  isCategoryConfig,
  isExpressionConfig,
  isLlmConfig,
  isSamplerConfig,
  isSubcategoryConfig,
  isValidatorConfig,
} from "./config-type-guards";
export { getGraphWarnings, type GraphWarning } from "./graph-warnings";
export { nextName } from "./naming";
export { nodeDataFromConfig } from "./node-data";
export { getConfigErrors } from "./validation";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/layout.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import dagre from "@dagrejs/dagre";
import type { Edge, Node } from "@xyflow/react";
import { DEFAULT_NODE_HEIGHT, DEFAULT_NODE_WIDTH } from "../constants";
import { INFRA_NODE_KINDS, type LayoutDirection, type NodeConfig } from "../types";
import { readNodeHeight, readNodeWidth } from "./rf-node-dimensions";

/** Optional tuning knobs accepted by `getLayoutedElements`. */
type LayoutOptions = {
  // Passed to dagre as `rankdir`; `getLayoutedElements` defaults it to "LR".
  direction?: LayoutDirection;
  // Forwarded unchanged to the dagre graph config (default 80).
  nodesep?: number;
  // Forwarded unchanged to the dagre graph config (default 80).
  ranksep?: number;
  // Forwarded unchanged to the dagre graph config (default 28).
  edgesep?: number;
  // Fallback width used when `readNodeWidth` yields no value for a node.
  nodeWidth?: number;
  // Fallback height used when `readNodeHeight` yields no value for a node.
  nodeHeight?: number;
  // Node configs keyed by node id. When present and non-empty, infra and aux
  // nodes are excluded from the dagre pass and phantom rank edges are added.
  configs?: Record<string, NodeConfig>;
};

/**
 * Map a node config to its position in the logical pipeline so that even
 * unconnected nodes can be laid out in a sensible order (smaller = earlier).
 *
 * A missing config, or a kind that is not part of the data pipeline, gets the
 * middle rank 2.
 */
function getPipelineRank(config: NodeConfig | undefined): number {
  // The index in this list IS the rank: seed=0, sampler=1, expression=2, ...
  const pipelineOrder: readonly string[] = [
    "seed",
    "sampler",
    "expression",
    "llm",
    "validator",
  ];
  const middleRank = 2;
  const position = config ? pipelineOrder.indexOf(config.kind) : -1;
  return position === -1 ? middleRank : position;
}

/**
 * Tell whether the node with the given id has a config whose kind is listed
 * in `INFRA_NODE_KINDS`. Ids without a config are never infra nodes.
 */
function isInfraNode(
  nodeId: string,
  configs: Record<string, NodeConfig>,
): boolean {
  const nodeConfig = configs[nodeId];
  if (!nodeConfig) {
    return false;
  }
  return INFRA_NODE_KINDS.has(nodeConfig.kind);
}

/** Tell whether a node id carries the `aux-` prefix. */
function isAuxNode(nodeId: string): boolean {
  const auxPrefix = "aux-";
  return nodeId.startsWith(auxPrefix);
}

/**
 * Pick the dagre edge weight for an edge type: phantom edges weigh 0,
 * semantic edges 10, and everything else (including no type) 3.
 */
function getEdgeWeight(edgeType: string | undefined): number {
  switch (edgeType) {
    case "phantom":
      return 0;
    case "semantic":
      return 10;
    default:
      return 3;
  }
}

/**
 * Create invisible "phantom" edges that keep pipeline ranks in order when the
 * user has not wired the corresponding blocks together.
 *
 * Nodes are bucketed by `getPipelineRank`. For every pair of neighbouring
 * ranks that are actually present (sorted ascending), one phantom edge is
 * added from the last node of the lower rank to the first node of the higher
 * rank, unless some real edge already runs from the lower bucket to the
 * higher one.
 */
function buildPhantomEdges(
  nodes: Node[],
  edges: Edge[],
  configs: Record<string, NodeConfig>,
): Edge[] {
  // Bucket node ids by pipeline rank, keeping the incoming node order.
  const idsByRank = new Map<number, string[]>();
  for (const { id } of nodes) {
    const rank = getPipelineRank(configs[id]);
    const bucket = idsByRank.get(rank);
    if (bucket) {
      bucket.push(id);
    } else {
      idsByRank.set(rank, [id]);
    }
  }

  const sortedRanks = [...idsByRank.keys()].sort((a, b) => a - b);
  const result: Edge[] = [];

  for (let upper = 1; upper < sortedRanks.length; upper++) {
    const lowerRank = sortedRanks[upper - 1];
    const upperRank = sortedRanks[upper];
    const lowerIds = idsByRank.get(lowerRank) ?? [];
    const upperIds = idsByRank.get(upperRank) ?? [];
    if (lowerIds.length === 0 || upperIds.length === 0) {
      continue;
    }

    // A real connection between the two buckets already orders them.
    const alreadyLinked = edges.some(
      ({ source, target }) =>
        lowerIds.includes(source) && upperIds.includes(target),
    );
    if (alreadyLinked) {
      continue;
    }

    result.push({
      id: `phantom-${lowerRank}-${upperRank}`,
      source: lowerIds[lowerIds.length - 1],
      target: upperIds[0],
      type: "phantom",
    });
  }

  return result;
}

/**
 * Compute node positions with dagre and return the nodes with updated
 * `position` values alongside the untouched `edges` array.
 *
 * When `options.configs` is a non-empty record, infra nodes and `aux-` nodes
 * (and any edge touching them) are left out of the dagre pass and keep their
 * current position; phantom edges are added so pipeline ranks stay ordered.
 * Without configs every node and edge takes part in the layout.
 *
 * dagre reports node centres, so each result is shifted by half the node size
 * to obtain the top-left corner stored in `position`.
 */
export function getLayoutedElements<TNode extends Node>(
  nodes: TNode[],
  edges: Edge[],
  options: LayoutOptions = {},
): { nodes: TNode[]; edges: Edge[] } {
  const {
    direction: rankdir = "LR",
    nodesep = 80,
    ranksep = 80,
    edgesep = 28,
    nodeWidth: fallbackWidth = DEFAULT_NODE_WIDTH,
    nodeHeight: fallbackHeight = DEFAULT_NODE_HEIGHT,
    configs,
  } = options;

  // Default: lay out everything. With configs, restrict to data-pipeline
  // nodes and add rank-ordering hints for blocks that are not connected.
  let layoutNodes: TNode[] = nodes;
  let layoutEdges: Edge[] = edges;
  let rankHints: Edge[] = [];
  if (configs && Object.keys(configs).length > 0) {
    const skipsLayout = (nodeId: string): boolean =>
      isInfraNode(nodeId, configs) || isAuxNode(nodeId);
    layoutNodes = nodes.filter((node) => !skipsLayout(node.id));
    layoutEdges = edges.filter(
      (edge) => !(skipsLayout(edge.source) || skipsLayout(edge.target)),
    );
    rankHints = buildPhantomEdges(layoutNodes, layoutEdges, configs);
  }

  const graph = new dagre.graphlib.Graph();
  graph.setDefaultEdgeLabel(() => ({}));
  graph.setGraph({
    rankdir,
    nodesep,
    ranksep,
    edgesep,
    ranker: "network-simplex",
  });

  // Measured size when available, otherwise the configured fallback.
  const sizeOf = (node: TNode): { width: number; height: number } => ({
    width: readNodeWidth(node) ?? fallbackWidth,
    height: readNodeHeight(node) ?? fallbackHeight,
  });

  for (const node of layoutNodes) {
    graph.setNode(node.id, sizeOf(node));
  }
  for (const { source, target, type } of [...layoutEdges, ...rankHints]) {
    graph.setEdge(source, target, { minlen: 1, weight: getEdgeWeight(type) });
  }

  dagre.layout(graph);

  // Convert dagre centre coordinates into top-left positions per node id.
  const topLeftById = new Map<string, { x: number; y: number }>();
  for (const node of layoutNodes) {
    const centre = graph.node(node.id);
    const { width, height } = sizeOf(node);
    topLeftById.set(node.id, {
      x: centre.x - width / 2,
      y: centre.y - height / 2,
    });
  }

  // Nodes that were not part of the dagre pass are returned as-is.
  const positionedNodes = nodes.map((node) => {
    const position = topLeftById.get(node.id);
    return position ? { ...node, position } : node;
  });

  return { nodes: positionedNodes, edges };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/naming.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig } from "../types";

/**
 * Produce the next free `<prefix>_<n>` name.
 *
 * Every existing name that starts with `prefix` contributes the integer
 * parsed from the rest of the name (first underscore removed); names whose
 * remainder is not numeric count as 0. The result uses the highest such
 * number plus one, or 1 when no existing name starts with the prefix.
 */
export function nextName(existing: NodeConfig[], prefix: string): string {
  let highest: number | null = null;
  for (const { name } of existing) {
    if (!name.startsWith(prefix)) {
      continue;
    }
    const remainder = name.slice(prefix.length).replace("_", "");
    const parsed = Number.parseInt(remainder, 10);
    const value = Number.isNaN(parsed) ? 0 : parsed;
    highest = highest === null ? value : Math.max(highest, value);
  }
  const next = highest === null ? 1 : highest + 1;
  return `${prefix}_${next}`;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/node-data.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipeNodeData, LayoutDirection, NodeConfig } from "../types";
import {
  labelForExpression,
  labelForLlm,
  labelForSampler,
} from "./config-labels";

/**
 * Derive the display data of a canvas node (title, kind, subtype, block type,
 * name) from its config.
 *
 * Each config kind maps to a fixed title; the subtype and block type come
 * from kind-specific fields. Seed nodes use the source label as their name
 * instead of `config.name`. Any kind not handled explicitly is treated as an
 * LLM step. `layoutDirection` is copied into the result and defaults to "LR".
 */
export function nodeDataFromConfig(
  config: NodeConfig,
  layoutDirection: LayoutDirection = "LR",
): RecipeNodeData {
  switch (config.kind) {
    case "sampler":
      return {
        title: "Generated field",
        kind: "sampler",
        subtype: labelForSampler(config.sampler_type),
        blockType: config.sampler_type,
        name: config.name,
        layoutDirection,
      };
    case "expression":
      return {
        title: "Formula",
        kind: "expression",
        subtype: labelForExpression(config.dtype),
        blockType: "expression",
        name: config.name,
        layoutDirection,
      };
    case "validator": {
      const usesOxc = config.validator_type === "oxc";
      const targetsSql = config.code_lang.startsWith("sql:");
      // OXC wins over SQL; anything else is shown as a Python check.
      const [subtype, blockType]: [string, RecipeNodeData["blockType"]] =
        usesOxc
          ? ["OXC", "validator_oxc"]
          : targetsSql
            ? ["SQL", "validator_sql"]
            : ["Python", "validator_python"];
      return {
        title: "Check",
        kind: "validator",
        subtype,
        blockType,
        name: config.name,
        layoutDirection,
      };
    }
    case "markdown_note":
      return {
        title: "Note",
        kind: "note",
        subtype: "Markdown",
        blockType: "markdown_note",
        name: config.name,
        layoutDirection,
      };
    case "seed": {
      const sourceType = config.seed_source_type ?? "hf";
      let sourceLabel = "Document file";
      if (sourceType === "hf") {
        sourceLabel = "Hugging Face dataset";
      } else if (sourceType === "local") {
        sourceLabel = "CSV or JSON file";
      }
      return {
        title: "Source data",
        kind: "seed",
        subtype: sourceLabel,
        blockType: "seed",
        name: sourceLabel,
        layoutDirection,
      };
    }
    case "model_provider":
      return {
        title: "Provider connection",
        kind: "model_provider",
        subtype: config.provider_type || "Connection",
        blockType: "model_provider",
        name: config.name,
        layoutDirection,
      };
    case "model_config":
      return {
        title: "Model preset",
        kind: "model_config",
        subtype: config.model || "Model",
        blockType: "model_config",
        name: config.name,
        layoutDirection,
      };
    case "tool_config": {
      const serverCount = config.mcp_providers.length;
      return {
        title: "Tool access",
        kind: "tool_config",
        subtype: serverCount === 1 ? "1 server" : `${serverCount} servers`,
        blockType: "tool_config",
        name: config.name,
        layoutDirection,
      };
    }
    default:
      return {
        title: "AI step",
        kind: "llm",
        subtype: labelForLlm(config.llm_type),
        blockType: config.llm_type,
        name: config.name,
        layoutDirection,
      };
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/parse.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Parse a string into a finite number.
 *
 * @param value - Raw text, typically from a form field.
 * @returns The parsed number, or `null` when the value is missing, empty,
 *   whitespace-only, or does not convert to a finite number.
 */
export function parseNumber(value?: string): number | null {
  // `Number("   ")` evaluates to 0, so blank input has to be rejected before
  // conversion; otherwise an empty-looking field silently parses as zero.
  const text = value?.trim();
  if (!text) {
    return null;
  }
  const num = Number(text);
  return Number.isFinite(num) ? num : null;
}

/**
 * Parse a string with `parseNumber` and accept the result only when it is an
 * integer; every other outcome yields `null`.
 */
export function parseIntNumber(value?: string): number | null {
  const parsed = parseNumber(value);
  const isWholeNumber = parsed !== null && Number.isInteger(parsed);
  return isWholeNumber ? parsed : null;
}

/**
 * Extract a `[min, max]` pair from free-form text such as "18-65".
 *
 * The text is split on every run of characters other than digits and dots.
 * Exactly two non-empty pieces must remain and both must convert to finite
 * numbers; otherwise `null` is returned. The order of the two values is not
 * checked.
 */
export function parseAgeRange(value?: string): [number, number] | null {
  if (!value) {
    return null;
  }
  const tokens = value.split(/[^0-9.]+/).filter((token) => token.length > 0);
  if (tokens.length !== 2) {
    return null;
  }
  const [low, high] = tokens.map((token) => Number(token));
  const bothFinite = Number.isFinite(low) && Number.isFinite(high);
  return bothFinite ? [low, high] : null;
}

/**
 * Parse `value` as JSON and return it only when it is a plain object.
 *
 * Missing or blank input returns `undefined` without reporting anything.
 * Unparseable text appends "`<label>`: invalid JSON." to `errors`; valid JSON
 * that is not an object (array, null, primitive) appends
 * "`<label>`: must be a JSON object.". Both failure cases return `undefined`.
 */
export function parseJsonObject(
  value: string | undefined,
  label: string,
  errors: string[],
): Record<string, unknown> | undefined {
  if (value === undefined || value.trim() === "") {
    return undefined;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    errors.push(`${label}: invalid JSON.`);
    return undefined;
  }

  const isPlainObject =
    typeof decoded === "object" && decoded !== null && !Array.isArray(decoded);
  if (!isPlainObject) {
    errors.push(`${label}: must be a JSON object.`);
    return undefined;
  }
  return decoded as Record<string, unknown>;
}

/** Type guard accepting exactly the strings "Male" and "Female". */
export function isValidSex(value?: string): value is "Male" | "Female" {
  switch (value) {
    case "Male":
    case "Female":
      return true;
    default:
      return false;
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/build-payload.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Edge, XYPosition } from "@xyflow/react";
import type {
  LayoutDirection,
  ModelConfig,
  ModelProviderConfig,
  NodeConfig,
  RecipeNode,
  RecipeProcessorConfig,
} from "../../types";
import { isSemanticRelation } from "../graph/relations";
import { getConfigErrors } from "../index";
import {
  getDefaultDataSourceHandle,
  getDefaultDataTargetHandle,
  getDefaultSemanticSourceHandle,
  getDefaultSemanticTargetHandle,
  isDataSourceHandle,
  isDataTargetHandle,
  isSemanticSourceHandle,
  isSemanticTargetHandle,
  normalizeRecipeHandleId,
} from "../handles";
import { readNodeWidth } from "../rf-node-dimensions";
import {
  buildExpressionColumn,
  buildLlmColumn,
  buildModelConfig,
  buildModelProvider,
  buildProcessors,
  buildSamplerColumn,
  buildSeedConfig,
  buildSeedDropProcessor,
  buildToolProfilePayload,
  buildValidatorColumn,
  pickFirstSeedConfig,
} from "./builders";
import type { RecipePayloadResult } from "./types";
import {
  validateModelAliasLinks,
  validateModelConfigProviders,
  validateSubcategoryConfigs,
  validateTimedeltaConfigs,
  validateValidatorConfigs,
  validateUsedProviders,
} from "./validate";
import { isLikelyImageValue } from "../image-preview";

/**
 * Append `item` to `out` unless an item was already recorded under `key`.
 *
 * `seen` maps each key to the JSON serialization of the first item stored for
 * it. A repeat with identical serialization is ignored; a repeat with a
 * different serialization adds "`<label>` `<key>`: conflicting definitions."
 * to `errors` and is not stored.
 */
function pushUniqueJson(
  label: string,
  key: string,
  item: Record<string, unknown>,
  seen: Map<string, string>,
  out: Record<string, unknown>[],
  errors: string[],
): void {
  const fingerprint = JSON.stringify(item);
  const recorded = seen.get(key);
  if (!recorded) {
    seen.set(key, fingerprint);
    out.push(item);
    return;
  }
  if (recorded !== fingerprint) {
    errors.push(`${label} ${key}: conflicting definitions.`);
  }
}

/**
 * Build a `name -> true` map of the sampler, llm, validator and seed configs
 * whose `advancedOpen` flag is exactly `true`. Other kinds, and configs with
 * the flag unset or false, are left out.
 */
function collectAdvancedOpenByNode(
  configs: Record<string, NodeConfig>,
): Record<string, boolean> {
  const openByName: Record<string, boolean> = {};
  for (const config of Object.values(configs)) {
    switch (config.kind) {
      case "sampler":
      case "llm":
      case "validator":
      case "seed":
        if (config.advancedOpen === true) {
          openByName[config.name] = true;
        }
        break;
      default:
        break;
    }
  }
  return openByName;
}

/**
 * Assemble the recipe request payload from the current canvas state.
 *
 * Walks `nodes`, looks up each node's config in `configs` (keyed by node id)
 * and sorts the built pieces into columns, model providers, model configs,
 * MCP providers and tool configs. Cross-config validators then run, and a
 * `ui` section is produced that records node positions/widths, edges with
 * their handles, aux-node positions and seed-related UI state.
 *
 * Problems are never thrown: every validation message is appended to the
 * returned `errors` array, and the payload is returned regardless of whether
 * errors were found.
 *
 * @param configs - Node configs keyed by node id.
 * @param nodes - Canvas nodes; nodes without a config entry are ignored.
 * @param edges - Canvas edges; edges whose endpoints lack a config, or that
 *   touch a markdown note, are left out of the `ui.edges` output.
 * @param processors - Processor configs passed to `buildProcessors`.
 * @param layoutDirection - Stored in `ui.layout_direction` and used to pick
 *   default handle ids.
 * @param auxNodePositions - Positions keyed by aux node id
 *   (`aux-<llmId>-<key>`).
 * @returns The collected `errors` and the `payload` (`recipe`, `run`, `ui`).
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: payload build
export function buildRecipePayload(
  configs: Record<string, NodeConfig>,
  nodes: RecipeNode[],
  edges: Edge[],
  processors: RecipeProcessorConfig[] = [],
  layoutDirection: LayoutDirection = "LR",
  auxNodePositions: Record<string, XYPosition> = {},
): RecipePayloadResult {
  const errors: string[] = [];
  const columns: Record<string, unknown>[] = [];
  const modelAliases = new Set<string>();
  const modelProviderNames = new Set<string>();
  const modelProviders: Record<string, unknown>[] = [];
  const mcpProviders: Record<string, unknown>[] = [];
  const modelConfigs: Record<string, unknown>[] = [];
  const toolConfigs: Record<string, unknown>[] = [];
  const modelProviderConfigs: ModelProviderConfig[] = [];
  const modelConfigConfigs: ModelConfig[] = [];
  const llmToolAliasesUsed = new Set<string>();
  const mcpProviderJsonByName = new Map<string, string>();
  const toolConfigJsonByAlias = new Map<string, string>();
  const nameSet = new Set<string>();
  // Column-producing configs (sampler/llm/expression/validator) seen on the canvas.
  const nameToConfig = new Map<string, NodeConfig>();
  // Every non-seed config by name, whether or not a node references it.
  const allNameToConfig = new Map<string, NodeConfig>();
  // Only one seed config feeds the payload; selection is done by the helper.
  const firstSeed = pickFirstSeedConfig(configs);

  for (const config of Object.values(configs)) {
    if (config.kind === "seed") {
      continue;
    }
    allNameToConfig.set(config.name, config);
  }

  // Pass 1: per-node validation and building of the recipe pieces.
  for (const node of nodes) {
    const config = configs[node.id];
    if (!config) {
      continue;
    }
    for (const error of getConfigErrors(config)) {
      errors.push(`${config.name}: ${error}`);
    }
    // Seed configs are exempt from the duplicate-name check.
    if (config.kind !== "seed") {
      if (nameSet.has(config.name)) {
        errors.push(`Duplicate node name: ${config.name}.`);
      }
      nameSet.add(config.name);
    }

    if (config.kind === "sampler") {
      nameToConfig.set(config.name, config);
      columns.push(buildSamplerColumn(config, errors));
      continue;
    }
    if (config.kind === "llm") {
      // Image context is checked against the seed's columns and preview rows
      // when that information is available; empty lists skip the check.
      if (config.image_context?.enabled) {
        const imageContext = config.image_context;
        const columnName = imageContext.column_name.trim();
        if (columnName) {
          if (firstSeed?.seed_columns && firstSeed.seed_columns.length > 0) {
            if (!firstSeed.seed_columns.includes(columnName)) {
              errors.push(
                `LLM ${config.name}: image context column '${columnName}' not found in seed columns.`,
              );
            }
          }
          const previewRows = firstSeed?.seed_preview_rows ?? [];
          if (previewRows.length > 0) {
            const hasImageLikeValue = previewRows.some((row) =>
              isLikelyImageValue(row[columnName]),
            );
            if (!hasImageLikeValue) {
              errors.push(
                `LLM ${config.name}: image context column '${columnName}' has no image-like values in preview rows.`,
              );
            }
          }
        }
      }
      columns.push(buildLlmColumn(config, errors));
      if (config.model_alias) {
        modelAliases.add(config.model_alias);
      }
      const toolAlias = config.tool_alias?.trim();
      if (toolAlias) {
        llmToolAliasesUsed.add(toolAlias);
      }
      nameToConfig.set(config.name, config);
      continue;
    }
    if (config.kind === "expression") {
      columns.push(buildExpressionColumn(config, errors));
      nameToConfig.set(config.name, config);
      continue;
    }
    if (config.kind === "validator") {
      columns.push(buildValidatorColumn(config, errors, allNameToConfig));
      nameToConfig.set(config.name, config);
      continue;
    }
    if (config.kind === "seed") {
      // SeedConfig is global config (seed_config); seed-dataset columns are added by DataDesigner.
      continue;
    }
    if (config.kind === "markdown_note") {
      continue;
    }
    if (config.kind === "model_provider") {
      modelProviderNames.add(config.name);
      modelProviders.push(buildModelProvider(config, errors));
      modelProviderConfigs.push(config);
      continue;
    }
    if (config.kind === "tool_config") {
      // MCP providers and tool configs are de-duplicated by name/alias;
      // differing definitions under the same key are reported as conflicts.
      const built = buildToolProfilePayload(config, errors);
      for (const provider of built.mcp_providers) {
        pushUniqueJson(
          "MCP provider",
          String(provider.name),
          provider,
          mcpProviderJsonByName,
          mcpProviders,
          errors,
        );
      }
      if (built.tool_config) {
        pushUniqueJson(
          "Tool config",
          String(built.tool_config.tool_alias),
          built.tool_config,
          toolConfigJsonByAlias,
          toolConfigs,
          errors,
        );
      }
      continue;
    }
    // Every other kind was handled above, so what remains is a model config.
    modelConfigs.push(buildModelConfig(config, errors));
    modelConfigConfigs.push(config);
  }

  // Pass 2: validations that need the complete picture.
  validateSubcategoryConfigs(configs, nameToConfig, errors);
  validateTimedeltaConfigs(configs, nameToConfig, errors);
  validateValidatorConfigs(configs, nameToConfig, errors);
  validateModelAliasLinks(modelAliases, modelConfigConfigs, errors);
  validateModelConfigProviders(
    modelConfigConfigs,
    modelAliases,
    modelProviderNames,
    errors,
  );
  validateUsedProviders(modelProviderConfigs, modelConfigConfigs, errors);
  // Every tool alias referenced by an LLM step must have a built tool config.
  for (const toolAlias of llmToolAliasesUsed) {
    if (!toolConfigJsonByAlias.has(toolAlias)) {
      errors.push(`Tool alias ${toolAlias}: missing tool config.`);
    }
  }

  // UI nodes are identified by config name (not node id) in the payload.
  const uiNodes = nodes.flatMap((node) => {
    const config = configs[node.id];
    if (!config) {
      return [];
    }
    const width = readNodeWidth(node);
    if (config.kind === "markdown_note") {
      // Notes exist only in the UI section, so their content is stored here.
      return [
        {
          id: config.name,
          x: node.position.x,
          y: node.position.y,
          ...(width !== null ? { width } : {}),
          node_type: "markdown_note" as const,
          name: config.name,
          markdown: config.markdown,
          note_color: config.note_color,
          note_opacity: config.note_opacity,
        },
      ];
    }
    if (config.kind === "tool_config") {
      // Keep only providers with a non-blank name and at least one non-blank
      // tool; tool names are trimmed and de-duplicated.
      const toolsByProvider = Object.fromEntries(
        Object.entries(config.fetched_tools_by_provider ?? {}).flatMap(
          ([providerName, tools]) => {
            const name = providerName.trim();
            const values = Array.from(
              new Set(tools.map((tool) => tool.trim()).filter(Boolean)),
            );
            return name && values.length > 0 ? [[name, values]] : [];
          },
        ),
      );
      return [
        {
          id: config.name,
          x: node.position.x,
          y: node.position.y,
          ...(width !== null ? { width } : {}),
          node_type: "tool_config" as const,
          ...(Object.keys(toolsByProvider).length > 0 && {
            tools_by_provider: toolsByProvider,
          }),
        },
      ];
    }
    return [
      {
        id: config.name,
        x: node.position.x,
        y: node.position.y,
        ...(width !== null ? { width } : {}),
      },
    ];
  });

  const uiEdges = edges.flatMap((edge) => {
    const source = edge.source ? configs[edge.source] : null;
    const target = edge.target ? configs[edge.target] : null;
    if (!(source && target)) {
      return [];
    }
    if (source.kind === "markdown_note" || target.kind === "markdown_note") {
      return [];
    }
    // An edge is semantic when it is typed as such or when the pair of
    // configs forms a semantic relation.
    const semantic =
      edge.type === "semantic" || isSemanticRelation(source, target);
    const sourceHandleNormalized = normalizeRecipeHandleId(edge.sourceHandle);
    const targetHandleNormalized = normalizeRecipeHandleId(edge.targetHandle);
    // On the LLM end of a semantic edge the default is the data handle.
    const semanticSourceDefault =
      source.kind === "llm"
        ? getDefaultDataSourceHandle(layoutDirection)
        : getDefaultSemanticSourceHandle(layoutDirection);
    const semanticTargetDefault =
      target.kind === "llm"
        ? getDefaultDataTargetHandle(layoutDirection)
        : getDefaultSemanticTargetHandle(layoutDirection);
    let sourceHandle = getDefaultDataSourceHandle(layoutDirection);
    let targetHandle = getDefaultDataTargetHandle(layoutDirection);

    // Keep the stored handle only when it is valid for the edge type;
    // otherwise fall back to the matching default.
    if (semantic) {
      sourceHandle =
        isSemanticSourceHandle(sourceHandleNormalized) ||
        isDataSourceHandle(sourceHandleNormalized)
          ? sourceHandleNormalized ?? semanticSourceDefault
          : semanticSourceDefault;
      targetHandle =
        isSemanticTargetHandle(targetHandleNormalized) ||
        isDataTargetHandle(targetHandleNormalized)
          ? targetHandleNormalized ?? semanticTargetDefault
          : semanticTargetDefault;
    } else {
      sourceHandle = isDataSourceHandle(sourceHandleNormalized)
        ? sourceHandleNormalized ?? getDefaultDataSourceHandle(layoutDirection)
        : getDefaultDataSourceHandle(layoutDirection);
      targetHandle = isDataTargetHandle(targetHandleNormalized)
        ? targetHandleNormalized ?? getDefaultDataTargetHandle(layoutDirection)
        : getDefaultDataTargetHandle(layoutDirection);
    }
    return [
      {
        from: source.name,
        to: target.name,
        type: semantic ? "semantic" : "canvas",
        source_handle: sourceHandle ?? undefined,
        target_handle: targetHandle ?? undefined,
      },
    ];
  });
  const uiAuxNodes = Object.entries(auxNodePositions).flatMap(
    ([auxId, position]) => {
      // Aux ids are parsed as `aux-<llmId>-<key>`.
      // NOTE(review): the pattern requires <llmId> to contain no "-"; confirm
      // node ids never include hyphens, otherwise these positions are dropped.
      const match = /^aux-([^-]+)-(.+)$/.exec(auxId);
      if (!match) {
        return [];
      }
      const [, llmId, key] = match;
      const llmConfig = configs[llmId];
      if (!(llmConfig && llmConfig.kind === "llm")) {
        return [];
      }
      return [
        {
          llm: llmConfig.name,
          key,
          x: position.x,
          y: position.y,
        },
      ];
    },
  );
  const recipeProcessors = buildProcessors(processors, errors);
  const seedConfig = firstSeed ? buildSeedConfig(firstSeed, errors) : undefined;
  // The seed may contribute one extra processor, appended after the
  // user-defined ones.
  const seedDropProcessor = firstSeed
    ? buildSeedDropProcessor(firstSeed, errors)
    : null;
  if (seedDropProcessor) {
    recipeProcessors.push(seedDropProcessor);
  }
  const uiAdvancedOpenByNode = collectAdvancedOpenByNode(configs);

  return {
    errors,
    payload: {
      recipe: {
        // biome-ignore lint/style/useNamingConvention: api schema
        model_providers: modelProviders,
        // biome-ignore lint/style/useNamingConvention: api schema
        mcp_providers: mcpProviders,
        // biome-ignore lint/style/useNamingConvention: api schema
        model_configs: modelConfigs,
        // biome-ignore lint/style/useNamingConvention: api schema
        seed_config: seedConfig,
        // biome-ignore lint/style/useNamingConvention: api schema
        tool_configs: toolConfigs,
        columns,
        processors: recipeProcessors,
      },
      // Run settings are fixed here: a 5-row preview emitted as jsonl.
      run: {
        rows: 5,
        preview: true,
        // biome-ignore lint/style/useNamingConvention: api schema
        output_formats: ["jsonl"],
      },
      // Optional UI keys are spread in only when there is something to store.
      ui: {
        nodes: uiNodes,
        edges: uiEdges,
        layout_direction: layoutDirection,
        ...(uiAuxNodes.length > 0 && { aux_nodes: uiAuxNodes }),
        ...(firstSeed && { seed_source_type: firstSeed.seed_source_type }),
        ...(firstSeed && { seed_columns: firstSeed.seed_columns ?? [] }),
        ...(firstSeed && {
          seed_drop_columns: firstSeed.seed_drop_columns ?? [],
        }),
        ...(firstSeed && {
          seed_preview_rows: firstSeed.seed_preview_rows ?? [],
        }),
        ...(firstSeed &&
          firstSeed.local_file_name !== undefined && {
            local_file_name: firstSeed.local_file_name,
          }),
        ...(firstSeed &&
          firstSeed.unstructured_file_name !== undefined && {
            unstructured_file_name: firstSeed.unstructured_file_name,
          }),
        ...(firstSeed &&
          firstSeed.unstructured_chunk_size !== undefined && {
            unstructured_chunk_size: firstSeed.unstructured_chunk_size,
          }),
        ...(firstSeed &&
          firstSeed.unstructured_chunk_overlap !== undefined && {
            unstructured_chunk_overlap: firstSeed.unstructured_chunk_overlap,
          }),
        ...(Object.keys(uiAdvancedOpenByNode).length > 0 && {
          // biome-ignore lint/style/useNamingConvention: ui schema
          advanced_open_by_node: uiAdvancedOpenByNode,
        }),
      },
    },
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders-llm.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  LlmConfig,
  LlmMcpProviderConfig,
  LlmToolConfig,
  ToolProfileConfig,
} from "../../types";

/**
 * Build the multi-modal context list for an LLM column.
 *
 * Returns `undefined` when image context is absent or disabled. When it is
 * enabled but the trimmed column name is empty, an error is recorded and
 * `undefined` is returned. Otherwise a single image entry pointing at the
 * trimmed column name is returned.
 */
function buildImageContext(
  config: LlmConfig,
  errors: string[],
): Array<Record<string, unknown>> | undefined {
  const settings = config.image_context;
  if (!(settings && settings.enabled)) {
    return undefined;
  }

  const sourceColumn = settings.column_name.trim();
  if (sourceColumn.length === 0) {
    errors.push(`LLM ${config.name}: image context column is required.`);
    return undefined;
  }

  const imageEntry: Record<string, unknown> = {
    modality: "image",
    // biome-ignore lint/style/useNamingConvention: api schema
    column_name: sourceColumn,
  };
  return [imageEntry];
}

/**
 * Convert an LLM node config into its column payload.
 *
 * All variants share the same base fields (name, drop flag, model alias,
 * prompts, image context, tool alias, trace and reasoning options) and differ
 * by `column_type`:
 * - "code" adds `code_lang` (falls back to "python"),
 * - "structured" adds `output_format`, JSON-parsed when it is a non-blank
 *   string (a parse failure is reported and the raw string is kept),
 * - "judge" adds `scores`, dropping blank options and incomplete scores and
 *   reporting an error when none remain,
 * - anything else becomes a plain "llm-text" column.
 *
 * Validation messages are appended to `errors`.
 */
export function buildLlmColumn(
  config: LlmConfig,
  errors: string[],
): Record<string, unknown> {
  const trimmedToolAlias = config.tool_alias?.trim();
  const shared = {
    name: config.name,
    drop: config.drop ?? false,
    // biome-ignore lint/style/useNamingConvention: api schema
    model_alias: config.model_alias,
    prompt: config.prompt,
    // biome-ignore lint/style/useNamingConvention: api schema
    system_prompt: config.system_prompt || undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    multi_modal_context: buildImageContext(config, errors),
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_alias: trimmedToolAlias || undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    with_trace: config.with_trace ?? "none",
    // biome-ignore lint/style/useNamingConvention: api schema
    extract_reasoning_content: config.extract_reasoning_content === true,
  };

  switch (config.llm_type) {
    case "code":
      return {
        // biome-ignore lint/style/useNamingConvention: api schema
        column_type: "llm-code",
        ...shared,
        // biome-ignore lint/style/useNamingConvention: api schema
        code_lang: config.code_lang || "python",
      };

    case "structured": {
      const rawFormat: unknown = config.output_format || undefined;
      let parsedFormat: unknown = rawFormat;
      if (typeof rawFormat === "string" && rawFormat.trim()) {
        try {
          parsedFormat = JSON.parse(rawFormat);
        } catch {
          errors.push(`LLM ${config.name}: output_format is not valid JSON.`);
        }
      }
      return {
        // biome-ignore lint/style/useNamingConvention: api schema
        column_type: "llm-structured",
        ...shared,
        // biome-ignore lint/style/useNamingConvention: api schema
        output_format: parsedFormat,
      };
    }

    case "judge": {
      const scores: {
        name: string;
        description: string;
        options: Record<string, string>;
      }[] = [];
      for (const score of config.scores ?? []) {
        // Options need both a value and a description to be kept.
        const options: Record<string, string> = {};
        for (const option of score.options ?? []) {
          const optionValue = option.value.trim();
          const optionText = option.description.trim();
          if (optionValue && optionText) {
            options[optionValue] = optionText;
          }
        }
        const name = score.name.trim();
        const description = score.description.trim();
        if (name && description && Object.keys(options).length > 0) {
          scores.push({ name, description, options });
        }
      }
      if (scores.length === 0) {
        errors.push(`LLM ${config.name}: scores required for LLM Judge.`);
      }
      return {
        // biome-ignore lint/style/useNamingConvention: api schema
        column_type: "llm-judge",
        ...shared,
        scores,
      };
    }

    default:
      return {
        // biome-ignore lint/style/useNamingConvention: api schema
        column_type: "llm-text",
        ...shared,
      };
  }
}

/**
 * Convert an MCP provider config into its payload, or return `null` (after
 * recording an error) when a required field is blank.
 *
 * A name is always required. "stdio" providers additionally need a command
 * and carry trimmed, non-blank args plus env entries whose key and value are
 * both non-blank. Every other provider type is emitted as "streamable_http"
 * and needs an endpoint; its API key fields are omitted when blank.
 */
export function buildLlmMcpProvider(
  provider: LlmMcpProviderConfig,
  errors: string[],
): Record<string, unknown> | null {
  const name = provider.name.trim();
  if (name.length === 0) {
    errors.push("MCP provider: name is required.");
    return null;
  }

  if (provider.provider_type !== "stdio") {
    const endpoint = provider.endpoint?.trim() ?? "";
    if (endpoint.length === 0) {
      errors.push(`MCP provider ${name}: endpoint is required.`);
      return null;
    }
    return {
      // biome-ignore lint/style/useNamingConvention: api schema
      provider_type: "streamable_http",
      name,
      endpoint,
      // biome-ignore lint/style/useNamingConvention: api schema
      api_key: provider.api_key?.trim() || undefined,
      // biome-ignore lint/style/useNamingConvention: api schema
      api_key_env: provider.api_key_env?.trim() || undefined,
    };
  }

  const command = provider.command?.trim() ?? "";
  if (command.length === 0) {
    errors.push(`MCP provider ${name}: command is required for stdio.`);
    return null;
  }

  const env: Record<string, string> = {};
  for (const entry of provider.env ?? []) {
    const envKey = entry.key.trim();
    const envValue = entry.value.trim();
    if (!(envKey && envValue)) {
      continue;
    }
    env[envKey] = envValue;
  }

  const args = (provider.args ?? [])
    .map((arg) => arg.trim())
    .filter((arg) => arg.length > 0);

  return {
    // biome-ignore lint/style/useNamingConvention: api schema
    provider_type: "stdio",
    name,
    command,
    args,
    env,
  };
}

/**
 * Convert a tool config into its payload, or return `null` (after recording
 * an error) when the alias is blank or no non-blank provider is listed.
 *
 * `allow_tools` is trimmed and omitted when empty. `max_tool_call_turns`
 * falls back to 5 and `timeout_sec` is omitted whenever the corresponding
 * text is blank or does not convert to a finite number.
 */
export function buildLlmToolConfig(
  config: LlmToolConfig,
  errors: string[],
): Record<string, unknown> | null {
  // Trim each entry and discard the ones that end up empty.
  const cleanList = (values: string[]): string[] =>
    values.map((entry) => entry.trim()).filter((entry) => entry.length > 0);
  // Finite number from optional text, or undefined when blank/unparseable.
  const finiteOrUndefined = (raw: string | undefined): number | undefined => {
    const text = raw?.trim();
    if (!text) {
      return undefined;
    }
    const parsed = Number(text);
    return Number.isFinite(parsed) ? parsed : undefined;
  };

  const alias = config.tool_alias.trim();
  if (alias.length === 0) {
    errors.push("Tool config: tool_alias is required.");
    return null;
  }

  const providers = cleanList(config.providers);
  if (providers.length === 0) {
    errors.push(`Tool config ${alias}: at least one provider is required.`);
    return null;
  }

  const allowedTools = cleanList(config.allow_tools ?? []);
  const maxTurns = finiteOrUndefined(config.max_tool_call_turns) ?? 5;
  const timeoutSeconds = finiteOrUndefined(config.timeout_sec);

  return {
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_alias: alias,
    providers,
    // biome-ignore lint/style/useNamingConvention: api schema
    allow_tools: allowedTools.length > 0 ? allowedTools : undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    max_tool_call_turns: maxTurns,
    // biome-ignore lint/style/useNamingConvention: api schema
    timeout_sec: timeoutSeconds,
  };
}

/**
 * Build the API payload pieces for a tool profile: the list of MCP providers
 * and the tool config that references them by name.
 *
 * Providers that fail to build (the builder returned a falsy value) are left
 * out. The tool config uses the profile name as its alias and may itself be
 * `null` when `buildLlmToolConfig` rejects it. Problems are appended to `errors`.
 */
export function buildToolProfilePayload(
  config: ToolProfileConfig,
  errors: string[],
): {
  // biome-ignore lint/style/useNamingConvention: api schema
  mcp_providers: Record<string, unknown>[];
  // biome-ignore lint/style/useNamingConvention: api schema
  tool_config: Record<string, unknown> | null;
} {
  // Build every provider first so provider errors precede tool-config errors.
  const builtProviders: Record<string, unknown>[] = [];
  for (const entry of config.mcp_providers) {
    const built = buildLlmMcpProvider(entry, errors);
    if (built) {
      builtProviders.push(built);
    }
  }

  // Names of the providers that survived, trimmed, blanks removed.
  const providerNames: string[] = [];
  for (const built of builtProviders) {
    const providerName = String(built.name ?? "").trim();
    if (providerName) {
      providerNames.push(providerName);
    }
  }

  const builtToolConfig = buildLlmToolConfig(
    {
      id: config.id,
      // biome-ignore lint/style/useNamingConvention: api schema
      tool_alias: config.name,
      providers: providerNames,
      // biome-ignore lint/style/useNamingConvention: api schema
      allow_tools: config.allow_tools,
      // biome-ignore lint/style/useNamingConvention: api schema
      max_tool_call_turns: config.max_tool_call_turns,
      // biome-ignore lint/style/useNamingConvention: api schema
      timeout_sec: config.timeout_sec,
    },
    errors,
  );

  return {
    // biome-ignore lint/style/useNamingConvention: api schema
    mcp_providers: builtProviders,
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_config: builtToolConfig,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders-model.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ModelConfig, ModelProviderConfig } from "../../types";
import { parseJsonObject } from "./parse";

/**
 * Build the API representation of a model provider.
 *
 * `extra_headers` and `extra_body` are parsed with `parseJsonObject`, which
 * receives `errors` for reporting; a nullish parse result is replaced by an
 * empty object. Blank API key fields are emitted as `undefined`. The provider
 * type is always sent as "openai".
 */
export function buildModelProvider(
  config: ModelProviderConfig,
  errors: string[],
): Record<string, unknown> {
  const label = `Provider ${config.name}`;
  const headers = parseJsonObject(
    config.extra_headers,
    `${label} extra_headers`,
    errors,
  );
  const body = parseJsonObject(
    config.extra_body,
    `${label} extra_body`,
    errors,
  );
  const apiKeyEnv = config.api_key_env?.trim();
  const apiKey = config.api_key?.trim();

  return {
    name: config.name,
    endpoint: config.endpoint,
    // biome-ignore lint/style/useNamingConvention: api schema
    provider_type: "openai",
    // biome-ignore lint/style/useNamingConvention: api schema
    api_key_env: apiKeyEnv ? apiKeyEnv : undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    api_key: apiKey ? apiKey : undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    extra_headers: headers ?? {},
    // biome-ignore lint/style/useNamingConvention: api schema
    extra_body: body ?? {},
  };
}

/**
 * Build the API representation of a model config.
 *
 * Numeric inference fields arrive as text; each one is included only when it
 * is non-blank and parses to a finite number (timeout is truncated to an
 * integer). `inference_parameters` is omitted entirely when nothing was set.
 * The extra body is parsed with `parseJsonObject`, which receives `errors`.
 */
export function buildModelConfig(
  config: ModelConfig,
  errors: string[],
): Record<string, unknown> {
  // Blank or non-numeric text yields undefined so the field is left out.
  const toFinite = (text: string | undefined): number | undefined => {
    if (!text) {
      return undefined;
    }
    const value = Number(text);
    return Number.isFinite(value) ? value : undefined;
  };

  const temperatureText = config.inference_temperature?.trim();
  const topPText = config.inference_top_p?.trim();
  const maxTokensText = config.inference_max_tokens?.trim();
  const timeoutText = config.inference_timeout?.trim();
  const parsedExtraBody = parseJsonObject(
    config.inference_extra_body,
    `Model ${config.name} inference extra_body`,
    errors,
  );

  const temperature = toFinite(temperatureText);
  const topP = toFinite(topPText);
  const maxTokens = toFinite(maxTokensText);
  const timeoutValue = toFinite(timeoutText);

  const inferenceParams: Record<string, unknown> = {};
  if (temperature !== undefined) {
    inferenceParams.temperature = temperature;
  }
  if (topP !== undefined) {
    // biome-ignore lint/style/useNamingConvention: api schema
    inferenceParams.top_p = topP;
  }
  if (maxTokens !== undefined) {
    // biome-ignore lint/style/useNamingConvention: api schema
    inferenceParams.max_tokens = maxTokens;
  }
  if (timeoutValue !== undefined) {
    inferenceParams.timeout = Math.trunc(timeoutValue);
  }
  if (parsedExtraBody) {
    // biome-ignore lint/style/useNamingConvention: api schema
    inferenceParams.extra_body = parsedExtraBody;
  }

  const hasInference = Object.keys(inferenceParams).length > 0;
  return {
    alias: config.name,
    model: config.model,
    provider: config.provider || undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    inference_parameters: hasInference ? inferenceParams : undefined,
    // biome-ignore lint/style/useNamingConvention: api schema
    skip_health_check: config.skip_health_check || undefined,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders-processors.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ExpressionConfig, RecipeProcessorConfig } from "../../types";
import { parseJsonObject } from "./parse";

/**
 * Build the API column definition for an expression column.
 *
 * A blank expression is reported through `errors`, but the column is still
 * returned with the expression text exactly as entered.
 */
export function buildExpressionColumn(
  config: ExpressionConfig,
  errors: string[],
): Record<string, unknown> {
  const expression = config.expr;
  if (expression.trim().length === 0) {
    errors.push(`Expression ${config.name}: expr required.`);
  }

  const column: Record<string, unknown> = {
    // biome-ignore lint/style/useNamingConvention: api schema
    column_type: "expression",
    name: config.name,
    drop: config.drop ?? false,
    expr: expression,
    dtype: config.dtype,
  };
  return column;
}

/**
 * Build the API processor list from the recipe's processor configs.
 *
 * Only `schema_transform` processors are emitted. An entry is skipped when its
 * name is blank (reported through `errors`) or when `parseJsonObject` returns
 * nothing for its template.
 */
export function buildProcessors(
  processors: RecipeProcessorConfig[],
  errors: string[],
): Record<string, unknown>[] {
  return processors.flatMap((entry): Record<string, unknown>[] => {
    if (entry.processor_type !== "schema_transform") {
      return [];
    }
    const label = entry.name.trim();
    if (label === "") {
      errors.push("Schema transform: name is required.");
      return [];
    }
    const parsedTemplate = parseJsonObject(
      entry.template,
      `Schema transform ${label} template`,
      errors,
    );
    if (!parsedTemplate) {
      return [];
    }
    return [
      {
        // biome-ignore lint/style/useNamingConvention: api schema
        processor_type: "schema_transform",
        name: label,
        template: parsedTemplate,
      },
    ];
  });
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders-sampler.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { CategoryConditionalParams, SamplerConfig } from "../../types";
import { isValidSex, parseAgeRange, parseNumber } from "./parse";

// Spelled-out datetime unit names mapped to single-letter unit codes.
// buildSamplerParams substitutes the code when the configured unit matches one
// of these keys; any other unit text is passed through unchanged.
const DATETIME_UNIT_MAP: Record<string, "Y" | "M" | "D" | "h" | "m" | "s"> = {
  year: "Y",
  month: "M",
  day: "D",
  hour: "h",
  minute: "m",
  second: "s",
};

/**
 * Build the `conditional_params` map for a category sampler.
 *
 * Each rule is keyed by its trimmed condition text. A rule is skipped, with a
 * message pushed to `errors`, when its condition is blank, when it has no
 * non-blank values, or when weights are given but are incomplete or do not
 * match the number of values. Returns `undefined` when no rule was emitted.
 */
function buildCategoryConditionalParams(
  config: SamplerConfig,
  errors: string[],
): Record<string, CategoryConditionalParams> | undefined {
  const rules: Record<string, CategoryConditionalParams> = {};

  for (const [conditionText, rule] of Object.entries(
    config.conditional_params ?? {},
  )) {
    const condition = conditionText.trim();
    if (condition.length === 0) {
      errors.push(`Sampler ${config.name}: conditional rule needs condition text.`);
      continue;
    }

    const values: string[] = [];
    for (const candidate of rule.values ?? []) {
      const cleaned = candidate.trim();
      if (cleaned) {
        values.push(cleaned);
      }
    }
    if (values.length === 0) {
      errors.push(`Sampler ${config.name}: conditional '${condition}' needs values.`);
      continue;
    }

    const weights = rule.weights ?? [];
    const weighted = weights.some((weight) => weight !== null);
    // Weights are usable only when there is one non-null weight per value.
    const complete =
      weights.length === values.length &&
      weights.every((weight) => weight !== null);
    if (weighted && !complete) {
      errors.push(`Sampler ${config.name}: conditional '${condition}' weights invalid.`);
      continue;
    }

    rules[condition] = {
      // biome-ignore lint/style/useNamingConvention: api schema
      sampler_type: "category",
      values,
      weights: weighted
        ? weights.filter((weight): weight is number => weight !== null)
        : undefined,
    };
  }

  return Object.keys(rules).length === 0 ? undefined : rules;
}

/**
 * Build the `params` object for a sampler column, according to its
 * `sampler_type`.
 *
 * Handles category, subcategory, uniform, gaussian, bernoulli, datetime,
 * timedelta and uuid explicitly; any other type falls through to the
 * person-style parameters (locale, sex, city, age range). Validation problems
 * are appended to `errors`; the function always returns an object.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: per type logic
function buildSamplerParams(
  config: SamplerConfig,
  errors: string[],
): Record<string, unknown> {
  if (config.sampler_type === "category") {
    const values = config.values ?? [];
    const params: Record<string, unknown> = { values };
    const weights = config.weights ?? [];
    const hasWeights = weights.some((weight) => weight !== null);
    // Weights are all-or-nothing: a partially filled list is an error.
    if (hasWeights && weights.some((weight) => weight === null)) {
      errors.push(`Sampler ${config.name}: weights missing values.`);
    } else if (hasWeights) {
      params.weights = weights.filter((weight) => weight !== null);
    }
    return params;
  }
  if (config.sampler_type === "subcategory") {
    const mapping = config.subcategory_mapping ?? {};
    for (const [key, values] of Object.entries(mapping)) {
      if (!values || values.length === 0) {
        errors.push(
          `Subcategory ${config.name}: '${key}' needs at least 1 subcategory.`,
        );
      }
    }
    return {
      category: config.subcategory_parent,
      values: mapping,
    };
  }
  if (config.sampler_type === "uniform") {
    return {
      low: parseNumber(config.low),
      high: parseNumber(config.high),
    };
  }
  if (config.sampler_type === "gaussian") {
    return {
      mean: parseNumber(config.mean),
      // data_designer expects `stddev`
      stddev: parseNumber(config.std),
    };
  }
  if (config.sampler_type === "bernoulli") {
    return {
      p: parseNumber(config.p),
    };
  }
  if (config.sampler_type === "datetime") {
    const rawUnit = config.datetime_unit?.trim();
    let unit: string | undefined = rawUnit || undefined;
    // Match own keys only: a plain `DATETIME_UNIT_MAP[rawUnit]` lookup would
    // also resolve inherited members such as "constructor" or "toString" and
    // emit a function as the unit.
    if (rawUnit && Object.keys(DATETIME_UNIT_MAP).includes(rawUnit)) {
      unit = DATETIME_UNIT_MAP[rawUnit];
    }
    if (rawUnit === "week") {
      errors.push(`Datetime ${config.name}: unit 'week' not supported.`);
      unit = undefined;
    }
    return {
      start: config.datetime_start ?? undefined,
      end: config.datetime_end ?? undefined,
      unit,
    };
  }
  if (config.sampler_type === "timedelta") {
    return {
      // biome-ignore lint/style/useNamingConvention: api schema
      dt_min: parseNumber(config.dt_min),
      // biome-ignore lint/style/useNamingConvention: api schema
      dt_max: parseNumber(config.dt_max),
      // biome-ignore lint/style/useNamingConvention: api schema
      reference_column_name: config.reference_column_name || undefined,
      unit: config.timedelta_unit || undefined,
    };
  }
  if (config.sampler_type === "uuid") {
    const raw = config.uuid_format?.trim();
    if (!raw) {
      return {};
    }
    const lowered = raw.toLowerCase();
    // UI historically used "uuid4" as a "format". data_designer uuid sampler is always uuid4.
    if (lowered === "uuid4") {
      return {};
    }
    if (lowered === "short" || lowered === "short_form") {
      // biome-ignore lint/style/useNamingConvention: api schema
      return { short_form: true };
    }
    if (lowered === "upper" || lowered === "uppercase") {
      return { uppercase: true };
    }
    if (lowered.startsWith("prefix:")) {
      return { prefix: raw.slice("prefix:".length).trim() || undefined };
    }
    // Any other text is used verbatim as the prefix.
    return {
      prefix: raw,
    };
  }

  // Remaining sampler types use the person-style parameters.
  const params: Record<string, unknown> = {};
  const locale = config.person_locale?.trim();
  if (locale) {
    params.locale = locale;
  }
  const sex = config.person_sex?.trim();
  if (sex) {
    if (isValidSex(sex)) {
      params.sex = sex;
    } else {
      errors.push(`Person ${config.name}: sex must be Male or Female.`);
    }
  }
  const city = config.person_city?.trim();
  if (city) {
    params.city = city;
  }
  if (config.person_age_range?.trim()) {
    const parsed = parseAgeRange(config.person_age_range);
    if (parsed) {
      // biome-ignore lint/style/useNamingConvention: api schema
      params.age_range = parsed;
    } else {
      errors.push(`Person ${config.name}: age range must be like 18-70.`);
    }
  }
  return params;
}

/**
 * Build the API column definition for a sampler column.
 *
 * The sampler parameters come from `buildSamplerParams`. For category
 * samplers, `conditional_params` is attached when
 * `buildCategoryConditionalParams` produced at least one rule. Both helpers
 * receive `errors` for reporting.
 */
export function buildSamplerColumn(
  config: SamplerConfig,
  errors: string[],
): Record<string, unknown> {
  const samplerType = config.sampler_type;
  const params = buildSamplerParams(config, errors);

  const column: Record<string, unknown> = {
    // biome-ignore lint/style/useNamingConvention: api schema
    column_type: "sampler",
    name: config.name,
    drop: config.drop ?? false,
    // biome-ignore lint/style/useNamingConvention: api schema
    sampler_type: samplerType,
    params,
    // biome-ignore lint/style/useNamingConvention: api schema
    convert_to: config.convert_to ?? undefined,
  };

  if (samplerType !== "category") {
    return column;
  }
  const conditional = buildCategoryConditionalParams(config, errors);
  if (conditional) {
    // biome-ignore lint/style/useNamingConvention: api schema
    column.conditional_params = conditional;
  }
  return column;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders-seed.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig, SeedConfig } from "../../types";

// Chunking settings used by resolveChunking for unstructured seed sources.
// Fallback chunk size when the configured value is missing or not an integer.
const DEFAULT_CHUNK_SIZE = 1200;
// Fallback chunk overlap when the configured value is missing or not an integer.
const DEFAULT_CHUNK_OVERLAP = 200;
// Upper bound applied to the chunk size.
const MAX_CHUNK_SIZE = 20000;

/**
 * Parse text as an integer.
 *
 * Returns `null` when the text is missing, blank, or does not convert (via
 * `Number`) to an integer value.
 */
function parseIntStrict(value: string | undefined): number | null {
  const text = (value ?? "").trim();
  if (text.length === 0) {
    return null;
  }
  const parsed = Number(text);
  // Number.isInteger is false for NaN and the infinities as well.
  return Number.isInteger(parsed) ? parsed : null;
}

/**
 * Resolve the chunk size and overlap for an unstructured seed.
 *
 * Missing or non-integer inputs fall back to the defaults. The size is clamped
 * to [1, MAX_CHUNK_SIZE] and the overlap to [0, chunkSize - 1].
 */
function resolveChunking(config: SeedConfig): { chunkSize: number; chunkOverlap: number } {
  const requestedSize =
    parseIntStrict(config.unstructured_chunk_size) ?? DEFAULT_CHUNK_SIZE;
  const requestedOverlap =
    parseIntStrict(config.unstructured_chunk_overlap) ?? DEFAULT_CHUNK_OVERLAP;

  const sizeAtLeastOne = Math.max(1, requestedSize);
  const chunkSize = Math.min(MAX_CHUNK_SIZE, sizeAtLeastOne);

  // The overlap must stay strictly below the chunk size and never go negative.
  const overlapCeiling = Math.max(0, chunkSize - 1);
  const overlapAtLeastZero = Math.max(0, requestedOverlap);
  const chunkOverlap = Math.min(overlapCeiling, overlapAtLeastZero);

  return { chunkSize, chunkOverlap };
}

/**
 * Build the API `seed_config` for a seed block.
 *
 * The source is one of "hf" (default), "unstructured" or "local", all sharing
 * the trimmed `hf_path`. An `index_range` or `partition_block` selection needs
 * two integer inputs; when either is missing or invalid an error is pushed and
 * `undefined` is returned. Any other selection type yields a `null` strategy.
 */
export function buildSeedConfig(
  config: SeedConfig,
  errors: string[],
): Record<string, unknown> | undefined {
  const sourceKind = config.seed_source_type ?? "hf";
  const path = config.hf_path.trim();
  const endpoint = config.hf_endpoint?.trim() || "https://huggingface.co";
  const token = config.hf_token?.trim() || null;

  let selection: Record<string, unknown> | null = null;
  switch (config.selection_type) {
    case "index_range": {
      const start = parseIntStrict(config.selection_start);
      const end = parseIntStrict(config.selection_end);
      if (start === null || end === null) {
        errors.push(`Seed ${config.name}: selection index range invalid.`);
        return undefined;
      }
      selection = { start, end };
      break;
    }
    case "partition_block": {
      const index = parseIntStrict(config.selection_index);
      const numPartitions = parseIntStrict(config.selection_num_partitions);
      if (index === null || numPartitions === null) {
        errors.push(`Seed ${config.name}: selection partition invalid.`);
        return undefined;
      }
      // biome-ignore lint/style/useNamingConvention: api schema
      selection = { index, num_partitions: numPartitions };
      break;
    }
    default:
      break;
  }

  let source: Record<string, unknown>;
  if (sourceKind === "hf") {
    source = {
      // biome-ignore lint/style/useNamingConvention: api schema
      seed_type: "hf",
      path,
      token,
      endpoint,
    };
  } else if (sourceKind === "unstructured") {
    const { chunkSize, chunkOverlap } = resolveChunking(config);
    source = {
      // biome-ignore lint/style/useNamingConvention: api schema
      seed_type: "unstructured",
      path,
      // biome-ignore lint/style/useNamingConvention: api schema
      chunk_size: chunkSize,
      // biome-ignore lint/style/useNamingConvention: api schema
      chunk_overlap: chunkOverlap,
    };
  } else {
    source = {
      // biome-ignore lint/style/useNamingConvention: api schema
      seed_type: "local",
      path,
    };
  }

  return {
    source,
    // biome-ignore lint/style/useNamingConvention: api schema
    sampling_strategy: config.sampling_strategy,
    // biome-ignore lint/style/useNamingConvention: api schema
    selection_strategy: selection,
  };
}

/**
 * Return the first config whose `kind` is "seed", in `Object.values` order,
 * or `null` when there is none.
 */
export function pickFirstSeedConfig(
  configs: Record<string, NodeConfig>,
): SeedConfig | null {
  let found: SeedConfig | null = null;
  for (const candidate of Object.values(configs)) {
    if (candidate.kind === "seed") {
      found = candidate;
      break;
    }
  }
  return found;
}

/**
 * Build the `drop_columns` processor that removes seed columns from the output.
 *
 * For an unstructured seed, every loaded seed column is dropped when
 * `config.drop` is set. For other sources, the selected drop columns are used,
 * restricted to the loaded columns when any are known. Returns `null` when
 * nothing was requested; when a request resolves to no columns an error is
 * pushed and `null` is returned.
 */
export function buildSeedDropProcessor(
  config: SeedConfig,
  errors: string[],
): Record<string, unknown> | null {
  // Trim names and discard blanks.
  const tidy = (names: string[] | null | undefined): string[] =>
    (names ?? []).map((entry) => entry.trim()).filter(Boolean);

  const isUnstructured = (config.seed_source_type ?? "hf") === "unstructured";
  const available = tidy(config.seed_columns);

  let dropNames: string[];
  if (isUnstructured) {
    if (!config.drop) {
      return null;
    }
    dropNames = available;
  } else {
    const requested = tidy(config.seed_drop_columns);
    if (requested.length === 0) {
      return null;
    }
    if (available.length === 0) {
      // No loaded column list to check against: trust the selection.
      dropNames = requested;
    } else {
      const availableSet = new Set(available);
      dropNames = requested.filter((entry) => availableSet.has(entry));
    }
  }

  if (dropNames.length === 0) {
    errors.push(
      `Seed ${config.name}: selected drop columns are unavailable.`,
    );
    return null;
  }

  return {
    // biome-ignore lint/style/useNamingConvention: api schema
    processor_type: "drop_columns",
    name: "drop_seed_columns",
    // biome-ignore lint/style/useNamingConvention: api schema
    column_names: dropNames,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders-validator.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig, ValidatorConfig } from "../../types";
import { isValidatorCodeLang } from "../validators/code-lang";

// Leading segment of the `validation_function` marker string that
// buildValidatorColumn emits for oxc validators
// (marker:code_lang:validation_mode:code_shape).
const OXC_VALIDATION_FN_MARKER = "unsloth_oxc_validator";

/**
 * Parse the batch size text as a base-10 integer, falling back to 10 when the
 * result is not a finite number or is below 1.
 */
function parseBatchSize(value: string): number {
  const fallback = 10;
  const batchSize = Number.parseInt(value, 10);
  const usable = Number.isFinite(batchSize) && batchSize >= 1;
  return usable ? batchSize : fallback;
}

/**
 * Build the API column definition for a validator column.
 *
 * Target column names are trimmed and blanks dropped; having none is reported
 * through `errors`, but a column is still returned. An "oxc" validator is
 * emitted as a `local_callable` whose `validation_function` is a marker string;
 * its language is taken from the first target when that target is an LLM code
 * column with a language accepted by `isValidatorCodeLang`. Every other
 * validator is emitted as a `code` validator using the config's own language.
 */
export function buildValidatorColumn(
  config: ValidatorConfig,
  errors: string[],
  nameToConfig?: Map<string, NodeConfig>,
): Record<string, unknown> {
  const targets: string[] = [];
  for (const rawTarget of config.target_columns ?? []) {
    const cleaned = rawTarget.trim();
    if (cleaned) {
      targets.push(cleaned);
    }
  }
  if (targets.length < 1) {
    errors.push(`Validator ${config.name}: target code column required.`);
  }

  // Fields shared by both validator flavours, in payload order.
  const shared = {
    // biome-ignore lint/style/useNamingConvention: api schema
    column_type: "validation",
    name: config.name,
    drop: config.drop ?? false,
    // biome-ignore lint/style/useNamingConvention: api schema
    target_columns: targets,
  };
  const batchSize = parseBatchSize(config.batch_size);

  if (config.validator_type !== "oxc") {
    return {
      ...shared,
      // biome-ignore lint/style/useNamingConvention: api schema
      validator_type: "code",
      // biome-ignore lint/style/useNamingConvention: api schema
      validator_params: {
        // biome-ignore lint/style/useNamingConvention: api schema
        code_lang: config.code_lang,
      },
      // biome-ignore lint/style/useNamingConvention: api schema
      batch_size: batchSize,
    };
  }

  const firstTarget = targets[0] ?? "";
  const linked = firstTarget ? nameToConfig?.get(firstTarget) : null;
  let lang = config.code_lang;
  if (linked && linked.kind === "llm" && linked.llm_type === "code") {
    const linkedLang = (linked.code_lang ?? "").trim();
    if (isValidatorCodeLang(linkedLang)) {
      lang = linkedLang;
    }
  }

  return {
    ...shared,
    // biome-ignore lint/style/useNamingConvention: api schema
    validator_type: "local_callable",
    // biome-ignore lint/style/useNamingConvention: api schema
    validator_params: {
      // backend resolves this marker to a real callable.
      // biome-ignore lint/style/useNamingConvention: api schema
      validation_function: `${OXC_VALIDATION_FN_MARKER}:${lang}:${config.oxc_validation_mode}:${config.oxc_code_shape ?? "auto"}`,
    },
    // biome-ignore lint/style/useNamingConvention: api schema
    batch_size: batchSize,
  };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/builders.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export {
  buildLlmColumn,
  buildLlmMcpProvider,
  buildLlmToolConfig,
  buildToolProfilePayload,
} from "./builders-llm";
export { buildModelConfig, buildModelProvider } from "./builders-model";
export { buildExpressionColumn, buildProcessors } from "./builders-processors";
export { buildSamplerColumn } from "./builders-sampler";
export { buildValidatorColumn } from "./builders-validator";
export {
  buildSeedConfig,
  buildSeedDropProcessor,
  pickFirstSeedConfig,
} from "./builders-seed";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/empty.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipePayload } from "./types";

/**
 * Create a fresh, empty recipe payload: no providers, configs, columns or
 * processors; a 5-row preview run with "jsonl" output; and an empty graph laid
 * out left-to-right ("LR"). Every call returns new objects and arrays.
 */
export function createEmptyRecipePayload(): RecipePayload {
  const recipe: RecipePayload["recipe"] = {
    // biome-ignore lint/style/useNamingConvention: api schema
    model_providers: [],
    // biome-ignore lint/style/useNamingConvention: api schema
    mcp_providers: [],
    // biome-ignore lint/style/useNamingConvention: api schema
    model_configs: [],
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_configs: [],
    columns: [],
    processors: [],
  };
  const run: RecipePayload["run"] = {
    rows: 5,
    preview: true,
    // biome-ignore lint/style/useNamingConvention: api schema
    output_formats: ["jsonl"],
  };
  const ui: RecipePayload["ui"] = {
    nodes: [],
    edges: [],
    layout_direction: "LR",
  };
  return { recipe, run, ui };
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { buildRecipePayload } from "./build-payload";
export { createEmptyRecipePayload } from "./empty";
export type { RecipePayload, RecipePayloadResult } from "./types";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/parse.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export {
  isValidSex,
  parseAgeRange,
  parseJsonObject,
  parseNumber,
} from "../parse";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Shape of a recipe payload. It has three sections:
 * - `recipe`: providers, model/tool configs, optional seed config, columns and
 *   processors, each kept as loosely typed records.
 * - `run`: row count, preview flag, output formats and optional run settings.
 * - `ui`: graph nodes/edges plus the fields the inline comments mark as ui-only.
 */
export type RecipePayload = {
  // Recipe definition proper.
  recipe: {
    // biome-ignore lint/style/useNamingConvention: api schema
    model_providers: Record<string, unknown>[];
    // biome-ignore lint/style/useNamingConvention: api schema
    mcp_providers: Record<string, unknown>[];
    // biome-ignore lint/style/useNamingConvention: api schema
    model_configs: Record<string, unknown>[];
    // biome-ignore lint/style/useNamingConvention: api schema
    seed_config?: Record<string, unknown>;
    // biome-ignore lint/style/useNamingConvention: api schema
    tool_configs: Record<string, unknown>[];
    columns: Record<string, unknown>[];
    processors: Record<string, unknown>[];
  };
  // Run settings; only rows, preview and output_formats are required.
  run: {
    rows: number;
    preview: boolean;
    // biome-ignore lint/style/useNamingConvention: api schema
    output_formats: string[];
    // biome-ignore lint/style/useNamingConvention: backend schema
    execution_type?: "preview" | "full";
    // biome-ignore lint/style/useNamingConvention: backend schema
    run_config?: Record<string, unknown>;
    // biome-ignore lint/style/useNamingConvention: backend schema
    dataset_name?: string;
    // biome-ignore lint/style/useNamingConvention: backend schema
    artifact_path?: string;
    // biome-ignore lint/style/useNamingConvention: backend schema
    merge_batches?: boolean;
    // biome-ignore lint/style/useNamingConvention: backend schema
    run_name?: string | null;
  };
  // Editor graph state.
  ui: {
    // Graph nodes with their position; the remaining fields are optional.
    nodes: Array<{
      id: string;
      x: number;
      y: number;
      width?: number;
      node_type?: "markdown_note" | "tool_config";
      name?: string;
      markdown?: string;
      note_color?: string;
      note_opacity?: string;
      tools_by_provider?: Record<string, string[]>;
    }>;
    // Graph edges between node ids, with optional type and handle names.
    edges: {
      from: string;
      to: string;
      type?: string;
      source_handle?: string;
      target_handle?: string;
    }[];
    // ui-only: graph orientation
    layout_direction?: "LR" | "TB";
    // ui-only, used to preserve seed block mode across imports/refresh
    seed_source_type?: "hf" | "local" | "unstructured";
    // ui-only, persisted aux node positions by llm name + aux key
    aux_nodes?: Array<{
      llm: string;
      key: string;
      x: number;
      y: number;
    }>;
    // ui-only, seed metadata cached for refresh/import UX
    seed_columns?: string[];
    seed_drop_columns?: string[];
    seed_preview_rows?: Record<string, unknown>[];
    local_file_name?: string;
    unstructured_file_name?: string;
    unstructured_chunk_size?: string;
    unstructured_chunk_overlap?: string;
    // ui-only: per-node advanced accordion state
    advanced_open_by_node?: Record<string, boolean>;
  };
};

/**
 * A recipe payload paired with a list of error messages.
 */
export type RecipePayloadResult = {
  // Error messages associated with the payload.
  errors: string[];
  // The recipe payload itself.
  payload: RecipePayload;
};


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/payload/validate.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  ModelConfig,
  ModelProviderConfig,
  NodeConfig,
  ValidatorCodeLang,
  ValidatorConfig,
} from "../../types";
import { VALIDATOR_OXC_CODE_LANGS } from "../validators/code-lang";
import { isOxcCodeShape } from "../validators/oxc-code-shape";
import { isOxcValidationMode } from "../validators/oxc-mode";

/**
 * Check every subcategory sampler against its parent category.
 *
 * A subcategory without a parent name is reported. Otherwise, when the parent
 * resolves to a category sampler, each of the parent's values must map to at
 * least one subcategory; a parent that does not resolve to a category sampler
 * contributes no values to check. Messages are appended to `errors`.
 */
export function validateSubcategoryConfigs(
  configs: Record<string, NodeConfig>,
  nameToConfig: Map<string, NodeConfig>,
  errors: string[],
): void {
  for (const node of Object.values(configs)) {
    if (!(node.kind === "sampler" && node.sampler_type === "subcategory")) {
      continue;
    }

    const parentName = node.subcategory_parent;
    if (!parentName) {
      errors.push(`Subcategory ${node.name}: parent category required.`);
      continue;
    }

    const parentNode = nameToConfig.get(parentName);
    let parentValues: string[] = [];
    if (
      parentNode &&
      parentNode.kind === "sampler" &&
      parentNode.sampler_type === "category"
    ) {
      parentValues = parentNode.values ?? [];
    }

    const mapping = node.subcategory_mapping ?? {};
    const uncovered = parentValues.filter((value) => {
      const children = mapping[value];
      return !children || children.length === 0;
    });
    for (const value of uncovered) {
      errors.push(
        `Subcategory ${node.name}: '${value}' needs at least 1 subcategory.`,
      );
    }
  }
}

/**
 * Check that every timedelta sampler references an existing datetime sampler.
 *
 * A blank reference, or a reference that does not resolve to a sampler of type
 * "datetime", is reported through `errors`.
 */
export function validateTimedeltaConfigs(
  configs: Record<string, NodeConfig>,
  nameToConfig: Map<string, NodeConfig>,
  errors: string[],
): void {
  for (const node of Object.values(configs)) {
    if (!(node.kind === "sampler" && node.sampler_type === "timedelta")) {
      continue;
    }

    const referenceName = (node.reference_column_name ?? "").trim();
    if (referenceName === "") {
      errors.push(`Timedelta ${node.name}: reference datetime column required.`);
      continue;
    }

    const referenced = nameToConfig.get(referenceName);
    const isDatetimeSampler =
      referenced?.kind === "sampler" && referenced.sampler_type === "datetime";
    if (!isDatetimeSampler) {
      errors.push(`Timedelta ${node.name}: reference '${referenceName}' must be datetime.`);
    }
  }
}

/**
 * Report every model alias that has no model config with the same name.
 */
export function validateModelAliasLinks(
  modelAliases: Set<string>,
  modelConfigConfigs: ModelConfig[],
  errors: string[],
): void {
  const configuredNames = new Set(
    modelConfigConfigs.map((entry) => entry.name),
  );
  for (const alias of modelAliases) {
    if (configuredNames.has(alias)) {
      continue;
    }
    errors.push(`LLM model_alias ${alias}: missing model config.`);
  }
}

/**
 * Validate model configs against the aliases in use and the known providers.
 *
 * A config whose name is among `modelAliases` must have a non-blank model, and
 * a non-blank provider must be present in `modelProviderNames`. Messages are
 * appended to `errors`.
 */
export function validateModelConfigProviders(
  modelConfigConfigs: ModelConfig[],
  modelAliases: Set<string>,
  modelProviderNames: Set<string>,
  errors: string[],
): void {
  for (const entry of modelConfigConfigs) {
    const providerName = entry.provider.trim();
    const alias = entry.name;

    const isReferenced = modelAliases.has(alias);
    if (isReferenced && entry.model.trim() === "") {
      errors.push(`Model config ${alias}: model is required.`);
    }

    const providerKnown =
      providerName === "" || modelProviderNames.has(providerName);
    if (!providerKnown) {
      errors.push(`Model config ${alias}: provider ${providerName} not found.`);
    }
  }
}

/**
 * Validate the model providers that at least one model config refers to.
 *
 * Providers nobody references are ignored. A referenced provider must have a
 * non-blank endpoint and a non-blank provider_type; each missing field is
 * reported through `errors`.
 */
export function validateUsedProviders(
  modelProviderConfigs: ModelProviderConfig[],
  modelConfigConfigs: ModelConfig[],
  errors: string[],
): void {
  // Trimmed, non-blank provider names referenced by the model configs.
  const referenced = new Set<string>();
  for (const modelConfig of modelConfigConfigs) {
    const providerName = modelConfig.provider.trim();
    if (providerName) {
      referenced.add(providerName);
    }
  }

  for (const candidate of modelProviderConfigs) {
    if (referenced.has(candidate.name)) {
      if (candidate.endpoint.trim() === "") {
        errors.push(`Model provider ${candidate.name}: endpoint is required.`);
      }
      if (candidate.provider_type.trim() === "") {
        errors.push(`Model provider ${candidate.name}: provider_type is required.`);
      }
    }
  }
}

/**
 * Cross-checks every validator node against the node its first target column
 * points to. At most one message is appended per validator.
 *
 * Rules, in order: the target must exist; it must be an LLM node of type
 * "code"; for "oxc" validators the target language must be one of
 * VALIDATOR_OXC_CODE_LANGS and the validator's mode and code shape must be
 * valid; for every other validator type the validator's code_lang must equal
 * the target's code_lang (both trimmed).
 *
 * @param configs - All node configs keyed by id; non-validators are ignored.
 * @param nameToConfig - Lookup from node name to config, used to resolve the target.
 * @param errors - Output list that receives the messages.
 */
export function validateValidatorConfigs(
  configs: Record<string, NodeConfig>,
  nameToConfig: Map<string, NodeConfig>,
  errors: string[],
): void {
  for (const config of Object.values(configs)) {
    if (config.kind !== "validator") {
      continue;
    }
    // Only the first target column is inspected; a blank one is not reported here.
    const target = (config as ValidatorConfig).target_columns[0]?.trim();
    if (!target) {
      continue;
    }

    const targetConfig = nameToConfig.get(target);
    if (!targetConfig) {
      errors.push(`Validator ${config.name}: target '${target}' not found.`);
      continue;
    }
    if (!(targetConfig.kind === "llm" && targetConfig.llm_type === "code")) {
      errors.push(`Validator ${config.name}: target '${target}' must be LLM Code.`);
      continue;
    }

    const targetLang = (targetConfig.code_lang ?? "").trim();

    if (config.validator_type === "oxc") {
      // The oxc checks stop at the first failure.
      if (!VALIDATOR_OXC_CODE_LANGS.includes(targetLang as ValidatorCodeLang)) {
        errors.push(
          `Validator ${config.name}: target '${target}' must use javascript/typescript/jsx/tsx.`,
        );
        continue;
      }
      if (!isOxcValidationMode(config.oxc_validation_mode)) {
        errors.push(
          `Validator ${config.name}: oxc_validation_mode '${config.oxc_validation_mode}' is invalid.`,
        );
        continue;
      }
      if (!isOxcCodeShape(config.oxc_code_shape)) {
        errors.push(
          `Validator ${config.name}: oxc_code_shape '${config.oxc_code_shape}' is invalid.`,
        );
      }
      continue;
    }

    // Non-oxc validators must use the same language as the code they check.
    if (targetLang !== config.code_lang.trim()) {
      errors.push(
        `Validator ${config.name}: code_lang '${config.code_lang}' must match target '${target}' (${targetConfig.code_lang ?? "unknown"}).`,
      );
    }
  }
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/processors.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { RecipeProcessorConfig } from "../types";

/**
 * Creates the default schema-transform processor config: a fixed id and name
 * plus a JSON template with a single "text" field holding a
 * `{{ column_name }}` placeholder.
 *
 * @returns A new config object on every call.
 */
export function buildDefaultSchemaTransform(): RecipeProcessorConfig {
  const defaultProcessor: RecipeProcessorConfig = {
    id: "schema-transform-1",
    // biome-ignore lint/style/useNamingConvention: api schema
    processor_type: "schema_transform",
    name: "schema_transform",
    template: '{\n  "text": "{{ column_name }}"\n}',
  };
  return defaultProcessor;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/reactflow-changes.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  Edge,
  EdgeChange,
  Node,
  NodeChange,
  XYPosition,
} from "@xyflow/react";

/**
 * Forwards position changes of auxiliary nodes (ids starting with "aux-") to
 * `actions.setAuxNodePosition`. Every other change is ignored.
 *
 * @param changes - Node changes to scan.
 * @param actions - Receiver for the new position of each moved aux node.
 */
export function applyAuxNodeChanges<T extends Node>(
  changes: NodeChange<T>[],
  actions: {
    setAuxNodePosition: (id: string, position: XYPosition) => void;
  },
): void {
  for (const change of changes) {
    if (
      "id" in change &&
      change.id.startsWith("aux-") &&
      change.type === "position"
    ) {
      // Prefer the relative position; fall back to the absolute one.
      const target = change.position ?? change.positionAbsolute;
      if (target) {
        actions.setAuxNodePosition(change.id, target);
      }
    }
  }
}

/**
 * Keeps only the node changes that carry an `id` contained in `ids`.
 * Changes without an `id` property are dropped.
 *
 * @param changes - Node changes to filter.
 * @param ids - Ids to keep.
 * @returns A new array with the retained changes in their original order.
 */
export function filterNodeChangesByIds<T extends Node>(
  changes: NodeChange<T>[],
  ids: Set<string>,
): NodeChange<T>[] {
  return changes.reduce<NodeChange<T>[]>((kept, change) => {
    if ("id" in change && ids.has(change.id)) {
      kept.push(change);
    }
    return kept;
  }, []);
}

/**
 * Keeps only the edge changes that carry an `id` contained in `ids`.
 * Changes without an `id` property are dropped.
 *
 * @param changes - Edge changes to filter.
 * @param ids - Ids to keep.
 * @returns A new array with the retained changes in their original order.
 */
export function filterEdgeChangesByIds(
  changes: EdgeChange<Edge>[],
  ids: Set<string>,
): EdgeChange<Edge>[] {
  return changes.reduce<EdgeChange<Edge>[]>((kept, change) => {
    if ("id" in change && ids.has(change.id)) {
      kept.push(change);
    }
    return kept;
  }, []);
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/recipe-studio-view.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig, SamplerConfig } from "../types";

/** Option lists that buildDialogOptions derives from the node configs. */
export type DialogOptions = {
  // Sampler configs whose sampler_type is "category" (full config objects).
  categoryOptions: SamplerConfig[];
  // Names of the "model_config" nodes.
  modelConfigAliases: string[];
  // Names of the "model_provider" nodes.
  modelProviderOptions: string[];
  // Names of the "tool_config" nodes.
  toolProfileAliases: string[];
  // Names of the sampler nodes whose sampler_type is "datetime".
  datetimeOptions: string[];
};

/**
 * Sorts node configs into the option lists of DialogOptions.
 *
 * Category samplers are collected as full configs; datetime samplers, model
 * configs, model providers and tool configs are collected by name. Every
 * other kind is ignored. Input order is preserved within each list.
 *
 * @param configList - Node configs to scan.
 * @returns The populated option lists.
 */
export function buildDialogOptions(configList: NodeConfig[]): DialogOptions {
  const options: DialogOptions = {
    categoryOptions: [],
    modelConfigAliases: [],
    modelProviderOptions: [],
    toolProfileAliases: [],
    datetimeOptions: [],
  };

  for (const config of configList) {
    switch (config.kind) {
      case "sampler":
        if (config.sampler_type === "category") {
          options.categoryOptions.push(config);
        }
        if (config.sampler_type === "datetime") {
          options.datetimeOptions.push(config.name);
        }
        break;
      case "model_config":
        options.modelConfigAliases.push(config.name);
        break;
      case "model_provider":
        options.modelProviderOptions.push(config.name);
        break;
      case "tool_config":
        options.toolProfileAliases.push(config.name);
        break;
      default:
        break;
    }
  }

  return options;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/refs.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Matches `{{ name }}` where name is a bare identifier; group 1 is the name.
const JINJA_REF_RE = /{{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*}}/g;
// Matches any `{{ ... }}` whose content has no braces; group 1 is the content.
const JINJA_EXPR_RE = /{{\s*([^{}]+?)\s*}}/g;
// An identifier that may also contain dots, e.g. `a` or `a.b.c`.
const SIMPLE_JINJA_EXPR_RE = /^[a-zA-Z_][a-zA-Z0-9_.]*$/;
// Only letters, digits, underscores, dots, whitespace and hyphens.
const PLAIN_JINJA_EXPR_RE = /^[a-zA-Z0-9_.\s-]+$/;
// Root names for which isValidNestedReference accepts a dotted path.
const NESTED_REFERENCE_ROOTS = new Set(["user"]);

/**
 * Tells whether `expr` is a dotted path (at least two non-blank segments)
 * whose first segment is both a valid reference and one of
 * NESTED_REFERENCE_ROOTS.
 *
 * @param expr - Expression text to inspect.
 * @param validSet - Names that are valid references.
 */
function isValidNestedReference(expr: string, validSet: Set<string>): boolean {
  if (expr.indexOf(".") === -1) {
    return false;
  }
  const segments: string[] = [];
  for (const piece of expr.split(".")) {
    const trimmed = piece.trim();
    if (trimmed) {
      segments.push(trimmed);
    }
  }
  if (segments.length < 2) {
    return false;
  }
  const root = segments[0];
  return NESTED_REFERENCE_ROOTS.has(root) && validSet.has(root);
}

/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a RegExp source and match `value` literally.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" stands for the matched character, so each one gets a leading backslash.
  const escaped = value.replace(metaCharacters, "\\$&");
  return escaped;
}

/**
 * Lists the distinct bare-identifier references (`{{ name }}`) found in a
 * template, in order of first appearance.
 *
 * @param template - Template text; an empty value yields an empty list.
 */
export function extractRefs(template: string): string[] {
  const names = new Set<string>();
  if (template) {
    for (const [, name] of template.matchAll(JINJA_REF_RE)) {
      if (name) {
        names.add(name);
      }
    }
  }
  return Array.from(names);
}

/**
 * Finds `{{ ... }}` expressions in a template that do not resolve to a known
 * reference.
 *
 * An expression shaped like an identifier or dotted path is invalid unless it
 * is in `validReferences` or passes isValidNestedReference. Any other
 * expression made only of letters, digits, `_`, `.`, whitespace and `-` is
 * always invalid. Expressions with other characters are not judged.
 *
 * @param template - Template text; an empty value yields an empty list.
 * @param validReferences - Allowed names; entries are trimmed and blanks dropped.
 * @returns The distinct invalid expressions in order of first appearance.
 */
export function findInvalidJinjaReferences(
  template: string,
  validReferences: string[],
): string[] {
  if (!template) {
    return [];
  }

  const known = new Set<string>();
  for (const name of validReferences) {
    const trimmed = name.trim();
    if (trimmed) {
      known.add(trimmed);
    }
  }

  const flagged = new Set<string>();
  for (const [, captured] of template.matchAll(JINJA_EXPR_RE)) {
    const expr = (captured ?? "").trim();
    if (!expr) {
      continue;
    }
    if (SIMPLE_JINJA_EXPR_RE.test(expr)) {
      const resolves = known.has(expr) || isValidNestedReference(expr, known);
      if (!resolves) {
        flagged.add(expr);
      }
    } else if (PLAIN_JINJA_EXPR_RE.test(expr)) {
      flagged.add(expr);
    }
  }

  return Array.from(flagged);
}

/**
 * Rewrites every `{{ from }}` placeholder in a template to `{{ to }}`.
 *
 * Whitespace inside the braces is tolerated when matching and normalised to a
 * single space on each side in the output.
 *
 * @param template - Template text; returned unchanged when empty.
 * @param from - Reference name to replace (matched literally).
 * @param to - New reference name (inserted literally).
 * @returns The rewritten template, or the input when `from === to`.
 */
export function replaceRef(
  template: string,
  from: string,
  to: string,
): string {
  if (!template || from === to) {
    return template;
  }
  const pattern = new RegExp(`{{\\s*${escapeRegExp(from)}\\s*}}`, "g");
  const replacement = `{{ ${to} }}`;
  // Use a replacer function so that "$&", "$1", "$$" and similar sequences in
  // `to` are inserted verbatim instead of being read as substitution patterns.
  return template.replace(pattern, () => replacement);
}

/**
 * Removes every `{{ ref }}` placeholder from a template.
 *
 * A line consisting only of the placeholder (plus whitespace) is dropped
 * entirely. On every other line the placeholder is deleted and trailing
 * whitespace is stripped, including on lines that never held the placeholder.
 *
 * @param template - Template text; returned unchanged when empty.
 * @param ref - Reference name to remove (matched literally).
 */
export function removeRef(template: string, ref: string): string {
  if (!template) {
    return template;
  }
  const escaped = escapeRegExp(ref);
  const wholeLine = new RegExp(`^\\s*{{\\s*${escaped}\\s*}}\\s*$`);
  const anywhere = new RegExp(`{{\\s*${escaped}\\s*}}`, "g");

  const keptLines: string[] = [];
  for (const line of template.split("\n")) {
    if (wholeLine.test(line)) {
      continue;
    }
    const withoutRef = line.replace(anywhere, "");
    keptLines.push(withoutRef.replace(/\s+$/g, ""));
  }
  return keptLines.join("\n");
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/rf-node-dimensions.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { Node } from "@xyflow/react";

/**
 * Reads a dimension as a finite number.
 *
 * Finite numbers are returned as-is. Strings go through Number.parseFloat, so
 * a numeric prefix such as "12.5px" yields 12.5. Everything else, including
 * NaN and infinite values, yields null.
 */
function parseDim(value: unknown): number | null {
  switch (typeof value) {
    case "number":
      return Number.isFinite(value) ? value : null;
    case "string": {
      const numeric = Number.parseFloat(value);
      return Number.isFinite(numeric) ? numeric : null;
    }
    default:
      return null;
  }
}

/**
 * Resolves a node's width from the first source that parses to a finite
 * number: `node.width`, then `node.style.width`, then `node.measured.width`.
 *
 * @returns The width, or null when none of the sources is usable.
 */
export function readNodeWidth(node: Node): number | null {
  const explicit = parseDim(node.width);
  if (explicit !== null) {
    return explicit;
  }
  const styled = parseDim(node.style?.width);
  if (styled !== null) {
    return styled;
  }
  return parseDim(node.measured?.width);
}

/**
 * Resolves a node's height from the first source that parses to a finite
 * number: `node.height`, then `node.style.height`, then `node.measured.height`.
 *
 * @returns The height, or null when none of the sources is usable.
 */
export function readNodeHeight(node: Node): number | null {
  const explicit = parseDim(node.height);
  if (explicit !== null) {
    return explicit;
  }
  const styled = parseDim(node.style?.height);
  if (styled !== null) {
    return styled;
  }
  return parseDim(node.measured?.height);
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/ui-tones.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// CSS utility-class strings (background, text and border colours with dark:
// variants), keyed by node kind. Each kind uses its own colour family.
export const RECIPE_STUDIO_NODE_TONES = {
  sampler:
    "bg-emerald-50 text-emerald-700 border-emerald-100 dark:bg-emerald-950/30 dark:text-emerald-300 dark:border-emerald-900/60",
  llm:
    "bg-sky-50 text-sky-700 border-sky-100 dark:bg-sky-950/30 dark:text-sky-300 dark:border-sky-900/60",
  validator:
    "bg-rose-50 text-rose-700 border-rose-100 dark:bg-rose-950/30 dark:text-rose-300 dark:border-rose-900/60",
  expression:
    "bg-indigo-50 text-indigo-700 border-indigo-100 dark:bg-indigo-950/30 dark:text-indigo-300 dark:border-indigo-900/60",
  note:
    "bg-violet-50 text-violet-700 border-violet-100 dark:bg-violet-950/30 dark:text-violet-300 dark:border-violet-900/60",
  seed:
    "bg-lime-50 text-lime-700 border-lime-100 dark:bg-lime-950/30 dark:text-lime-300 dark:border-lime-900/60",
  model_provider:
    "bg-amber-50 text-amber-700 border-amber-100 dark:bg-amber-950/30 dark:text-amber-300 dark:border-amber-900/60",
  model_config:
    "bg-orange-50 text-orange-700 border-orange-100 dark:bg-orange-950/30 dark:text-orange-300 dark:border-orange-900/60",
  tool_config:
    "bg-cyan-50 text-cyan-700 border-cyan-100 dark:bg-cyan-950/30 dark:text-cyan-300 dark:border-cyan-900/60",
} as const;

// Amber class string; identical to the model_provider entry of
// RECIPE_STUDIO_NODE_TONES.
export const RECIPE_STUDIO_USER_NODE_TONE =
  "bg-amber-50 text-amber-700 border-amber-100 dark:bg-amber-950/30 dark:text-amber-300 dark:border-amber-900/60";

// Class strings for reference badges: amber for "user", blue for "seed", and
// an uncoloured monospace style as the default.
export const RECIPE_STUDIO_REFERENCE_BADGE_TONES = {
  user:
    "corner-squircle border-amber-500/25 bg-amber-500/10 font-mono text-[11px] text-amber-700 dark:text-amber-300",
  seed:
    "corner-squircle border-blue-500/25 bg-blue-500/10 font-mono text-[11px] text-blue-700 dark:text-blue-300",
  default: "corner-squircle font-mono text-[11px]",
} as const;

// Amber border, background and text classes for a warning badge, including a
// hover background.
export const RECIPE_STUDIO_WARNING_BADGE_TONE =
  "border-amber-500/40 bg-amber-500/10 text-amber-700 hover:bg-amber-500/20 dark:text-amber-300";

// Amber text colour classes for a warning icon.
export const RECIPE_STUDIO_WARNING_ICON_TONE =
  "text-amber-600 dark:text-amber-400";

// Border and background classes based on the primary colour, for an
// onboarding surface.
export const RECIPE_STUDIO_ONBOARDING_SURFACE_TONE =
  "border-primary/20 bg-primary/[0.045]";

// Background and text classes based on the primary colour, for an onboarding icon.
export const RECIPE_STUDIO_ONBOARDING_ICON_TONE =
  "bg-primary/10 text-primary";


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/validation.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig } from "../types";
import { isValidSex, parseAgeRange, parseIntNumber, parseNumber } from "./parse";
import { VALIDATOR_OXC_CODE_LANGS, VALIDATOR_SQL_CODE_LANGS } from "./validators/code-lang";
import { isOxcCodeShape } from "./validators/oxc-code-shape";
import { isOxcValidationMode } from "./validators/oxc-mode";

// Values accepted for an LLM node's `with_trace` setting.
const TRACE_MODES = new Set(["none", "last_message", "all_messages"]);

/**
 * Collects the validation messages for a single node config.
 *
 * A blank name is always reported. The remaining checks depend on
 * `config.kind` (sampler, llm, expression, tool_config, validator, seed) and
 * only look at the config itself; links to other nodes are not checked here.
 *
 * @param config - The node config to check, or null.
 * @returns The messages in the order the rules are evaluated; empty when
 *   `config` is null or nothing is wrong.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: validation rules
export function getConfigErrors(config: NodeConfig | null): string[] {
  if (!config) {
    return [];
  }
  const errors: string[] = [];
  if (!config.name.trim()) {
    errors.push("Name is required.");
  }
  // Sampler rules, one section per sampler_type.
  if (config.kind === "sampler") {
    if (config.sampler_type === "category") {
      const values = config.values ?? [];
      if (values.length < 2) {
        errors.push("Category needs at least 2 values.");
      }
      // Weights are all-or-nothing: once one is set, none may be null.
      const weights = config.weights ?? [];
      const hasWeights = weights.some((weight) => weight !== null);
      if (hasWeights && weights.some((weight) => weight === null)) {
        errors.push("Weights must be set for all values.");
      }
      // Each conditional rule needs condition text, values, and (if weighted)
      // exactly one weight per non-blank value.
      for (const [condition, params] of Object.entries(
        config.conditional_params ?? {},
      )) {
        if (!condition.trim()) {
          errors.push("Category conditional rule needs condition text.");
          continue;
        }
        const conditionalValues = (params.values ?? [])
          .map((value) => value.trim())
          .filter(Boolean);
        if (conditionalValues.length === 0) {
          errors.push(`Category conditional '${condition}' needs values.`);
          continue;
        }
        const conditionalWeights = params.weights ?? [];
        const hasConditionalWeights = conditionalWeights.some(
          (weight) => weight !== null,
        );
        if (
          hasConditionalWeights &&
          (conditionalWeights.length !== conditionalValues.length ||
            conditionalWeights.some((weight) => weight === null))
        ) {
          errors.push(
            `Category conditional '${condition}' weights must be set for all values.`,
          );
        }
      }
    }
    if (config.sampler_type === "uniform") {
      const low = parseNumber(config.low);
      const high = parseNumber(config.high);
      if (low === null || high === null) {
        errors.push("Uniform low/high must be numbers.");
      } else if (low >= high) {
        errors.push("Uniform low must be < high.");
      }
    }
    if (config.sampler_type === "gaussian") {
      const mean = parseNumber(config.mean);
      const std = parseNumber(config.std);
      if (mean === null || std === null) {
        errors.push("Gaussian mean/std must be numbers.");
      } else if (std <= 0) {
        errors.push("Gaussian std must be > 0.");
      }
    }
    if (config.sampler_type === "bernoulli") {
      const p = parseNumber(config.p);
      if (p === null) {
        errors.push("Bernoulli p must be a number.");
      } else if (p < 0 || p > 1) {
        errors.push("Bernoulli p must be between 0 and 1.");
      }
    }
    if (config.sampler_type === "datetime") {
      if (!config.datetime_unit) {
        errors.push("Datetime unit required.");
      }
      // The range is only checked when both ends are provided.
      if (config.datetime_start && config.datetime_end) {
        const start = new Date(config.datetime_start).getTime();
        const end = new Date(config.datetime_end).getTime();
        if (!(Number.isFinite(start) && Number.isFinite(end))) {
          errors.push("Datetime start/end must be valid.");
        } else if (start >= end) {
          errors.push("Datetime start must be before end.");
        }
      }
    }
    if (config.sampler_type === "timedelta") {
      const min = parseNumber(config.dt_min);
      const max = parseNumber(config.dt_max);
      if (min === null || max === null) {
        errors.push("Timedelta dt_min/dt_max must be numbers.");
      } else if (min >= max) {
        errors.push("Timedelta dt_min must be < dt_max.");
      }
      if (!config.reference_column_name?.trim()) {
        errors.push("Timedelta reference datetime column required.");
      }
      if (!config.timedelta_unit) {
        errors.push("Timedelta unit required.");
      }
    }
    if (config.sampler_type === "subcategory" && !config.subcategory_parent) {
      errors.push("Subcategory needs a parent category column.");
    }
    if (
      config.sampler_type === "person" ||
      config.sampler_type === "person_from_faker"
    ) {
      // Sex and age range are optional; they are validated only when filled in.
      if (config.person_sex?.trim()) {
        const normalized = config.person_sex.trim();
        if (!isValidSex(normalized)) {
          errors.push("Person sex must be Male or Female.");
        }
      }
      if (config.person_age_range?.trim()) {
        const parsed = parseAgeRange(config.person_age_range);
        if (!parsed) {
          errors.push("Person age range must be like 18-70.");
        }
      }
    }
  }
  // LLM rules: common fields first, then per llm_type.
  if (config.kind === "llm") {
    if (!config.model_alias.trim()) {
      errors.push("Choose a saved model.");
    }
    if (!config.prompt.trim()) {
      errors.push("Prompt is required.");
    }
    if (config.llm_type === "code" && !config.code_lang) {
      errors.push("Code language is required.");
    }
    if (config.llm_type === "structured") {
      if (!config.output_format?.trim()) {
        errors.push("Output format is required.");
      } else {
        // Only JSON syntax is checked here, not the shape of the parsed value.
        try {
          JSON.parse(config.output_format);
        } catch {
          errors.push("Output format must be valid JSON.");
        }
      }
    }
    if (config.llm_type === "judge") {
      const scores = config.scores ?? [];
      if (scores.length === 0) {
        errors.push("Add at least one scoring rule.");
      }
      for (const score of scores) {
        if (!score.name.trim()) {
          errors.push("Each scoring rule needs a name.");
        }
        if (!score.description.trim()) {
          errors.push("Each scoring rule needs a description.");
        }
        const options = score.options ?? [];
        if (options.length === 0) {
          errors.push(`Scoring rule ${score.name || "Untitled"} needs options.`);
        }
        for (const option of options) {
          if (!option.value.trim() || !option.description.trim()) {
            errors.push(
              `Scoring rule ${score.name || "Untitled"} needs both a value and a description for each option.`,
            );
            // Report incomplete options once per scoring rule.
            break;
          }
        }
      }
    }
    if (config.image_context?.enabled) {
      if (!config.image_context.column_name.trim()) {
        errors.push("Image context column is required.");
      }
    }
    if (
      config.with_trace &&
      !TRACE_MODES.has(config.with_trace)
    ) {
      errors.push("Trace mode must be none, last_message, or all_messages.");
    }
  }
  if (config.kind === "expression") {
    if (!config.expr.trim()) {
      errors.push("Expression is required.");
    }
  }
  // Tool config rules: at least one uniquely named server, each with a
  // command (stdio) or an endpoint (any other provider_type).
  if (config.kind === "tool_config") {
    if (config.mcp_providers.length === 0) {
      errors.push("Add at least one tool server.");
    }
    const serverNames = new Set<string>();
    for (const provider of config.mcp_providers) {
      const name = provider.name.trim();
      if (!name) {
        errors.push("Each tool server needs a name.");
        continue;
      }
      if (serverNames.has(name)) {
        errors.push(`Tool server names must be unique: ${name}.`);
      }
      serverNames.add(name);
      if (provider.provider_type === "stdio") {
        if (!provider.command?.trim()) {
          errors.push(`Tool server ${name}: add a command.`);
        }
      } else if (!provider.endpoint?.trim()) {
        errors.push(`Tool server ${name}: add an endpoint.`);
      }
    }
    // Both numeric limits are optional; blank values are not checked.
    const maxTurnsRaw = config.max_tool_call_turns?.trim();
    if (
      maxTurnsRaw &&
      (!Number.isFinite(Number(maxTurnsRaw)) || Number(maxTurnsRaw) < 1)
    ) {
      errors.push("Max tool-use turns must be 1 or more.");
    }
    const timeoutRaw = config.timeout_sec?.trim();
    if (
      timeoutRaw &&
      (!Number.isFinite(Number(timeoutRaw)) || Number(timeoutRaw) <= 0)
    ) {
      errors.push("Timeout must be > 0.");
    }
  }
  // Validator rules: a target, a batch size, and a language that fits the
  // validator type.
  if (config.kind === "validator") {
    const targets = (config.target_columns ?? [])
      .map((value) => value.trim())
      .filter(Boolean);
    if (targets.length === 0) {
      errors.push("Choose the code step to check.");
    }
    const batch = parseIntNumber(config.batch_size);
    if (batch === null || batch < 1) {
      errors.push("Batch size must be an integer >= 1.");
    }
    if (!config.code_lang.trim()) {
      errors.push("Choose a code language for this check.");
    } else if (config.validator_type === "oxc") {
      if (!VALIDATOR_OXC_CODE_LANGS.includes(config.code_lang)) {
        errors.push("This JS/TS check only supports JavaScript or TypeScript.");
      }
      if (!isOxcValidationMode(config.oxc_validation_mode)) {
        errors.push("Choose whether to check syntax, lint rules, or both.");
      }
      if (!isOxcCodeShape(config.oxc_code_shape)) {
        errors.push("Choose whether this code is a full file or a snippet.");
      }
    } else if (
      config.code_lang !== "python" &&
      !VALIDATOR_SQL_CODE_LANGS.includes(config.code_lang)
    ) {
      errors.push("This check supports Python or SQL.");
    }
  }
  // Seed rules: source, optional chunking or drop columns, then row selection.
  if (config.kind === "seed") {
    // A missing seed_source_type is treated as "hf".
    const seedSourceType = config.seed_source_type ?? "hf";
    if (seedSourceType === "hf" && !config.hf_repo_id.trim()) {
      errors.push("Choose a Hugging Face dataset.");
    }
    // hf_path is required for every source type, not only "hf".
    if (!config.hf_path.trim()) {
      errors.push("Load the source-data preview first.");
    }
    if (
      seedSourceType === "hf" &&
      config.hf_endpoint?.trim() &&
      !config.hf_endpoint.trim().startsWith("http")
    ) {
      errors.push("HF endpoint must start with http.");
    }
    if (seedSourceType === "unstructured") {
      if (config.drop && (config.seed_columns?.length ?? 0) === 0) {
        errors.push("Load the available fields before hiding any from the final dataset.");
      }
      // Sizes are compared after Math.floor, so fractional input is judged by
      // its integer part.
      const chunkSizeRaw = Number(config.unstructured_chunk_size);
      const chunkOverlapRaw = Number(config.unstructured_chunk_overlap);
      if (!Number.isFinite(chunkSizeRaw) || Math.floor(chunkSizeRaw) < 1) {
        errors.push("Chunk size must be an integer >= 1.");
      }
      if (!Number.isFinite(chunkOverlapRaw) || Math.floor(chunkOverlapRaw) < 0) {
        errors.push("Chunk overlap must be an integer >= 0.");
      }
      if (
        Number.isFinite(chunkSizeRaw) &&
        Number.isFinite(chunkOverlapRaw) &&
        Math.floor(chunkOverlapRaw) >= Math.floor(chunkSizeRaw)
      ) {
        errors.push("Chunk overlap must be less than chunk size.");
      }
    } else {
      const selectedDropColumns = (config.seed_drop_columns ?? [])
        .map((value) => value.trim())
        .filter(Boolean);
      if (selectedDropColumns.length > 0 && (config.seed_columns?.length ?? 0) === 0) {
        errors.push("Load the available fields before hiding any from the final dataset.");
      }
    }

    if (config.selection_type === "index_range") {
      const start = parseIntNumber(config.selection_start);
      const end = parseIntNumber(config.selection_end);
      if (start === null || end === null) {
        errors.push("Index range start/end must be integers.");
      } else {
        if (start < 0 || end < 0) {
          errors.push("Index range start/end must be >= 0.");
        }
        if (end < start) {
          errors.push("Index range end must be >= start.");
        }
      }
    }
    if (config.selection_type === "partition_block") {
      const index = parseIntNumber(config.selection_index);
      const parts = parseIntNumber(config.selection_num_partitions);
      if (index === null || parts === null) {
        errors.push("Partition index/num_partitions must be integers.");
      } else {
        if (index < 0) errors.push("Partition index must be >= 0.");
        if (parts < 1) errors.push("Partition num_partitions must be >= 1.");
        // Skipped when parts < 1, which is already reported above.
        if (parts >= 1 && index >= parts) {
          errors.push("Partition index must be < num_partitions.");
        }
      }
    }
  }
  return errors;
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/validators/code-lang.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ValidatorCodeLang } from "../../types";

// Code languages in the JavaScript/TypeScript family.
export const VALIDATOR_OXC_CODE_LANGS: ValidatorCodeLang[] = [
  "javascript",
  "typescript",
  "jsx",
  "tsx",
];

// SQL dialects, each written as "sql:<dialect>".
export const VALIDATOR_SQL_CODE_LANGS: ValidatorCodeLang[] = [
  "sql:sqlite",
  "sql:postgres",
  "sql:mysql",
  "sql:tsql",
  "sql:bigquery",
  "sql:ansi",
];

// Every recognised validator code language: the two lists above plus "python".
const VALIDATOR_CODE_LANG_SET = new Set<ValidatorCodeLang>([
  ...VALIDATOR_OXC_CODE_LANGS,
  "python",
  ...VALIDATOR_SQL_CODE_LANGS,
]);

/**
 * Type guard: true when `value` is one of the recognised validator code
 * languages (exact, case-sensitive match).
 */
export function isValidatorCodeLang(value: string): value is ValidatorCodeLang {
  const isKnown = VALIDATOR_CODE_LANG_SET.has(value as ValidatorCodeLang);
  return isKnown;
}

/**
 * Coerces an arbitrary value into a ValidatorCodeLang.
 *
 * The value is trimmed and lower-cased before matching, so "JavaScript" or
 * "SQL:Postgres" resolve to their canonical lower-case form instead of
 * silently falling back to "python".
 *
 * @param value - Raw value, typically from stored or imported data.
 * @returns The matching language; "sql:sqlite" for an unknown "sql:" dialect;
 *   "python" for non-strings, blank strings and anything unrecognised.
 */
export function normalizeValidatorCodeLang(
  value: unknown,
): ValidatorCodeLang {
  // All known languages are lower-case, so lower-casing only widens what is
  // accepted; every input that matched before still maps to the same result.
  const raw = typeof value === "string" ? value.trim().toLowerCase() : "";
  if (!raw) {
    return "python";
  }
  if (VALIDATOR_OXC_CODE_LANGS.includes(raw as ValidatorCodeLang)) {
    return raw as ValidatorCodeLang;
  }
  if (raw === "python") {
    return "python";
  }
  if (raw.startsWith("sql:")) {
    if (VALIDATOR_SQL_CODE_LANGS.includes(raw as ValidatorCodeLang)) {
      return raw as ValidatorCodeLang;
    }
    // Unknown dialects fall back to SQLite rather than leaving the SQL family.
    return "sql:sqlite";
  }
  return "python";
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/validators/oxc-code-shape.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { OxcCodeShape } from "../../types";

// Every accepted oxc code shape value.
export const OXC_CODE_SHAPES: OxcCodeShape[] = [
  "auto",
  "module",
  "snippet",
];

/** Type guard: true when `value` exactly matches one of OXC_CODE_SHAPES. */
export function isOxcCodeShape(value: string): value is OxcCodeShape {
  return OXC_CODE_SHAPES.some((shape) => shape === value);
}

/**
 * Coerces an arbitrary value into an OxcCodeShape.
 *
 * Strings are trimmed and lower-cased before matching. Non-strings and
 * unrecognised strings yield "auto".
 */
export function normalizeOxcCodeShape(value: unknown): OxcCodeShape {
  const candidate =
    typeof value === "string" ? value.trim().toLowerCase() : null;
  return candidate !== null && isOxcCodeShape(candidate) ? candidate : "auto";
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/validators/oxc-mode.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { OxcValidationMode } from "../../types";

// Every accepted oxc validation mode value.
export const OXC_VALIDATION_MODES: OxcValidationMode[] = [
  "syntax",
  "lint",
  "syntax+lint",
];

/** Type guard: true when `value` exactly matches one of OXC_VALIDATION_MODES. */
export function isOxcValidationMode(value: string): value is OxcValidationMode {
  return OXC_VALIDATION_MODES.some((mode) => mode === value);
}

/**
 * Coerces an arbitrary value into an OxcValidationMode.
 *
 * Strings are trimmed and lower-cased before matching. Non-strings and
 * unrecognised strings yield "syntax".
 */
export function normalizeOxcValidationMode(value: unknown): OxcValidationMode {
  const candidate =
    typeof value === "string" ? value.trim().toLowerCase() : null;
  return candidate !== null && isOxcValidationMode(candidate)
    ? candidate
    : "syntax";
}


================================================
FILE: studio/frontend/src/features/recipe-studio/utils/variables.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { NodeConfig } from "../types";

// Where a variable comes from: "seed" for a seed node's columns, "column" for
// everything else that getAvailableVariableEntries reports.
export type AvailableVariableSource = "column" | "seed";

// One variable that can be referenced, together with its origin.
export type AvailableVariableEntry = {
  name: string;
  source: AvailableVariableSource;
};

/**
 * Derives dotted references (`<llmName>.<property>`) from the top-level
 * `properties` of a JSON schema string.
 *
 * @param llmName - Prefix for each reference.
 * @param outputFormat - JSON text expected to hold an object with `properties`.
 * @returns One reference per key of `properties`; an empty list when the text
 *   is not valid JSON or has no object-valued `properties`.
 */
function getStructuredRefs(llmName: string, outputFormat: string): string[] {
  try {
    const properties = JSON.parse(outputFormat)?.properties;
    if (!properties || typeof properties !== "object") {
      return [];
    }
    const refs: string[] = [];
    for (const key of Object.keys(properties)) {
      refs.push(`${llmName}.${key}`);
    }
    return refs;
  } catch {
    return [];
  }
}

/**
 * Lists the variables that the node `currentId` can reference.
 *
 * The node itself is excluded. Sampler, expression, validator and LLM nodes
 * contribute their name as a "column". Structured LLM nodes with an output
 * format also contribute one `<name>.<property>` entry per schema property.
 * Seed nodes contribute each non-blank seed column as a "seed" entry. All
 * other kinds contribute nothing.
 *
 * @param configs - All node configs keyed by id.
 * @param currentId - Id of the node asking for variables.
 * @returns Entries in the iteration order of `configs`; duplicates are kept.
 */
export function getAvailableVariableEntries(
  configs: Record<string, NodeConfig>,
  currentId: string,
): AvailableVariableEntry[] {
  const entries: AvailableVariableEntry[] = [];
  const addColumn = (name: string): void => {
    entries.push({ name, source: "column" });
  };

  for (const config of Object.values(configs)) {
    if (config.id === currentId) {
      continue;
    }

    switch (config.kind) {
      case "sampler":
      case "expression":
      case "validator":
        addColumn(config.name);
        break;
      case "seed":
        for (const column of config.seed_columns ?? []) {
          const trimmed = column.trim();
          if (trimmed) {
            entries.push({ name: trimmed, source: "seed" });
          }
        }
        break;
      case "llm":
        addColumn(config.name);
        if (config.llm_type === "structured" && config.output_format) {
          for (const ref of getStructuredRefs(config.name, config.output_format)) {
            addColumn(ref);
          }
        }
        break;
      default:
        // model_provider, model_config, tool_config and any other kind
        // expose no variables.
        break;
    }
  }

  return entries;
}

/**
 * Names-only view of getAvailableVariableEntries: the same entries in the
 * same order, with the source dropped.
 *
 * @param configs - All node configs keyed by id.
 * @param currentId - Id of the node asking for variables.
 */
export function getAvailableVariables(
  configs: Record<string, NodeConfig>,
  currentId: string,
): string[] {
  const names: string[] = [];
  for (const entry of getAvailableVariableEntries(configs, currentId)) {
    names.push(entry.name);
  }
  return names;
}


================================================
FILE: studio/frontend/src/features/studio/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { StudioPage } from "./studio-page";


================================================
FILE: studio/frontend/src/features/studio/sections/charts/chart-preferences-store.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { create } from "zustand";
import type { OutlierMode, ScaleMode } from "./types";
import { DEFAULT_VISIBLE_POINTS, clamp } from "./utils";

// Default window size: half of DEFAULT_VISIBLE_POINTS rounded down, but never
// less than 24.
const DEFAULT_WINDOW_SIZE = Math.max(
  24,
  Math.floor(DEFAULT_VISIBLE_POINTS / 2),
);

// Shape of the chart preferences store: the current values followed by one
// setter per value and a reset action.
type ChartPreferencesState = {
  // Number of steps currently available; it bounds windowSize.
  availableSteps: number;
  // Window size in steps, or null (presumably "no window / show everything" —
  // TODO confirm against the chart components).
  windowSize: number | null;
  // Smoothing factor; setSmoothing clamps it to [0, 0.9].
  smoothing: number;
  // Series and overlay visibility toggles.
  showRaw: boolean;
  showSmoothed: boolean;
  showAvgLine: boolean;
  // Scale mode per chart (loss, learning rate, gradient).
  lossScale: ScaleMode;
  lrScale: ScaleMode;
  gradScale: ScaleMode;
  // Outlier mode per chart.
  lossOutlierMode: OutlierMode;
  gradOutlierMode: OutlierMode;
  lrOutlierMode: OutlierMode;
  // Setters.
  setAvailableSteps: (value: number) => void;
  setWindowSize: (value: number | null) => void;
  setSmoothing: (value: number) => void;
  setShowRaw: (value: boolean) => void;
  setShowSmoothed: (value: boolean) => void;
  setShowAvgLine: (value: boolean) => void;
  setLossScale: (value: ScaleMode) => void;
  setLrScale: (value: ScaleMode) => void;
  setGradScale: (value: ScaleMode) => void;
  setLossOutlierMode: (value: OutlierMode) => void;
  setGradOutlierMode: (value: OutlierMode) => void;
  setLrOutlierMode: (value: OutlierMode) => void;
  // Restores every preference in defaultPreferences; availableSteps is not
  // part of that object.
  resetPreferences: () => void;
};

// Values applied at store creation and again by `resetPreferences`.
// `availableSteps` is deliberately not part of this set.
const defaultPreferences: Pick<
  ChartPreferencesState,
  | "windowSize"
  | "smoothing"
  | "showRaw"
  | "showSmoothed"
  | "showAvgLine"
  | "lossScale"
  | "lrScale"
  | "gradScale"
  | "lossOutlierMode"
  | "gradOutlierMode"
  | "lrOutlierMode"
> = {
  windowSize: DEFAULT_WINDOW_SIZE,
  smoothing: 0.6,
  showRaw: true,
  showSmoothed: true,
  showAvgLine: true,
  lossScale: "linear",
  lrScale: "linear",
  gradScale: "linear",
  lossOutlierMode: "none",
  gradOutlierMode: "none",
  lrOutlierMode: "none",
};

export const useChartPreferencesStore = create<ChartPreferencesState>(
  (set) => ({
    availableSteps: 0,
    ...defaultPreferences,
    setAvailableSteps: (value) =>
      set((state) => {
        const availableSteps = Math.max(0, Math.round(value));
        if (state.windowSize == null || availableSteps <= 0) {
          return { availableSteps };
        }

        if (state.windowSize >= availableSteps) {
          return { availableSteps, windowSize: null };
        }

        return {
          availableSteps,
          windowSize: clamp(Math.round(state.windowSize), 1, availableSteps),
        };
      }),
    setWindowSize: (value) =>
      set((state) => {
        if (value == null || state.availableSteps <= 0) {
          return { windowSize: null };
        }

        const next = clamp(Math.round(value), 1, state.availableSteps);
        return { windowSize: next >= state.availableSteps ? null : next };
      }),
    setSmoothing: (value) => set({ smoothing: clamp(value, 0, 0.9) }),
    setShowRaw: (value) => set({ showRaw: value }),
    setShowSmoothed: (value) => set({ showSmoothed: value }),
    setShowAvgLine: (value) => set({ showAvgLine: value }),
    setLossScale: (value) => set({ lossScale: value }),
    setLrScale: (value) => set({ lrScale: value }),
    setGradScale: (value) => set({ gradScale: value }),
    setLossOutlierMode: (value) => set({ lossOutlierMode: value }),
    setGradOutlierMode: (value) => set({ gradOutlierMode: value }),
    setLrOutlierMode: (value) => set({ lrOutlierMode: value }),
    resetPreferences: () => set({ ...defaultPreferences }),
  }),
);


================================================
FILE: studio/frontend/src/features/studio/sections/charts/chart-settings-sheet.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { Label } from "@/components/ui/label";
import { Separator } from "@/components/ui/separator";
import {
  Sheet,
  SheetContent,
  SheetDescription,
  SheetFooter,
  SheetHeader,
  SheetTitle,
} from "@/components/ui/sheet";
import { Slider } from "@/components/ui/slider";
import { Switch } from "@/components/ui/switch";
import { Settings02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, useState } from "react";
import { useShallow } from "zustand/react/shallow";
import { useChartPreferencesStore } from "./chart-preferences-store";
import type { OutlierMode, ScaleMode } from "./types";

/**
 * Row of mutually exclusive buttons, one per option. The option equal to
 * `value` is drawn with the "secondary" variant, the rest with "outline".
 * Clicking a button calls `onChange` with that option's value.
 */
function ChoiceButtons<T extends string>(props: {
  options: { label: string; value: T }[];
  value: T;
  onChange: (value: T) => void;
}): ReactElement {
  const { options, value: selected, onChange } = props;

  const buttons = options.map(({ label, value: optionValue }) => {
    const isSelected = selected === optionValue;
    return (
      <Button
        key={optionValue}
        type="button"
        size="xs"
        variant={isSelected ? "secondary" : "outline"}
        onClick={() => onChange(optionValue)}
      >
        {label}
      </Button>
    );
  });

  return <div className="flex flex-wrap gap-2">{buttons}</div>;
}

/**
 * One settings line: a label (with an optional description underneath) on
 * the left and the supplied control on the right.
 */
function SettingRow({
  label,
  description,
  control,
}: {
  label: string;
  description?: string;
  control: ReactElement;
}): ReactElement {
  // An absent or empty description renders nothing.
  const descriptionNode = description ? (
    <p className="mt-1 text-xs text-muted-foreground">{description}</p>
  ) : null;

  return (
    <div className="flex items-start justify-between gap-4">
      <div className="min-w-0">
        <Label className="text-sm">{label}</Label>
        {descriptionNode}
      </div>
      <div className="shrink-0">{control}</div>
    </div>
  );
}

// Choices shown in the first button row of every ScaleSection.
const SCALE_CHOICES: { label: string; value: ScaleMode }[] = [
  { label: "Linear", value: "linear" },
  { label: "Log", value: "log" },
];

// Choices shown in the second button row of every ScaleSection.
const OUTLIER_CHOICES: { label: string; value: OutlierMode }[] = [
  { label: "No clip", value: "none" },
  { label: "Clip p99", value: "p99" },
  { label: "Clip p95", value: "p95" },
];

/**
 * Settings group for a single axis: a title followed by one button row that
 * picks the scale mode and one that picks the outlier clipping mode.
 */
function ScaleSection(props: {
  title: string;
  scale: ScaleMode;
  setScale: (value: ScaleMode) => void;
  outlierMode: OutlierMode;
  setOutlierMode: (value: OutlierMode) => void;
}): ReactElement {
  const { title, scale, setScale, outlierMode, setOutlierMode } = props;

  return (
    <div className="space-y-3">
      <div>
        <p className="text-sm font-medium">{title}</p>
        <p className="text-xs text-muted-foreground">Scale and cleanup</p>
      </div>
      <ChoiceButtons options={SCALE_CHOICES} value={scale} onChange={setScale} />
      <ChoiceButtons
        options={OUTLIER_CHOICES}
        value={outlierMode}
        onChange={setOutlierMode}
      />
    </div>
  );
}

/**
 * Gear button plus the side sheet it opens for editing chart preferences.
 *
 * All values are read from and written to `useChartPreferencesStore`; the
 * only local state is whether the sheet is open.
 */
export function ChartSettingsSheet(): ReactElement {
  const [open, setOpen] = useState(false);

  // Shallow-compared selection of every value and action the sheet uses.
  const prefs = useChartPreferencesStore(
    useShallow((state) => ({
      availableSteps: state.availableSteps,
      windowSize: state.windowSize,
      smoothing: state.smoothing,
      showRaw: state.showRaw,
      showSmoothed: state.showSmoothed,
      showAvgLine: state.showAvgLine,
      lossScale: state.lossScale,
      lrScale: state.lrScale,
      gradScale: state.gradScale,
      lossOutlierMode: state.lossOutlierMode,
      gradOutlierMode: state.gradOutlierMode,
      lrOutlierMode: state.lrOutlierMode,
      setWindowSize: state.setWindowSize,
      setSmoothing: state.setSmoothing,
      setShowRaw: state.setShowRaw,
      setShowSmoothed: state.setShowSmoothed,
      setShowAvgLine: state.setShowAvgLine,
      setLossScale: state.setLossScale,
      setLrScale: state.setLrScale,
      setGradScale: state.setGradScale,
      setLossOutlierMode: state.setLossOutlierMode,
      setGradOutlierMode: state.setGradOutlierMode,
      setLrOutlierMode: state.setLrOutlierMode,
      resetPreferences: state.resetPreferences,
    })),
  );
  const { availableSteps, windowSize } = prefs;

  // Window slider bounds: the lower bound is 10, or the step count when
  // fewer than 10 steps exist (never below 1); the upper bound is the step
  // count, never below the lower bound.
  const minWindow = Math.min(10, Math.max(1, availableSteps));
  const sliderMax = Math.max(minWindow, availableSteps || 1);

  // With no explicit window the slider sits at the full step count.
  const effectiveWindowSize = windowSize ?? Math.max(availableSteps, 1);
  const coversEveryStep =
    windowSize == null || effectiveWindowSize >= availableSteps;
  const showingAll = availableSteps > 0 && coversEveryStep;
  const windowLabel = showingAll ? "All" : effectiveWindowSize;

  const openSheet = () => setOpen(true);
  const closeSheet = () => setOpen(false);

  const viewWindowSection = (
    <div className="space-y-3">
      <div>
        <p className="text-sm font-medium">View window</p>
        <p className="text-xs text-muted-foreground">
          Show latest steps only or the full history.
        </p>
      </div>
      <div className="space-y-2">
        <div className="flex items-center justify-between text-xs text-muted-foreground">
          <span>Window</span>
          <span className="tabular-nums">{windowLabel}</span>
        </div>
        <Slider
          value={[effectiveWindowSize]}
          onValueChange={([next]) => prefs.setWindowSize(next)}
          min={minWindow}
          max={sliderMax}
          step={1}
          disabled={availableSteps <= 1}
        />
      </div>
    </div>
  );

  const trainingLossSection = (
    <div className="space-y-4">
      <div>
        <p className="text-sm font-medium">Training loss</p>
        <p className="text-xs text-muted-foreground">
          Control overlays and EMA smoothing.
        </p>
      </div>
      <div className="space-y-2">
        <div className="flex items-center justify-between text-xs text-muted-foreground">
          <span>Smoothing</span>
          <span className="tabular-nums">{prefs.smoothing.toFixed(2)}</span>
        </div>
        <Slider
          value={[prefs.smoothing]}
          onValueChange={([next]) => prefs.setSmoothing(next)}
          min={0}
          max={0.9}
          step={0.01}
        />
        <p className="text-[11px] text-muted-foreground">
          Move right for more smoothing. `0` = raw.
        </p>
      </div>
      <SettingRow
        label="Show raw loss"
        control={
          <Switch
            checked={prefs.showRaw}
            onCheckedChange={prefs.setShowRaw}
          />
        }
      />
      <SettingRow
        label="Show smoothed loss"
        control={
          <Switch
            checked={prefs.showSmoothed}
            onCheckedChange={prefs.setShowSmoothed}
          />
        }
      />
      <SettingRow
        label="Show average line"
        control={
          <Switch
            checked={prefs.showAvgLine}
            onCheckedChange={prefs.setShowAvgLine}
          />
        }
      />
    </div>
  );

  return (
    <>
      <Button
        type="button"
        variant="ghost"
        size="icon-sm"
        className="rounded-full text-muted-foreground hover:bg-muted hover:text-foreground"
        onClick={openSheet}
        aria-label="Open chart settings"
      >
        <HugeiconsIcon icon={Settings02Icon} className="size-4" />
      </Button>
      <Sheet open={open} onOpenChange={setOpen}>
        <SheetContent
          className="w-full sm:max-w-md"
          overlayClassName="bg-transparent backdrop-blur-0"
        >
          <SheetHeader className="pb-4">
            <SheetTitle>Chart Settings</SheetTitle>
            <SheetDescription>
              Tune chart presentation while training keeps running.
            </SheetDescription>
          </SheetHeader>
          <div className="flex-1 space-y-6 overflow-y-auto px-6 pb-6">
            {viewWindowSection}
            <Separator />
            {trainingLossSection}
            <Separator />
            <ScaleSection
              title="Loss axis"
              scale={prefs.lossScale}
              setScale={prefs.setLossScale}
              outlierMode={prefs.lossOutlierMode}
              setOutlierMode={prefs.setLossOutlierMode}
            />
            <Separator />
            <ScaleSection
              title="Gradient norm axis"
              scale={prefs.gradScale}
              setScale={prefs.setGradScale}
              outlierMode={prefs.gradOutlierMode}
              setOutlierMode={prefs.setGradOutlierMode}
            />
            <Separator />
            <ScaleSection
              title="Learning rate axis"
              scale={prefs.lrScale}
              setScale={prefs.setLrScale}
              outlierMode={prefs.lrOutlierMode}
              setOutlierMode={prefs.setLrOutlierMode}
            />
          </div>
          <SheetFooter className="mt-0 border-t border-border/60 bg-background/70 sm:flex-row sm:justify-between">
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={prefs.resetPreferences}
            >
              Reset defaults
            </Button>
            <Button type="button" size="sm" onClick={closeSheet}>
              Done
            </Button>
          </SheetFooter>
        </SheetContent>
      </Sheet>
    </>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts/eval-loss-chart-card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import {
  ChartContainer,
  ChartLegend,
  ChartLegendContent,
  ChartTooltip,
  ChartTooltipContent,
} from "@/components/ui/chart";
import type { ChartConfig } from "@/components/ui/chart";
import { ChartAverageIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import type { ReactElement } from "react";
import { CartesianGrid, Line, LineChart, XAxis, YAxis } from "recharts";
import {
  CHART_CONTAINER_CLASS,
  DEFAULT_CHART_MARGIN,
  DEFAULT_Y_AXIS_WIDTH,
  formatAxisMetric,
  formatMetric,
  formatStepTick,
  placeholderEvalData,
} from "./utils";

const evalLossConfig = {
  loss: { label: "Eval Loss", color: "#ef4444" },
} satisfies ChartConfig;

/**
 * Card showing the evaluation-loss line chart.
 *
 * When `data` is empty the card instead shows a blurred placeholder chart
 * with an overlay message; the message depends on whether training is
 * running with evaluation enabled.
 */
export function EvalLossChartCard({
  data,
  domain,
  ticks,
  isTraining,
  evalEnabled,
}: {
  data: { step: number; loss: number }[];
  domain: [number, number];
  ticks?: number[];
  isTraining: boolean;
  evalEnabled: boolean;
}): ReactElement {
  const hasData = data.length > 0;
  // The title is muted while there is nothing to plot.
  const titleClassName = hasData ? "text-sm" : "text-sm text-muted-foreground";

  let body: ReactElement;
  if (hasData) {
    const tooltipContent = (
      <ChartTooltipContent
        labelFormatter={(_value, payload) =>
          `Step ${payload?.[0]?.payload?.step ?? ""}`
        }
        formatter={(_value, _name, item) => [
          formatMetric(Number(item?.payload?.loss)),
          "Eval Loss",
        ]}
      />
    );

    body = (
      <ChartContainer config={evalLossConfig} className={CHART_CONTAINER_CLASS}>
        <LineChart
          data={data}
          accessibilityLayer={true}
          margin={DEFAULT_CHART_MARGIN}
        >
          <CartesianGrid vertical={false} strokeDasharray="3 3" />
          <XAxis
            dataKey="step"
            type="number"
            domain={["dataMin", "dataMax"]}
            ticks={ticks}
            allowDataOverflow={true}
            allowDecimals={false}
            minTickGap={28}
            tickLine={false}
            axisLine={false}
            tickMargin={8}
            fontSize={10}
            tickFormatter={(tick) => formatStepTick(Number(tick))}
            interval="preserveStartEnd"
          />
          <YAxis
            domain={domain}
            allowDataOverflow={true}
            tickLine={false}
            axisLine={false}
            tickMargin={8}
            tickCount={5}
            fontSize={10}
            width={DEFAULT_Y_AXIS_WIDTH}
            tickFormatter={(tick) => formatAxisMetric(Number(tick))}
          />
          <ChartTooltip content={tooltipContent} />
          <Line
            type="monotone"
            dataKey="loss"
            stroke="var(--color-loss)"
            strokeWidth={2}
            dot={{ r: 3, strokeWidth: 0, fill: "#ef4444" }}
            activeDot={{ r: 4, strokeWidth: 0 }}
            connectNulls={true}
            isAnimationActive={false}
          />
          <ChartLegend content={<ChartLegendContent />} />
        </LineChart>
      </ChartContainer>
    );
  } else {
    // Empty state: pick the overlay copy, then draw it over a blurred
    // placeholder chart.
    const awaitingFirstEval = isTraining && evalEnabled;
    const headline = awaitingFirstEval
      ? "Waiting for first evaluation step…"
      : "Evaluation not configured";
    const hint = awaitingFirstEval
      ? "Chart will appear once eval_steps is reached"
      : "Set eval dataset & eval_steps to track eval loss";

    body = (
      <div className="relative">
        <ChartContainer
          config={evalLossConfig}
          className={`${CHART_CONTAINER_CLASS} blur`}
        >
          <LineChart
            data={placeholderEvalData}
            accessibilityLayer={true}
            margin={DEFAULT_CHART_MARGIN}
          >
            <CartesianGrid vertical={false} strokeDasharray="3 3" />
            <XAxis
              dataKey="step"
              type="number"
              domain={["dataMin", "dataMax"]}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              fontSize={10}
              interval="preserveStartEnd"
            />
            <YAxis
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              tickCount={5}
              fontSize={10}
              width={DEFAULT_Y_AXIS_WIDTH}
            />
            <Line
              type="monotone"
              dataKey="loss"
              stroke="var(--color-loss)"
              strokeWidth={2}
              dot={false}
              isAnimationActive={false}
            />
          </LineChart>
        </ChartContainer>
        <div className="absolute inset-0 flex flex-col items-center justify-center gap-1">
          <HugeiconsIcon
            icon={ChartAverageIcon}
            className="size-5 text-muted-foreground/50"
          />
          <p className="text-sm font-medium text-muted-foreground">{headline}</p>
          <p className="text-xs text-muted-foreground/60">{hint}</p>
        </div>
      </div>
    );
  }

  return (
    <Card data-tour="studio-eval-loss" size="sm">
      <CardHeader>
        <CardTitle className={titleClassName}>Eval Loss</CardTitle>
      </CardHeader>
      <CardContent>{body}</CardContent>
    </Card>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts/grad-norm-chart-card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import {
  ChartContainer,
  ChartLegend,
  ChartLegendContent,
  ChartTooltip,
  ChartTooltipContent,
} from "@/components/ui/chart";
import type { ChartConfig } from "@/components/ui/chart";
import type { ReactElement } from "react";
import { CartesianGrid, Line, LineChart, XAxis, YAxis } from "recharts";
import type { ScaleMode } from "./types";
import {
  CHART_SYNC_ID,
  CHART_CONTAINER_CLASS,
  DEFAULT_CHART_MARGIN,
  DEFAULT_Y_AXIS_WIDTH,
  formatAxisMetric,
  formatMetric,
  formatStepTick,
  fromLog1p,
} from "./utils";

const gradNormConfig = {
  displayGradNorm: { label: "Grad Norm", color: "#f97316" },
} satisfies ChartConfig;

interface GradNormPoint {
  step: number;
  gradNorm: number;
  displayGradNorm: number;
}

/**
 * Card showing the gradient-norm line chart.
 *
 * The line plots `displayGradNorm`; the tooltip reports the point's
 * `gradNorm`. In "log" scale, Y tick values are passed through `fromLog1p`
 * before formatting. The chart joins the `CHART_SYNC_ID` sync group.
 */
export function GradNormChartCard({
  data,
  domain,
  visibleStepDomain,
  xAxisTicks,
  scale,
}: {
  data: GradNormPoint[];
  domain: [number, number];
  visibleStepDomain: [number, number];
  xAxisTicks: number[];
  scale: ScaleMode;
}): ReactElement {
  // Dots are drawn only when there is at most one point.
  const showPoint = data.length > 1 ? false : { r: 3, strokeWidth: 0 };
  const isLogScale = scale === "log";

  const formatStepAxisTick = (value: unknown) => formatStepTick(Number(value));

  const formatValueAxisTick = (value: unknown) => {
    const tick = Number(value);
    if (!Number.isFinite(tick)) {
      return "0";
    }
    return formatAxisMetric(isLogScale ? fromLog1p(tick) : tick);
  };

  const tooltipContent = (
    <ChartTooltipContent
      labelFormatter={(_value, payload) =>
        `Step ${payload?.[0]?.payload?.step ?? ""}`
      }
      formatter={(_value, _name, item) => [
        formatMetric(Number(item?.payload?.gradNorm)),
        "Grad Norm",
      ]}
    />
  );

  return (
    <Card size="sm">
      <CardHeader>
        <CardTitle className="text-sm">Gradient Norm</CardTitle>
      </CardHeader>
      <CardContent>
        <ChartContainer config={gradNormConfig} className={CHART_CONTAINER_CLASS}>
          <LineChart
            data={data}
            syncId={CHART_SYNC_ID}
            syncMethod="value"
            accessibilityLayer={true}
            margin={DEFAULT_CHART_MARGIN}
          >
            <CartesianGrid vertical={false} strokeDasharray="3 3" />
            <XAxis
              dataKey="step"
              type="number"
              domain={visibleStepDomain}
              ticks={xAxisTicks}
              allowDataOverflow={true}
              allowDecimals={false}
              minTickGap={28}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              fontSize={10}
              tickFormatter={formatStepAxisTick}
              interval="preserveStartEnd"
            />
            <YAxis
              domain={domain}
              allowDataOverflow={true}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              tickCount={5}
              fontSize={10}
              width={DEFAULT_Y_AXIS_WIDTH}
              tickFormatter={formatValueAxisTick}
            />
            <ChartTooltip content={tooltipContent} />
            <Line
              type="linear"
              dataKey="displayGradNorm"
              stroke="var(--color-displayGradNorm)"
              strokeWidth={2}
              dot={showPoint}
              activeDot={{ r: 3, strokeWidth: 0 }}
              connectNulls={true}
              strokeLinecap="round"
              strokeLinejoin="round"
              isAnimationActive={false}
            />
            <ChartLegend content={<ChartLegendContent />} />
          </LineChart>
        </ChartContainer>
      </CardContent>
    </Card>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts/learning-rate-chart-card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import {
  ChartContainer,
  ChartLegend,
  ChartLegendContent,
  ChartTooltip,
  ChartTooltipContent,
} from "@/components/ui/chart";
import type { ChartConfig } from "@/components/ui/chart";
import type { ReactElement } from "react";
import { CartesianGrid, Line, LineChart, XAxis, YAxis } from "recharts";
import type { ScaleMode } from "./types";
import {
  CHART_CONTAINER_CLASS,
  CHART_SYNC_ID,
  DEFAULT_CHART_MARGIN,
  DEFAULT_Y_AXIS_WIDTH,
  formatStepTick,
  fromLog1p,
} from "./utils";

const lrConfig = {
  displayLr: { label: "LR", color: "#8b5cf6" },
} satisfies ChartConfig;

interface LearningRatePoint {
  step: number;
  lr: number;
  displayLr: number;
}

/**
 * Card showing the learning-rate line chart.
 *
 * The line plots `displayLr`; the tooltip reports the point's `lr` in
 * exponential notation. In "log" scale, Y tick values are passed through
 * `fromLog1p` before formatting. The chart joins the `CHART_SYNC_ID` sync
 * group.
 */
export function LearningRateChartCard({
  data,
  domain,
  visibleStepDomain,
  xAxisTicks,
  scale,
}: {
  data: LearningRatePoint[];
  domain: [number, number];
  visibleStepDomain: [number, number];
  xAxisTicks: number[];
  scale: ScaleMode;
}): ReactElement {
  // Dots are drawn only when there is at most one point.
  const showPoint = data.length > 1 ? false : { r: 3, strokeWidth: 0 };
  const isLogScale = scale === "log";

  const formatStepAxisTick = (value: unknown) => formatStepTick(Number(value));

  // Y ticks use exponential notation with no fraction digits.
  const formatValueAxisTick = (value: unknown) => {
    const tick = Number(value);
    if (!Number.isFinite(tick)) {
      return "0e+0";
    }
    const shown = isLogScale ? fromLog1p(tick) : tick;
    return shown.toExponential(0);
  };

  const tooltipContent = (
    <ChartTooltipContent
      labelFormatter={(_value, payload) =>
        `Step ${payload?.[0]?.payload?.step ?? ""}`
      }
      formatter={(_value, _name, item) => {
        const lr = Number(item?.payload?.lr);
        const text = Number.isFinite(lr) ? lr.toExponential(3) : "0e+0";
        return [text, "LR"];
      }}
    />
  );

  return (
    <Card size="sm">
      <CardHeader>
        <CardTitle className="text-sm">Learning Rate</CardTitle>
      </CardHeader>
      <CardContent>
        <ChartContainer config={lrConfig} className={CHART_CONTAINER_CLASS}>
          <LineChart
            data={data}
            syncId={CHART_SYNC_ID}
            syncMethod="value"
            accessibilityLayer={true}
            margin={DEFAULT_CHART_MARGIN}
          >
            <CartesianGrid vertical={false} strokeDasharray="3 3" />
            <XAxis
              dataKey="step"
              type="number"
              domain={visibleStepDomain}
              ticks={xAxisTicks}
              allowDataOverflow={true}
              allowDecimals={false}
              minTickGap={28}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              fontSize={10}
              tickFormatter={formatStepAxisTick}
              interval="preserveStartEnd"
            />
            <YAxis
              domain={domain}
              allowDataOverflow={true}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              tickCount={5}
              fontSize={10}
              width={DEFAULT_Y_AXIS_WIDTH}
              tickFormatter={formatValueAxisTick}
            />
            <ChartTooltip content={tooltipContent} />
            <Line
              type="linear"
              dataKey="displayLr"
              stroke="var(--color-displayLr)"
              strokeWidth={2}
              dot={showPoint}
              activeDot={{ r: 3, strokeWidth: 0 }}
              connectNulls={true}
              strokeLinecap="round"
              strokeLinejoin="round"
              isAnimationActive={false}
            />
            <ChartLegend content={<ChartLegendContent />} />
          </LineChart>
        </ChartContainer>
      </CardContent>
    </Card>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts/training-loss-chart-card.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import {
  ChartContainer,
  ChartLegend,
  ChartLegendContent,
  ChartTooltip,
  ChartTooltipContent,
} from "@/components/ui/chart";
import type { ChartConfig } from "@/components/ui/chart";
import type { ReactElement } from "react";
import {
  CartesianGrid,
  Line,
  LineChart,
  ReferenceLine,
  XAxis,
  YAxis,
} from "recharts";
import type { ScaleMode } from "./types";
import {
  CHART_SYNC_ID,
  CHART_CONTAINER_CLASS,
  DEFAULT_CHART_MARGIN,
  DEFAULT_Y_AXIS_WIDTH,
  formatAxisMetric,
  formatMetric,
  formatStepTick,
  fromLog1p,
} from "./utils";

const lossConfig = {
  displayLoss: { label: "Loss", color: "#3b82f6" },
  displaySmoothed: { label: "Smoothed", color: "#f59e0b" },
} satisfies ChartConfig;

interface LossChartPoint {
  step: number;
  loss: number;
  smoothed: number;
  displayLoss: number;
  displaySmoothed: number;
}

/**
 * Card showing the training-loss line chart.
 *
 * Up to three overlays are drawn, each behind its own flag: the raw loss
 * line (`displayLoss`), the smoothed line (`displaySmoothed`) and a dashed
 * average reference line placed at `avgDisplay` and labelled with `avgRaw`.
 * The tooltip reports each point's `loss` / `smoothed` fields. In "log"
 * scale, Y tick values are passed through `fromLog1p` before formatting.
 * The chart joins the `CHART_SYNC_ID` sync group.
 */
export function TrainingLossChartCard({
  data,
  domain,
  visibleStepDomain,
  xAxisTicks,
  avgRaw,
  avgDisplay,
  showRaw,
  showSmoothed,
  showAvgLine,
  scale,
}: {
  data: LossChartPoint[];
  domain: [number, number];
  visibleStepDomain: [number, number];
  xAxisTicks: number[];
  avgRaw: number;
  avgDisplay: number;
  showRaw: boolean;
  showSmoothed: boolean;
  showAvgLine: boolean;
  scale: ScaleMode;
}): ReactElement {
  // Dots are drawn only when there is at most one point.
  const showPoint = data.length > 1 ? false : { r: 3, strokeWidth: 0 };
  const isLogScale = scale === "log";
  // The raw line is faded while the smoothed line is drawn with it.
  const rawLineOpacity = showSmoothed ? 0.35 : 1;

  const formatStepAxisTick = (value: unknown) => formatStepTick(Number(value));

  const formatValueAxisTick = (value: unknown) => {
    const tick = Number(value);
    if (!Number.isFinite(tick)) {
      return "0";
    }
    return formatAxisMetric(isLogScale ? fromLog1p(tick) : tick);
  };

  const tooltipContent = (
    <ChartTooltipContent
      labelFormatter={(_value, payload) =>
        `Step ${payload?.[0]?.payload?.step ?? ""}`
      }
      formatter={(_value, name, item) => {
        const isSmoothedSeries = name === "displaySmoothed";
        const raw = Number(
          isSmoothedSeries ? item?.payload?.smoothed : item?.payload?.loss,
        );
        return [formatMetric(raw), isSmoothedSeries ? "Smoothed" : "Loss"];
      }}
    />
  );

  return (
    <Card data-tour="studio-training-loss" size="sm">
      <CardHeader>
        <CardTitle className="text-sm">Training Loss</CardTitle>
      </CardHeader>
      <CardContent>
        <ChartContainer config={lossConfig} className={CHART_CONTAINER_CLASS}>
          <LineChart
            data={data}
            syncId={CHART_SYNC_ID}
            syncMethod="value"
            accessibilityLayer={true}
            margin={DEFAULT_CHART_MARGIN}
          >
            <CartesianGrid vertical={false} strokeDasharray="3 3" />
            <XAxis
              dataKey="step"
              type="number"
              domain={visibleStepDomain}
              ticks={xAxisTicks}
              allowDataOverflow={true}
              allowDecimals={false}
              minTickGap={28}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              fontSize={10}
              tickFormatter={formatStepAxisTick}
              interval="preserveStartEnd"
            />
            <YAxis
              domain={domain}
              allowDataOverflow={true}
              tickLine={false}
              axisLine={false}
              tickMargin={8}
              tickCount={5}
              fontSize={10}
              width={DEFAULT_Y_AXIS_WIDTH}
              tickFormatter={formatValueAxisTick}
            />
            <ChartTooltip content={tooltipContent} />
            {showAvgLine && (
              <ReferenceLine
                y={avgDisplay}
                stroke="#3b82f6"
                strokeDasharray="4 4"
                strokeOpacity={0.5}
                label={{
                  value: `avg ${formatMetric(avgRaw)}`,
                  position: "insideTopRight",
                  fontSize: 10,
                  fill: "#3b82f6",
                }}
              />
            )}
            {showRaw && (
              <Line
                type="linear"
                dataKey="displayLoss"
                stroke="var(--color-displayLoss)"
                strokeWidth={1.2}
                strokeOpacity={rawLineOpacity}
                dot={showPoint}
                activeDot={{ r: 3, strokeWidth: 0 }}
                connectNulls={true}
                strokeLinecap="round"
                strokeLinejoin="round"
                isAnimationActive={false}
              />
            )}
            {showSmoothed && (
              <Line
                type="linear"
                dataKey="displaySmoothed"
                stroke="var(--color-displaySmoothed)"
                strokeWidth={2.2}
                dot={showPoint}
                activeDot={{ r: 3, strokeWidth: 0 }}
                connectNulls={true}
                strokeLinecap="round"
                strokeLinejoin="round"
                isAnimationActive={false}
              />
            )}
            <ChartLegend content={<ChartLegendContent />} />
          </LineChart>
        </ChartContainer>
      </CardContent>
    </Card>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/** Y-axis scale choices: plain values or a logarithmic transform. */
export type ScaleMode = "linear" | "log";
/** Outlier handling choices: no capping, or an upper-percentile ("p99" / "p95") mode. */
export type OutlierMode = "none" | "p99" | "p95";

/** One training-loss sample: the step it was recorded at and the loss value. */
export type LossHistoryItem = { step: number; loss: number };
/** A loss sample extended with a `smoothed` value. */
export type SmoothedLossItem = LossHistoryItem & { smoothed: number };

/** Bundle of per-metric history arrays, each entry keyed by `step`. */
export interface TrainingChartSeries {
  // Training loss per step.
  lossHistory: LossHistoryItem[];
  // Learning rate per step.
  lrHistory: { step: number; lr: number }[];
  // Gradient norm per step.
  gradNormHistory: { step: number; gradNorm: number }[];
  // Evaluation loss per step.
  evalLossHistory: { step: number; loss: number }[];
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts/utils.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { LossHistoryItem, OutlierMode, SmoothedLossItem } from "./types";

// Shared identifier string for the training metric charts.
// NOTE(review): presumably passed as a chart `syncId` so hover state is linked — confirm at the call sites.
export const CHART_SYNC_ID = "train-metrics-sync";
// Upper bound on the number of points a series is reduced to before rendering.
export const MAX_RENDER_POINTS = 800;
// NOTE(review): looks like the default window size (in steps) — confirm against the preferences store.
export const DEFAULT_VISIBLE_POINTS = 160;
// Tailwind classes giving every chart container the same fixed height and full width.
export const CHART_CONTAINER_CLASS = "h-[220px] w-full";
// Default margins (in px) around the plotting area.
export const DEFAULT_CHART_MARGIN = { top: 4, right: 8, bottom: 0, left: 4 };
// Default width (in px) reserved for the Y axis.
export const DEFAULT_Y_AXIS_WIDTH = 45;
// Matches an optional "." followed by one or more zeros at the end of a string.
// NOTE: it also matches the trailing zeros of a string without a decimal point (e.g. "1000").
const TRAILING_ZEROES_RE = /\.?0+$/;
// Matches the exact string "-0".
const NEGATIVE_ZERO_RE = /^-0$/;

// Static sample eval-loss points.
// NOTE(review): presumably rendered as a placeholder while no real eval data exists — confirm.
export const placeholderEvalData = [
  { step: 0, loss: 2.8 },
  { step: 50, loss: 2.4 },
  { step: 100, loss: 2.0 },
  { step: 150, loss: 1.7 },
  { step: 200, loss: 1.5 },
];

/**
 * Map a value onto the log10(1 + x) display scale.
 *
 * Non-finite inputs and negative inputs are treated as 0, so the result is
 * always a finite number >= 0.
 */
export function toLog1p(value: number): number {
  let nonNegative = 0;
  if (Number.isFinite(value)) {
    nonNegative = Math.max(value, 0);
  }
  const shifted = nonNegative + 1;
  return Math.log10(shifted);
}

/**
 * Inverse of the log10(1 + x) display transform: returns 10^value - 1,
 * floored at 0 so slightly negative inputs never yield a negative metric.
 */
export function fromLog1p(value: number): number {
  const restored = Math.pow(10, value) - 1;
  return Math.max(0, restored);
}

/**
 * Format a metric value for tooltips and labels.
 *
 * The number of decimals depends on the magnitude (0 decimals from 1000 up,
 * up to 8 decimals below 0.0001). Trailing fractional zeros are removed and
 * a resulting "-0" is shown as "0". Non-finite input is rendered as "0".
 *
 * @param value Metric value to format.
 * @returns The formatted string.
 */
export function formatMetric(value: number): string {
  if (!Number.isFinite(value)) {
    return "0";
  }
  const abs = Math.abs(value);
  let decimals = 6;

  if (abs >= 1000) {
    decimals = 0;
  } else if (abs >= 100) {
    decimals = 2;
  } else if (abs >= 1) {
    decimals = 4;
  } else if (abs >= 0.01) {
    decimals = 5;
  } else if (abs >= 0.0001) {
    decimals = 6;
  } else {
    decimals = 8;
  }

  const fixed = value.toFixed(decimals);
  // Only trim when a fractional part exists. With zero decimals the string has
  // no "." and the trailing-zero pattern would eat significant integer zeros
  // (e.g. "1000" -> "1").
  const trimmed =
    decimals > 0 ? fixed.replace(TRAILING_ZEROES_RE, "") : fixed;

  return trimmed.replace(NEGATIVE_ZERO_RE, "0");
}

/**
 * Format a metric value for axis ticks, using fewer decimals than the
 * tooltip formatter (0 decimals from 1000 up, at most 5 for tiny values).
 *
 * Trailing fractional zeros are removed and a resulting "-0" is shown as
 * "0". Non-finite input is rendered as "0".
 *
 * @param value Tick value to format.
 * @returns The formatted string.
 */
export function formatAxisMetric(value: number): string {
  if (!Number.isFinite(value)) {
    return "0";
  }

  const abs = Math.abs(value);
  let decimals = 4;

  if (abs >= 1000) {
    decimals = 0;
  } else if (abs >= 100) {
    decimals = 1;
  } else if (abs >= 1) {
    decimals = 3;
  } else if (abs >= 0.01) {
    decimals = 4;
  } else {
    decimals = 5;
  }

  const fixed = value.toFixed(decimals);
  // Only trim when a fractional part exists. With zero decimals the string has
  // no "." and the trailing-zero pattern would eat significant integer zeros
  // (e.g. a "2000" tick would be shown as "2").
  const trimmed =
    decimals > 0 ? fixed.replace(TRAILING_ZEROES_RE, "") : fixed;

  return trimmed.replace(NEGATIVE_ZERO_RE, "0");
}

/**
 * Format a step number for the X axis: millions get an "M" suffix, thousands
 * a "k" suffix (both with one decimal), anything smaller is rounded to an
 * integer string.
 */
export function formatStepTick(value: number): string {
  const MILLION = 1_000_000;
  const THOUSAND = 1_000;

  if (value >= MILLION) {
    const millions = value / MILLION;
    return `${millions.toFixed(1)}M`;
  }
  if (value >= THOUSAND) {
    const thousands = value / THOUSAND;
    return `${thousands.toFixed(1)}k`;
  }
  return String(Math.round(value));
}

/**
 * Thin out a series by keeping every `stride`-th element, where the stride is
 * chosen from `maxPoints`. The final element is always kept so the most
 * recent sample stays visible. Series that already fit are returned as-is
 * (same array instance).
 */
export function compressSeries<T>(data: T[], maxPoints: number): T[] {
  const total = data.length;
  if (total <= maxPoints) {
    return data;
  }

  const stride = Math.ceil(total / maxPoints);
  const lastIndex = total - 1;
  const kept: T[] = [];

  data.forEach((item, index) => {
    if (index === lastIndex || index % stride === 0) {
      kept.push(item);
    }
  });

  return kept;
}

/** Restrict `value` to the inclusive range [`min`, `max`]. */
export function clamp(value: number, min: number, max: number): number {
  const raisedToMin = Math.max(min, value);
  return Math.min(max, raisedToMin);
}

/**
 * Build X-axis tick positions between `min` and `max`.
 *
 * Ticks start at `min`, advance by a whole-number step size derived from
 * `targetCount`, and always end with `max`. Non-finite bounds yield `[0, 1]`;
 * an empty or inverted range yields `[min, max]` unchanged.
 */
export function buildStepTicks(
  min: number,
  max: number,
  targetCount = 6,
): number[] {
  const boundsAreFinite = Number.isFinite(min) && Number.isFinite(max);
  if (!boundsAreFinite) {
    return [0, 1];
  }
  if (max <= min) {
    return [min, max];
  }

  const span = max - min;
  // Step size is at least 1 so ticks never fall between whole steps more
  // densely than one per step.
  const stepSize = Math.max(1, Math.ceil(span / (targetCount - 1)));

  const ticks: number[] = [];
  for (let tick = min; tick < max; tick += stepSize) {
    ticks.push(tick);
  }
  ticks.push(max);

  return Array.from(new Set(ticks));
}

/**
 * Compute a padded [low, high] Y-axis domain for a set of values.
 *
 * Non-finite values are ignored. With no finite values the domain is
 * `[0, 1]`. When all values are equal the domain is padded by 8% of the
 * magnitude (or 0.1 around zero); otherwise by 12% of the value range.
 *
 * @param values Values to be displayed.
 * @returns The `[low, high]` domain.
 */
export function buildYDomain(values: number[]): [number, number] {
  // Single pass instead of Math.min(...values) / Math.max(...values):
  // spreading a very large array into a call can exceed the engine's
  // argument limit and throw a RangeError.
  let min = Number.POSITIVE_INFINITY;
  let max = Number.NEGATIVE_INFINITY;
  let hasFinite = false;

  for (const value of values) {
    if (!Number.isFinite(value)) {
      continue;
    }
    hasFinite = true;
    if (value < min) {
      min = value;
    }
    if (value > max) {
      max = value;
    }
  }

  if (!hasFinite) {
    return [0, 1];
  }

  if (min === max) {
    // Flat series: pad relative to the magnitude so the line is not drawn on
    // the chart edge; fall back to a fixed pad when the value is zero.
    const base = Math.abs(min);
    const pad = base > 0 ? base * 0.08 : 0.1;
    return [min - pad, max + pad];
  }

  const pad = (max - min) * 0.12;
  return [min - pad, max + pad];
}

/**
 * Pick the upper-percentile value (99th or 95th, by `mode`) of the finite
 * entries in `values`. Returns null when capping is disabled or when fewer
 * than three finite values are available.
 */
function getUpperPercentile(
  values: number[],
  mode: OutlierMode,
): number | null {
  if (mode === "none") {
    return null;
  }

  // `filter` already yields a fresh array, so sorting it leaves the caller's
  // input untouched.
  const ascending = values
    .filter((candidate) => Number.isFinite(candidate))
    .sort((left, right) => left - right);
  if (ascending.length < 3) {
    return null;
  }

  const quantile = mode === "p99" ? 0.99 : 0.95;
  const lastIndex = ascending.length - 1;
  const rank = Math.floor(lastIndex * quantile);
  const index = Math.min(lastIndex, Math.max(0, rank));
  return ascending[index] ?? null;
}

/**
 * Clip every value to the upper percentile selected by `mode`.
 * When no cap applies, the input array itself is returned.
 */
export function applyOutlierCap(values: number[], mode: OutlierMode): number[] {
  const cap = getUpperPercentile(values, mode);
  return cap === null
    ? values
    : values.map((value) => Math.min(value, cap));
}

/**
 * Attach a bias-corrected exponential moving average to each loss sample.
 *
 * `alpha` is the weight kept from the previous running value. Samples with a
 * non-finite loss pass through with `smoothed` equal to the raw loss and do
 * not update the running average; a series whose losses are all identical is
 * returned unsmoothed.
 */
export function ema(
  data: LossHistoryItem[],
  alpha: number,
): SmoothedLossItem[] {
  if (data.length === 0) {
    return [];
  }

  const firstLoss = data[0]?.loss;
  const isFlat = !data.some((point) => point.loss !== firstLoss);

  const result: SmoothedLossItem[] = [];
  let running = 0;
  let seen = 0;

  for (const point of data) {
    const { loss } = point;

    if (isFlat || !Number.isFinite(loss)) {
      result.push({ ...point, smoothed: loss });
      continue;
    }

    running = running * alpha + (1 - alpha) * loss;
    seen += 1;

    // Divide out the startup bias caused by initialising the average at 0.
    const correction = alpha === 1 ? 1 : 1 - alpha ** seen;
    result.push({ ...point, smoothed: running / correction });
  }

  return result;
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts-content.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type ReactElement, useEffect, useMemo } from "react";
import { useShallow } from "zustand/react/shallow";
import { useChartPreferencesStore } from "./charts/chart-preferences-store";
import { EvalLossChartCard } from "./charts/eval-loss-chart-card";
import { GradNormChartCard } from "./charts/grad-norm-chart-card";
import { LearningRateChartCard } from "./charts/learning-rate-chart-card";
import { TrainingLossChartCard } from "./charts/training-loss-chart-card";
import type { TrainingChartSeries } from "./charts/types";
import {
  MAX_RENDER_POINTS,
  applyOutlierCap,
  buildStepTicks,
  buildYDomain,
  clamp,
  compressSeries,
  ema,
  toLog1p,
} from "./charts/utils";

/** Loss sample in display space (i.e. after any scale transform has been applied). */
type LossDisplayPoint = {
  // Training step of the sample.
  step: number;
  // Raw loss value as plotted.
  displayLoss: number;
  // Smoothed loss value as plotted.
  displaySmoothed: number;
};

/** True when `step` lies inside the inclusive `[start, end]` step domain. */
function isStepVisible(step: number, domain: [number, number]): boolean {
  const [start, end] = domain;
  return start <= step && step <= end;
}

/**
 * Gather the finite display values of all points inside `domain`.
 *
 * For each visible point the raw value (if requested) is emitted before the
 * smoothed value (if requested); non-finite values are dropped.
 */
function collectLossValues(
  data: LossDisplayPoint[],
  domain: [number, number],
  options: { includeRaw: boolean; includeSmoothed: boolean },
): number[] {
  const { includeRaw, includeSmoothed } = options;

  return data.reduce<number[]>((collected, point) => {
    if (isStepVisible(point.step, domain)) {
      if (includeRaw && Number.isFinite(point.displayLoss)) {
        collected.push(point.displayLoss);
      }
      if (includeSmoothed && Number.isFinite(point.displaySmoothed)) {
        collected.push(point.displaySmoothed);
      }
    }
    return collected;
  }, []);
}

/**
 * Renders the grid of four training charts (training loss, gradient norm,
 * learning rate, eval loss).
 *
 * Takes the raw metric histories, applies smoothing, point reduction, the
 * selected scale transform and outlier capping according to the chart
 * preferences store, and passes the resulting data, X window and Y domains
 * to the individual chart cards.
 */
export function ChartsContent({
  metrics,
  isTraining,
  evalEnabled,
}: {
  metrics: TrainingChartSeries;
  isTraining: boolean;
  evalEnabled: boolean;
}): ReactElement {
  // Read all chart preferences in one shallow-compared selection.
  const {
    windowSize,
    smoothing,
    showRaw,
    showSmoothed,
    showAvgLine,
    lossScale,
    lrScale,
    gradScale,
    lossOutlierMode,
    gradOutlierMode,
    lrOutlierMode,
    setAvailableSteps,
  } = useChartPreferencesStore(
    useShallow((state) => ({
      windowSize: state.windowSize,
      smoothing: state.smoothing,
      showRaw: state.showRaw,
      showSmoothed: state.showSmoothed,
      showAvgLine: state.showAvgLine,
      lossScale: state.lossScale,
      lrScale: state.lrScale,
      gradScale: state.gradScale,
      lossOutlierMode: state.lossOutlierMode,
      gradOutlierMode: state.gradOutlierMode,
      lrOutlierMode: state.lrOutlierMode,
      setAvailableSteps: state.setAvailableSteps,
    })),
  );

  // Smoothing runs on the full history, before any point reduction.
  const smoothedData = useMemo(
    () =>
      metrics.lossHistory.length > 0 ? ema(metrics.lossHistory, smoothing) : [],
    [metrics.lossHistory, smoothing],
  );

  // Reduce every series to at most roughly MAX_RENDER_POINTS samples.
  const reducedLossData = useMemo(
    () => compressSeries(smoothedData, MAX_RENDER_POINTS),
    [smoothedData],
  );
  const reducedGradNormData = useMemo(
    () => compressSeries(metrics.gradNormHistory, MAX_RENDER_POINTS),
    [metrics.gradNormHistory],
  );
  const reducedLrData = useMemo(
    () => compressSeries(metrics.lrHistory, MAX_RENDER_POINTS),
    [metrics.lrHistory],
  );
  const reducedEvalLossData = useMemo(
    () => compressSeries(metrics.evalLossHistory, MAX_RENDER_POINTS),
    [metrics.evalLossHistory],
  );

  // Sorted union of the step values seen in the loss, grad-norm and LR
  // histories (eval loss is not included).
  const allSteps = useMemo(() => {
    const set = new Set<number>();
    for (const point of metrics.lossHistory) {
      set.add(point.step);
    }
    for (const point of metrics.gradNormHistory) {
      set.add(point.step);
    }
    for (const point of metrics.lrHistory) {
      set.add(point.step);
    }
    return Array.from(set).sort((a, b) => a - b);
  }, [metrics.gradNormHistory, metrics.lossHistory, metrics.lrHistory]);

  // Publish the number of distinct steps to the preferences store.
  useEffect(() => {
    setAvailableSteps(allSteps.length);
  }, [allSteps.length, setAvailableSteps]);

  // A null window size means "show everything"; otherwise clamp the
  // requested size to the number of available steps.
  const stepCount = Math.max(1, allSteps.length);
  const effectiveWindowSize =
    windowSize == null
      ? stepCount
      : clamp(Math.round(windowSize), 1, stepCount);

  // X-axis window covering the last `effectiveWindowSize` steps.
  const visibleStepDomain = useMemo<[number, number]>(() => {
    if (allSteps.length === 0) {
      return [0, 1];
    }

    const endIndex = allSteps.length - 1;
    const startIndex = Math.max(0, endIndex - effectiveWindowSize + 1);
    const minStep = allSteps[0] ?? 0;
    const startStep = allSteps[startIndex] ?? minStep;
    const endStep = allSteps[endIndex] ?? startStep;

    // Avoid a zero-width domain when only one step is visible.
    if (startStep === endStep) {
      return [startStep, startStep + 4];
    }
    // Widen very narrow windows backwards, without going before the first step.
    if (endStep - startStep < 6) {
      return [Math.max(minStep, endStep - 6), endStep];
    }
    return [startStep, endStep];
  }, [allSteps, effectiveWindowSize]);

  const xAxisTicks = useMemo(
    () => buildStepTicks(visibleStepDomain[0], visibleStepDomain[1]),
    [visibleStepDomain],
  );

  // Add display-space fields: log scale uses log10(1 + x), linear uses the
  // value unchanged.
  const displayLossData = useMemo(
    () =>
      reducedLossData.map((point) => ({
        ...point,
        displayLoss: lossScale === "log" ? toLog1p(point.loss) : point.loss,
        displaySmoothed:
          lossScale === "log" ? toLog1p(point.smoothed) : point.smoothed,
      })),
    [lossScale, reducedLossData],
  );

  const displayGradData = useMemo(
    () =>
      reducedGradNormData.map((point) => ({
        ...point,
        displayGradNorm:
          gradScale === "log" ? toLog1p(point.gradNorm) : point.gradNorm,
      })),
    [gradScale, reducedGradNormData],
  );

  const displayLrData = useMemo(
    () =>
      reducedLrData.map((point) => ({
        ...point,
        displayLr: lrScale === "log" ? toLog1p(point.lr) : point.lr,
      })),
    [lrScale, reducedLrData],
  );

  // Values used for the loss Y domain: only the series that are currently
  // shown, falling back to both series when that selection yields nothing.
  const visibleLossDisplayValues = useMemo(() => {
    const visibleValues = collectLossValues(
      displayLossData,
      visibleStepDomain,
      {
        includeRaw: showRaw,
        includeSmoothed: showSmoothed,
      },
    );

    if (visibleValues.length > 0) {
      return visibleValues;
    }

    return collectLossValues(displayLossData, visibleStepDomain, {
      includeRaw: true,
      includeSmoothed: true,
    });
  }, [displayLossData, showRaw, showSmoothed, visibleStepDomain]);

  // Finite grad-norm display values inside the visible step window.
  const visibleGradDisplayValues = useMemo(
    () =>
      displayGradData
        .filter(
          (point) =>
            point.step >= visibleStepDomain[0] &&
            point.step <= visibleStepDomain[1],
        )
        .map((point) => point.displayGradNorm)
        .filter((value) => Number.isFinite(value)),
    [displayGradData, visibleStepDomain],
  );

  // Finite learning-rate display values inside the visible step window.
  const visibleLrDisplayValues = useMemo(
    () =>
      displayLrData
        .filter(
          (point) =>
            point.step >= visibleStepDomain[0] &&
            point.step <= visibleStepDomain[1],
        )
        .map((point) => point.displayLr)
        .filter((value) => Number.isFinite(value)),
    [displayLrData, visibleStepDomain],
  );

  // Y domains are built from the outlier-capped visible values; the plotted
  // data itself is not capped here.
  const lossDomain = useMemo(
    () =>
      buildYDomain(applyOutlierCap(visibleLossDisplayValues, lossOutlierMode)),
    [lossOutlierMode, visibleLossDisplayValues],
  );
  const gradDomain = useMemo(
    () =>
      buildYDomain(applyOutlierCap(visibleGradDisplayValues, gradOutlierMode)),
    [gradOutlierMode, visibleGradDisplayValues],
  );
  const lrDomain = useMemo(
    () => buildYDomain(applyOutlierCap(visibleLrDisplayValues, lrOutlierMode)),
    [lrOutlierMode, visibleLrDisplayValues],
  );

  // The eval-loss chart uses its own domain and ticks over all of its points
  // rather than the shared visible step window.
  const evalLossDomain = useMemo(() => {
    const vals = reducedEvalLossData.map((point) => point.loss);
    return buildYDomain(vals);
  }, [reducedEvalLossData]);

  const evalLossStepTicks = useMemo(() => {
    if (reducedEvalLossData.length < 2) {
      return undefined;
    }
    const min = reducedEvalLossData[0].step;
    const max = reducedEvalLossData[reducedEvalLossData.length - 1].step;
    return buildStepTicks(min, max);
  }, [reducedEvalLossData]);

  // Mean of the full (unreduced, unwindowed) loss history, rounded to four
  // decimals; recomputed on every render.
  const avgRaw =
    metrics.lossHistory.length > 0
      ? +(
          metrics.lossHistory.reduce((sum, point) => sum + point.loss, 0) /
          metrics.lossHistory.length
        ).toFixed(4)
      : 0;
  // Position of the average line in display space.
  const avgDisplay = lossScale === "log" ? toLog1p(avgRaw) : avgRaw;

  return (
    <div className="grid grid-cols-1 gap-6 lg:grid-cols-2">
      <TrainingLossChartCard
        data={displayLossData}
        domain={lossDomain}
        visibleStepDomain={visibleStepDomain}
        xAxisTicks={xAxisTicks}
        avgRaw={avgRaw}
        avgDisplay={avgDisplay}
        showRaw={showRaw}
        showSmoothed={showSmoothed}
        showAvgLine={showAvgLine}
        scale={lossScale}
      />
      <GradNormChartCard
        data={displayGradData}
        domain={gradDomain}
        visibleStepDomain={visibleStepDomain}
        xAxisTicks={xAxisTicks}
        scale={gradScale}
      />
      <LearningRateChartCard
        data={displayLrData}
        domain={lrDomain}
        visibleStepDomain={visibleStepDomain}
        xAxisTicks={xAxisTicks}
        scale={lrScale}
      />
      <EvalLossChartCard
        data={reducedEvalLossData}
        domain={evalLossDomain}
        ticks={evalLossStepTicks}
        isTraining={isTraining}
        evalEnabled={evalEnabled}
      />
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/charts-section.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useTrainingRuntimeStore } from "@/features/training";
import { type ReactElement, Suspense, lazy, useMemo } from "react";

// Load the charts implementation lazily; the module exposes a named export,
// so it is re-wrapped as the `default` export that `lazy` expects.
const ChartsContent = lazy(() =>
  import("./charts-content").then((module) => ({
    default: module.ChartsContent,
  })),
);
// Stable React keys for the four placeholder tiles shown while the charts load.
const SKELETON_KEYS = [
  "chart-skeleton-1",
  "chart-skeleton-2",
  "chart-skeleton-3",
  "chart-skeleton-4",
];

/**
 * Training charts section.
 *
 * Reads the metric histories from the training runtime store, converts them
 * into the shape expected by the charts, and renders the lazily loaded
 * `ChartsContent` behind a skeleton fallback. Renders nothing until at least
 * one of the loss, learning-rate or grad-norm histories has data.
 */
export function ChartsSection(): ReactElement | null {
  const isTraining = useTrainingRuntimeStore((state) => state.isTrainingRunning);
  const evalEnabled = useTrainingRuntimeStore((state) => state.evalEnabled);
  const lossHistoryRaw = useTrainingRuntimeStore((state) => state.lossHistory);
  const lrHistoryRaw = useTrainingRuntimeStore((state) => state.lrHistory);
  const gradNormHistoryRaw = useTrainingRuntimeStore(
    (state) => state.gradNormHistory,
  );
  const evalLossHistoryRaw = useTrainingRuntimeStore(
    (state) => state.evalLossHistory,
  );

  // Each history is converted in its own memo so that a series keeps its
  // array identity while its source is unchanged; the charts memoise their
  // derived data on these arrays.
  const lossHistory = useMemo(
    () =>
      lossHistoryRaw.map((point) => ({
        step: point.step,
        loss: point.value,
      })),
    [lossHistoryRaw],
  );
  const lrHistory = useMemo(
    () =>
      lrHistoryRaw.map((point) => ({
        step: point.step,
        lr: point.value,
      })),
    [lrHistoryRaw],
  );
  const gradNormHistory = useMemo(
    () =>
      gradNormHistoryRaw.map((point) => ({
        step: point.step,
        gradNorm: point.value,
      })),
    [gradNormHistoryRaw],
  );
  const evalLossHistory = useMemo(
    () =>
      evalLossHistoryRaw.map((point) => ({
        step: point.step,
        loss: point.value,
      })),
    [evalLossHistoryRaw],
  );

  // The current/total step counters are intentionally not part of this
  // object: the charts only consume the four histories, and depending on the
  // counters would rebuild every series on each step change.
  const series = useMemo(
    () => ({ lossHistory, lrHistory, gradNormHistory, evalLossHistory }),
    [evalLossHistory, gradNormHistory, lossHistory, lrHistory],
  );

  if (
    series.lossHistory.length === 0 &&
    series.lrHistory.length === 0 &&
    series.gradNormHistory.length === 0
  ) {
    return null;
  }

  return (
    <Suspense
      fallback={
        <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
          {SKELETON_KEYS.map((key) => (
            <div
              key={key}
              className="h-[280px] rounded-xl border bg-muted/30 animate-pulse"
            />
          ))}
        </div>
      }
    >
      <ChartsContent metrics={series} isTraining={isTraining} evalEnabled={evalEnabled} />
    </Suspense>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/dataset-preview-dialog-mapping.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Badge } from "@/components/ui/badge";
import { Button } from "@/components/ui/button";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import type { CheckFormatResponse } from "@/features/training/types/datasets";
import { cn } from "@/lib/utils";
import { AlertCircleIcon, CheckmarkCircle02Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { Loader2, Sparkles } from "lucide-react";

// Role names offered for each dataset format / modality.
const CHATML_ROLES = ["system", "user", "assistant"] as const;
const ALPACA_ROLES = ["instruction", "input", "output"] as const;
const SHAREGPT_ROLES = ["system", "human", "gpt"] as const;
const VLM_ROLES = ["image", "text"] as const;
const AUDIO_ROLES = ["audio", "text", "speaker_id"] as const;

// Human-readable label for every role name above; lookups fall back to the
// raw role name when no label is defined.
const ROLE_LABELS: Record<string, string> = {
  system: "System",
  user: "User",
  assistant: "Assistant",
  human: "Human",
  gpt: "GPT",
  instruction: "Instruction",
  input: "Input",
  output: "Output",
  image: "Image",
  text: "Text",
  audio: "Audio",
  speaker_id: "Speaker ID",
};

/**
 * Return the roles a column may be assigned for the given dataset kind.
 * Audio takes precedence over VLM, which takes precedence over the text
 * `format`; any unrecognised or missing format falls back to the chatml roles.
 */
export function getAvailableRoles(isVlm: boolean, format?: string, isAudio?: boolean): readonly string[] {
  if (isAudio) {
    return AUDIO_ROLES;
  }
  if (isVlm) {
    return VLM_ROLES;
  }

  switch (format) {
    case "alpaca":
      return ALPACA_ROLES;
    case "sharegpt":
      return SHAREGPT_ROLES;
    default:
      return CHATML_ROLES;
  }
}

/**
 * Check whether a column→role mapping assigns the minimum roles required for
 * the dataset kind: audio+text, image+text, instruction+output (alpaca),
 * human+gpt (sharegpt), or user+assistant otherwise. Audio takes precedence
 * over VLM, which takes precedence over `format`.
 */
export function isMappingComplete(
  mapping: Record<string, string>,
  isVlm: boolean,
  format?: string,
  isAudio?: boolean,
): boolean {
  let required: readonly string[];
  if (isAudio) {
    required = ["audio", "text"];
  } else if (isVlm) {
    required = ["image", "text"];
  } else if (format === "alpaca") {
    required = ["instruction", "output"];
  } else if (format === "sharegpt") {
    required = ["human", "gpt"];
  } else {
    required = ["user", "assistant"];
  }

  const assigned = new Set(Object.values(mapping));
  return required.every((role) => assigned.has(role));
}

/**
 * Compact dropdown for assigning a role to one dataset column.
 * An unassigned column is represented by the sentinel item "_none", which is
 * translated back to `undefined` before `onRoleChange` is called.
 */
export function HeaderRolePicker({
  currentRole,
  onRoleChange,
  availableRoles,
}: {
  currentRole: string | undefined;
  onRoleChange: (role: string | undefined) => void;
  availableRoles: readonly string[];
}) {
  const selectedValue = currentRole ?? "_none";

  const handleValueChange = (nextValue: string) =>
    onRoleChange(nextValue === "_none" ? undefined : nextValue);

  const roleItems = availableRoles.map((role) => (
    <SelectItem key={role} value={role} className="text-[11px]">
      {ROLE_LABELS[role] ?? role}
    </SelectItem>
  ));

  return (
    <Select value={selectedValue} onValueChange={handleValueChange}>
      <SelectTrigger className="h-6 w-[90px] text-[10px] px-2 py-0 border-dashed cursor-pointer">
        <SelectValue placeholder="Role..." />
      </SelectTrigger>
      <SelectContent>
        <SelectItem value="_none" className="text-[11px]">
          None
        </SelectItem>
        {roleItems}
      </SelectContent>
    </Select>
  );
}

/**
 * Status card summarising the current column→role mapping.
 *
 * Shows a green "ready" state or an amber "needs mapping" state depending on
 * `mappingOk`, lists every mapped column as a badge, and optionally renders
 * an "AI Assist" button (when `onAiAssist` is provided) plus an advisor
 * notification with an optional system prompt.
 */
export function DatasetMappingCard({
  mapping,
  mappingOk,
  autoDetected = false,
  isVlm = false,
  isAudio = false,
  format,
  onAiAssist,
  isAiLoading = false,
  aiError,
  advisorNotification,
  advisorSystemPrompt,
}: {
  mapping: Record<string, string>;
  mappingOk: boolean;
  autoDetected?: boolean;
  isVlm?: boolean;
  isAudio?: boolean;
  format?: string;
  onAiAssist?: () => void;
  isAiLoading?: boolean;
  aiError?: string | null;
  advisorNotification?: string | null;
  advisorSystemPrompt?: string;
}) {
  const entries = Object.entries(mapping);
  // Text naming the minimum roles for this dataset kind; audio takes
  // precedence over VLM, which takes precedence over the text format.
  const requiredLabel = isAudio
    ? "audio and text"
    : isVlm
      ? "image and text"
      : format === "alpaca"
        ? "instruction and output"
        : format === "sharegpt"
          ? "human and gpt"
          : "user and assistant";

  return (
    <div
      className={cn(
        "rounded-xl corner-squircle ring-1 px-5 py-4 mb-4",
        mappingOk
          ? "ring-emerald-200/70 bg-emerald-50/70 text-emerald-950 dark:ring-emerald-900/50 dark:bg-emerald-950/30 dark:text-emerald-50"
          : "ring-amber-200/70 bg-amber-50/70 text-amber-950 dark:ring-amber-900/50 dark:bg-amber-950/30 dark:text-amber-50",
      )}
    >
      <div className="flex items-start gap-3">
        <div
          className={cn(
            "rounded-xl corner-squircle p-2 shrink-0",
            mappingOk ? "bg-emerald-500/15" : "bg-amber-500/15",
          )}
        >
          <HugeiconsIcon
            icon={mappingOk ? CheckmarkCircle02Icon : AlertCircleIcon}
            className={cn(
              "size-4",
              mappingOk
                ? "text-emerald-700 dark:text-emerald-300"
                : "text-amber-700 dark:text-amber-300",
            )}
          />
        </div>
        <div className="min-w-0">
          <p className="text-sm font-semibold tracking-tight">
            {mappingOk
              ? autoDetected ? "Heuristic-detected mapping" : "Mapping ready"
              : "Map dataset columns"}
          </p>
          <p
            className={cn(
              "text-xs mt-0.5",
              mappingOk
                ? "text-emerald-800/80 dark:text-emerald-200/80"
                : "text-amber-800/80 dark:text-amber-200/80",
            )}
          >
            {mappingOk
              ? autoDetected
                ? "We auto-detected the column mapping below using heuristics. Please review and adjust using the dropdowns in the column headers, or use AI Assist for a smarter mapping."
                : "Looks good. We'll convert this dataset automatically."
              : `Assign roles to columns using the dropdowns in the headers. At minimum, assign ${requiredLabel}.`}
          </p>
          {entries.length > 0 && (
            <div className="mt-3 flex flex-wrap items-center gap-2">
              {entries.map(([col, role]) => (
                <Badge
                  key={col}
                  variant="outline"
                  className="h-6 text-[11px] bg-white/60 dark:bg-transparent"
                >
                  <span className="font-mono">{col}</span>
                  <span className="mx-1 text-muted-foreground/60">&rarr;</span>
                  <span>{ROLE_LABELS[role] ?? role}</span>
                </Badge>
              ))}
            </div>
          )}
          {!mappingOk && entries.length === 0 && (
            <p className="mt-2 text-xs text-amber-800/80 dark:text-amber-200/80">
              Use the dropdowns in the column headers to assign roles.
            </p>
          )}
          {onAiAssist && (
            <div className="mt-3 flex items-center gap-2">
              <Button
                variant="outline"
                size="sm"
                onClick={onAiAssist}
                disabled={isAiLoading}
                className="cursor-pointer bg-white/60 dark:bg-transparent"
              >
                {isAiLoading ? (
                  <>
                    <Loader2 className="mr-1.5 h-3.5 w-3.5 animate-spin" />
                    Analyzing dataset...
                  </>
                ) : (
                  <>
                    <Sparkles className="mr-1.5 h-3.5 w-3.5" />
                    AI Assist
                    <Badge variant="outline" className="ml-1.5 text-[9px] px-1 py-0 h-4 font-medium">Beta</Badge>
                  </>
                )}
              </Button>
              {aiError && (
                <p className="text-xs text-amber-700 dark:text-amber-300">{aiError}</p>
              )}
            </div>
          )}
          {advisorNotification && (
            <div className="mt-3 rounded-lg border border-indigo-200 bg-indigo-50 px-3 py-2.5 text-xs text-indigo-700 dark:border-indigo-800 dark:bg-indigo-950 dark:text-indigo-300 space-y-2">
              <div className="flex items-start gap-2">
                <Sparkles className="size-3.5 shrink-0 mt-0.5" />
                <span>{advisorNotification}</span>
              </div>
              {advisorSystemPrompt && (
                <div className="pl-5.5 text-[11px] font-mono text-indigo-600/80 dark:text-indigo-400/80">
                  <span className="font-sans font-medium text-indigo-500 dark:text-indigo-400">System:</span>{" "}
                  <span className="break-words">{advisorSystemPrompt}</span>
                </div>
              )}
            </div>
          )}
        </div>
      </div>
    </div>
  );
}

/**
 * Footer of the mapping view: a usage tip, a Cancel button and a Continue
 * button. Continue is disabled while the mapping is incomplete or a start is
 * already in progress; `startError`, when set, is shown below the buttons.
 */
export function DatasetMappingFooter({
  mappingOk,
  isStarting,
  startError,
  onCancel,
  onStartTraining,
}: {
  mappingOk: boolean;
  isStarting: boolean;
  startError: string | null;
  onCancel: () => void;
  onStartTraining: () => Promise<void>;
}) {
  return (
    <div className="mt-3 flex flex-col gap-2">
      <div className="flex items-center justify-between gap-3">
        <p className="text-[11px] text-muted-foreground/70 leading-relaxed">
          Tip: use the role dropdowns in the column headers to assign roles.
        </p>
        <div className="flex items-center gap-2">
          <Button
            variant="outline"
            size="sm"
            className="cursor-pointer"
            onClick={onCancel}
          >
            Cancel
          </Button>
          <Button
            size="sm"
            className="cursor-pointer"
            disabled={!mappingOk || isStarting}
            onClick={() => void onStartTraining()}
          >
            {isStarting ? "Starting..." : "Continue"}
          </Button>
        </div>
      </div>

      {startError && (
        <p className="text-xs text-red-500 leading-relaxed text-center">
          {startError}
        </p>
      )}
    </div>
  );
}

/**
 * Canonical role for every known role name.
 * Chat-style roles from alpaca and sharegpt collapse onto the chatml roles
 * (user / assistant / system); image, text, audio and speaker_id map to
 * themselves.
 */
const TO_CANONICAL: Record<string, string> = {
  user: "user", assistant: "assistant", system: "system",
  instruction: "user", input: "system", output: "assistant",
  human: "user", gpt: "assistant",
  image: "image", text: "text",
  audio: "audio", speaker_id: "speaker_id",
};

/**
 * Canonical chatml role → format-specific role name, keyed by format.
 * Only formats whose role names differ from chatml have an entry.
 */
const FROM_CANONICAL: Record<string, Record<string, string>> = {
  alpaca: { user: "instruction", system: "input", assistant: "output" },
  sharegpt: { user: "human", assistant: "gpt", system: "system" },
};

/**
 * Translate a column→role mapping into the role vocabulary of `format`.
 *
 * Each role is first reduced to its canonical chatml name (unknown roles are
 * kept as-is) and then converted to the target format's name. Formats without
 * a conversion table, or no format at all, yield the canonical names.
 */
export function remapRolesForFormat(
  mapping: Record<string, string>,
  format?: string,
): Record<string, string> {
  const targetTable = format ? FROM_CANONICAL[format] : undefined;

  return Object.entries(mapping).reduce<Record<string, string>>(
    (remapped, [column, role]) => {
      const canonicalRole = TO_CANONICAL[role] ?? role;
      remapped[column] = targetTable
        ? (targetTable[canonicalRole] ?? canonicalRole)
        : canonicalRole;
      return remapped;
    },
    {},
  );
}

/**
 * Build the initial column→role mapping from a format-check response.
 *
 * A server-suggested mapping wins and is remapped to the target `format`.
 * Otherwise the detected audio/text/speaker columns (audio datasets) or
 * image/text columns (VLM datasets) are used; plain text datasets start with
 * an empty mapping.
 */
export function deriveDefaultMapping(
  data: CheckFormatResponse,
  isVlm: boolean,
  format?: string,
  isAudio?: boolean,
): Record<string, string> {
  const { suggested_mapping: suggested } = data;
  if (suggested) {
    return remapRolesForFormat({ ...suggested }, format);
  }

  const detected: Record<string, string> = {};

  if (isAudio) {
    const audioColumn = data.detected_audio_column;
    const textColumn = data.detected_text_column;
    const speakerColumn = data.detected_speaker_column;
    if (audioColumn) {
      detected[audioColumn] = "audio";
    }
    if (textColumn) {
      detected[textColumn] = "text";
    }
    if (speakerColumn) {
      detected[speakerColumn] = "speaker_id";
    }
  } else if (isVlm) {
    const imageColumn = data.detected_image_column;
    const textColumn = data.detected_text_column;
    if (imageColumn) {
      detected[imageColumn] = "image";
    }
    if (textColumn) {
      detected[textColumn] = "text";
    }
  }

  return detected;
}


================================================
FILE: studio/frontend/src/features/studio/sections/dataset-preview-dialog-utils.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/** Image object embedded in preview data, identified by `type: "image"`. */
type PreviewImagePayload = {
  // Discriminator marking the object as an image payload.
  type: "image";
  // NOTE(review): presumably the MIME type of `data` — confirm against the backend.
  mime?: string;
  // NOTE(review): presumably pixel dimensions — confirm against the backend.
  width?: number;
  height?: number;
  // Image content as a string.
  // NOTE(review): the encoding (e.g. base64) is not visible here — confirm.
  data?: string;
};

/**
 * Render an arbitrary cell value as display text.
 * null/undefined become "", strings pass through, numbers and booleans are
 * stringified, and arrays/objects are JSON-encoded and truncated to 500
 * characters. Anything else goes through `String()`.
 */
export function formatCell(value: unknown): string {
  if (value === null || value === undefined) {
    return "";
  }

  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    case "object":
      // Arrays are objects too, so they take this branch as well.
      return JSON.stringify(value).slice(0, 500);
    default:
      return String(value);
  }
}

/**
 * Type guard: true when `value` is an object with `type === "image"` and a
 * non-empty string `data` field.
 */
function isPreviewImagePayload(value: unknown): value is PreviewImagePayload {
  if (!value || typeof value !== "object") return false;
  const record = value as Record<string, unknown>;
  return (
    record.type === "image" &&
    typeof record.data === "string" &&
    record.data.length > 0
  );
}

/**
 * Find every image payload nested anywhere inside `value`.
 *
 * Arrays and plain objects are traversed depth-first, and images are returned
 * in document order (the order in which they appear in the data). Image
 * payloads themselves are not descended into. The traversal visits at most
 * 200 nodes, so very large or deeply nested values are only searched
 * partially.
 *
 * @param value Arbitrary preview cell value.
 * @returns The image payloads found, in order of appearance.
 */
export function collectPreviewImages(value: unknown): PreviewImagePayload[] {
  const maxSteps = 200;
  const images: PreviewImagePayload[] = [];
  const stack: unknown[] = [value];
  let steps = 0;

  while (stack.length > 0 && steps < maxSteps) {
    steps += 1;
    const current = stack.pop();
    if (isPreviewImagePayload(current)) {
      images.push(current);
      continue;
    }

    let children: unknown[] | null = null;
    if (Array.isArray(current)) {
      children = current;
    } else if (current && typeof current === "object") {
      children = Object.values(current as Record<string, unknown>);
    }

    if (children) {
      // The stack is LIFO, so push children last-to-first: the first child is
      // then popped (and its images collected) first.
      for (let index = children.length - 1; index >= 0; index -= 1) {
        stack.push(children[index]);
      }
    }
  }

  return images;
}


================================================
FILE: studio/frontend/src/features/studio/sections/dataset-preview-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { type ReactNode, useCallback, useEffect, useMemo, useRef, useState } from "react";
import { aiAssistMapping } from "@/features/training/api/datasets-api";
import type { ColumnDef } from "@tanstack/react-table";
import {
  Dialog,
  DialogContent,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { DataTable } from "@/components/ui/data-table";
import { Badge } from "@/components/ui/badge";
import { Spinner } from "@/components/ui/spinner";
import { useTrainingActions, useTrainingConfigStore } from "@/features/training";
import { checkDatasetFormat } from "@/features/training/api/datasets-api";
import type { CheckFormatResponse } from "@/features/training/types/datasets";
import { Database02Icon, AlertCircleIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useShallow } from "zustand/react/shallow";
import { collectPreviewImages, formatCell } from "./dataset-preview-dialog-utils";
import {
  DatasetMappingCard,
  DatasetMappingFooter,
  HeaderRolePicker,
  deriveDefaultMapping,
  getAvailableRoles,
  isMappingComplete,
  remapRolesForFormat,
} from "./dataset-preview-dialog-mapping";

/**
 * Chatml → format-specific role remap (only for formats that differ from chatml).
 *
 * Outer key: dataset format name. Inner map: chatml role → the role name used
 * by that format. A format with no entry here, or a role with no entry in the
 * inner map, is left unchanged by the lookup in `handleAiAssist`.
 */
const ROLE_REMAP: Record<string, Record<string, string>> = {
  alpaca: { user: "instruction", system: "input", assistant: "output" },
  sharegpt: { user: "human", assistant: "gpt", system: "system" },
};

/** Props accepted by `DatasetPreviewDialog`. */
type DatasetPreviewDialogProps = {
  /** Whether the dialog is shown; forwarded to the underlying `Dialog`. */
  open: boolean;
  /** Called when the dialog requests an open/close change. */
  onOpenChange: (open: boolean) => void;
  /** Dataset to preview; when null nothing is fetched and state is cleared. */
  datasetName: string | null;
  /** Origin of the dataset; selects the source label and the loading text. */
  datasetSource?: "huggingface" | "upload";
  /** Token passed to the format check and AI-assist requests. */
  hfToken: string | null;
  /** Optional subset passed to the format check and shown in the source label. */
  datasetSubset?: string | null;
  /** Optional split passed to the format check and shown in the source label. */
  datasetSplit?: string | null;
  /** "mapping" additionally renders the mapping footer; defaults to "preview". */
  mode?: "preview" | "mapping";
  /** Pre-fetched format-check result; when provided no request is made. */
  initialData?: CheckFormatResponse | null;
  /** Whether to treat the dataset as vision data; also sent with the format check. */
  isVlm?: boolean;
};

/**
 * Modal dialog showing a preview of a dataset (metadata, warning, sample rows)
 * and, when column mapping is available, per-column role pickers plus an
 * "AI assist" action that suggests a mapping.
 *
 * Data comes from `initialData` when supplied; otherwise it is fetched with
 * `checkDatasetFormat` whenever the dialog is open and the dataset identity
 * changes. In `mode="mapping"` a footer with cancel / start-training actions
 * is rendered when mapping is enabled.
 */
export function DatasetPreviewDialog({
  open,
  onOpenChange,
  datasetName,
  datasetSource,
  hfToken,
  datasetSubset,
  datasetSplit,
  mode = "preview",
  initialData,
  isVlm = false,
}: DatasetPreviewDialogProps) {
  const [data, setData] = useState<CheckFormatResponse | null>(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const {
    manualMapping, setManualMapping, datasetFormat,
    setDatasetAdvisorFields, datasetAdvisorNotification,
    datasetSystemPrompt,
    selectedModel,
    modelType,
  } = useTrainingConfigStore(
    useShallow((s) => ({
      manualMapping: s.datasetManualMapping,
      setManualMapping: s.setDatasetManualMapping,
      datasetFormat: s.datasetFormat,
      setDatasetAdvisorFields: s.setDatasetAdvisorFields,
      datasetAdvisorNotification: s.datasetAdvisorNotification,
      datasetSystemPrompt: s.datasetSystemPrompt,
      selectedModel: s.selectedModel,
      modelType: s.modelType,
    })),
  );
  const { isStarting, startError, startTrainingRun } = useTrainingActions();

  // If the backend reports image data, treat as VLM even if the prop
  // hasn't caught up yet (isDatasetImage may still be null in the store).
  const effectiveIsAudio = !!data?.is_audio;
  const effectiveIsVlm = isVlm || !!data?.is_image;

  // Mapping UI is shown either when the backend requires a manual mapping or
  // when it supplied a suggested one without requiring it.
  const hasHeuristicMapping = !data?.requires_manual_mapping && !!data?.suggested_mapping;
  const mappingEnabled = !!data?.requires_manual_mapping || hasHeuristicMapping;
  const showMappingFooter = mode === "mapping" && mappingEnabled;
  const mappingOk = isMappingComplete(manualMapping, effectiveIsVlm, datasetFormat, effectiveIsAudio);
  const availableRoles = getAvailableRoles(effectiveIsVlm, datasetFormat, effectiveIsAudio);
  const isHfDataset = datasetSource === "huggingface";

  // ── AI Assist ──────────────────────────────────────────────────────
  const [isAiLoading, setIsAiLoading] = useState(false);
  const [aiError, setAiError] = useState<string | null>(null);

  // Ask the backend for a suggested column → role mapping based on the
  // currently loaded columns and preview samples.
  const handleAiAssist = useCallback(async () => {
    if (!data?.columns || !data?.preview_samples) return;
    setIsAiLoading(true);
    setAiError(null);

    try {
      const result = await aiAssistMapping({
        columns: data.columns,
        samples: data.preview_samples,
        datasetName: datasetName,
        hfToken: hfToken,
        modelName: selectedModel,
        modelType: modelType,
      });

      if (result.success && result.suggested_mapping) {
        // Remap from chatml roles (user/assistant/system) to format-specific roles
        const table = ROLE_REMAP[datasetFormat];
        const mapped: Record<string, string> = {};
        for (const [col, role] of Object.entries(result.suggested_mapping)) {
          mapped[col] = table ? (table[role] ?? role) : role;
        }
        setManualMapping(mapped);

        // Store conversion advisor fields (system prompt, label mapping, notification)
        if (result.system_prompt || result.label_mapping || result.user_notification) {
          setDatasetAdvisorFields({
            systemPrompt: result.system_prompt ?? undefined,
            labelMapping: result.label_mapping ?? undefined,
            notification: result.user_notification ?? null,
          });
        }
      } else {
        setAiError(result.warning || "AI could not determine column roles.");
      }
    } catch (err) {
      setAiError(err instanceof Error ? err.message : "AI assist failed.");
    } finally {
      setIsAiLoading(false);
    }
  }, [data, datasetFormat, datasetName, hfToken, setManualMapping, setDatasetAdvisorFields, selectedModel, modelType]);

  // When format changes, remap existing mapping roles to the new format's role names
  const prevFormatRef = useRef(datasetFormat);
  useEffect(() => {
    const prev = prevFormatRef.current;
    prevFormatRef.current = datasetFormat;
    if (prev === datasetFormat) return;
    if (Object.keys(manualMapping).length === 0) return;
    setManualMapping(remapRolesForFormat(manualMapping, datasetFormat));
  }, [datasetFormat]); // eslint-disable-line react-hooks/exhaustive-deps

  // Handle role change for a column
  const handleRoleChange = useCallback(
    (colName: string, role: string | undefined) => {
      const next = { ...manualMapping };
      // Remove this column's previous role
      delete next[colName];
      if (role) {
        // Remove any other column that had this role (each role can only be assigned once)
        for (const [col, r] of Object.entries(next)) {
          if (r === role) delete next[col];
        }
        next[colName] = role;
      }
      setManualMapping(next);
    },
    [manualMapping, setManualMapping],
  );

  // Load (or adopt) the preview data whenever the dialog opens or the dataset
  // identity changes.
  useEffect(() => {
    if (!open || !datasetName) {
      setData(null);
      setError(null);
      // A request cancelled by the previous run's cleanup never reaches its
      // `finally` reset, so clear the flag here to avoid a stuck spinner.
      setLoading(false);
      return;
    }

    if (initialData) {
      setData(initialData);
      setError(null);
      setLoading(false);
      return;
    }

    // Set by the cleanup so a superseded request cannot write stale state.
    let cancelled = false;
    setLoading(true);
    setError(null);

    checkDatasetFormat({
      datasetName,
      hfToken,
      subset: datasetSubset,
      split: datasetSplit,
      isVlm,
    })
      .then((res) => {
        if (!cancelled) {
          setData(res);
          setError(null);
        }
      })
      .catch((err) => {
        if (cancelled) return;
        // Rejection values are not guaranteed to be Error instances (or even
        // objects), so read `message` defensively instead of dereferencing it.
        const message = (err as { message?: unknown } | null | undefined)?.message;
        setError(typeof message === "string" && message ? message : "Failed to load preview");
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });

    return () => {
      cancelled = true;
    };
  }, [open, datasetName, hfToken, datasetSubset, datasetSplit, isVlm, initialData]);

  // Pre-fill mapping from suggested_mapping when data arrives
  useEffect(() => {
    if (!open || !datasetName) return;
    if (!data?.requires_manual_mapping && !data?.suggested_mapping) return;
    // Don't overwrite if mapping already has entries
    if (Object.keys(manualMapping).length > 0) return;
    const derived = deriveDefaultMapping(data, effectiveIsVlm, datasetFormat, effectiveIsAudio);
    if (Object.keys(derived).length === 0) return;
    setManualMapping(derived);
  }, [open, datasetName, data, effectiveIsVlm, datasetFormat, effectiveIsAudio, manualMapping, setManualMapping]);

  const rows = data?.preview_samples ?? [];
  const columns = data?.columns ?? [];

  // Determine source label
  const sourceLabel = useMemo(() => {
    if (!datasetName) return "";
    if (datasetSource === "huggingface") {
      let label = `Hugging Face (${datasetName}`;
      if (datasetSubset) label += ` / ${datasetSubset}`;
      if (datasetSplit) label += ` / ${datasetSplit}`;
      label += ")";
      return label;
    }
    return `Local Files (${datasetName})`;
  }, [datasetName, datasetSource, datasetSubset, datasetSplit]);

  // Build TanStack Table columns from the column names
  const tableColumns = useMemo<ColumnDef<Record<string, unknown>>[]>(() => {
    if (!columns.length) return [];

    const dataCols: ColumnDef<Record<string, unknown>>[] = columns.map((colName) => ({
      accessorKey: colName,
      header: () => (
        <div className="flex flex-col gap-2">
          <span className="font-heading text-[13px] font-semibold tracking-tight text-foreground">
            {colName}
          </span>
          {mappingEnabled && (
            <HeaderRolePicker
              currentRole={manualMapping[colName]}
              onRoleChange={(role) => handleRoleChange(colName, role)}
              availableRoles={availableRoles}
            />
          )}
        </div>
      ),
      cell: ({ getValue }: { getValue: () => unknown }) => {
        const value = getValue();
        // Cells containing image payloads render thumbnails (up to four)
        // instead of text.
        const images = collectPreviewImages(value);
        if (images.length > 0) {
          return (
            <div className="flex flex-wrap gap-2">
              {images.slice(0, 4).map((image, index) => {
                const mime = image.mime || "image/jpeg";
                const src = image.data ? `data:${mime};base64,${image.data}` : "";
                const width = image.width ?? 128;
                const height = image.height ?? 128;
                return (
                  <img
                    key={`${colName}-img-${index}`}
                    src={src}
                    alt={`preview-${index}`}
                    className="h-16 w-auto max-w-40 rounded-md border object-contain bg-muted"
                    width={width}
                    height={height}
                    loading="lazy"
                  />
                );
              })}
              {images.length > 4 && (
                <span className="text-xs text-muted-foreground self-end">
                  +{images.length - 4} more
                </span>
              )}
            </div>
          );
        }

        const text = formatCell(value);
        if (!text) {
          return (
            <span className="text-muted-foreground/40 italic text-[13px]">
              --
            </span>
          );
        }
        // The tooltip carries the untruncated value; the visible text is
        // clamped by CSS and (for objects) by formatCell.
        const full = typeof value === "string" ? value : JSON.stringify(value);
        return (
          <p
            className="text-[13px] leading-relaxed line-clamp-6"
            title={full}
          >
            {text}
          </p>
        );
      },
    }));

    // Prepend generated system prompt column when advisor is active
    if (datasetSystemPrompt) {
      dataCols.unshift({
        id: "__system_generated",
        header: () => (
          <div className="flex flex-col gap-2">
            <span className="font-heading text-[13px] font-semibold tracking-tight text-foreground">
              System <span className="text-muted-foreground font-normal">(generated)</span>
            </span>
            {mappingEnabled && (
              <Badge variant="outline" className="h-6 w-fit text-[10px] px-2 py-0 border-dashed text-muted-foreground">
                System
              </Badge>
            )}
          </div>
        ),
        cell: () => (
          <p
            className="text-[13px] leading-relaxed line-clamp-6 text-muted-foreground italic"
            title={datasetSystemPrompt}
          >
            {datasetSystemPrompt}
          </p>
        ),
      });
    }

    return dataCols;
  }, [
    columns,
    manualMapping,
    handleRoleChange,
    mappingEnabled,
    availableRoles,
    datasetSystemPrompt,
  ]);

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent
        className="sm:max-w-5xl w-[90vw] max-h-[88vh] flex flex-col gap-0 p-0 overflow-hidden rounded-3xl corner-squircle"
        showCloseButton={true}
      >
        {/* Header */}
        <DialogHeader className="px-6 pt-5 pb-4 shrink-0">
          <div className="flex items-center gap-3 pr-10">
            <div className="rounded-xl corner-squircle p-2 ring-1 ring-indigo-200 bg-indigo-50 text-indigo-600 dark:ring-indigo-800 dark:bg-indigo-950 dark:text-indigo-400 shrink-0">
              <HugeiconsIcon icon={Database02Icon} className="size-4" />
            </div>
            <DialogTitle className="font-heading text-lg font-semibold tracking-tight">
              Dataset Preview
            </DialogTitle>
          </div>
        </DialogHeader>

        {/* Body */}
        <div className="flex flex-col min-h-0 flex-1 overflow-auto px-6 pb-6">
          {/* Loading */}
          {loading && (
            <div className="py-24 flex flex-col items-center justify-center gap-3">
              <div className="rounded-2xl corner-squircle bg-primary/5 p-4">
                <Spinner className="size-5 text-primary" />
              </div>
              <p className="text-sm text-muted-foreground font-medium">
                {isHfDataset ? "Fetching dataset preview from Hugging Face..." : "Loading preview..."}
              </p>
              {isHfDataset && (
                <p className="text-xs text-muted-foreground/60">
                  This may take a moment for large datasets
                </p>
              )}
            </div>
          )}

          {/* Error */}
          {error && (
            <div className="py-20 flex flex-col items-center justify-center gap-3">
              <div className="rounded-2xl corner-squircle bg-destructive/10 p-3">
                <HugeiconsIcon
                  icon={AlertCircleIcon}
                  className="size-5 text-destructive"
                />
              </div>
              <div className="text-center space-y-1">
                <p className="text-sm font-medium text-destructive">{error}</p>
                <p className="text-xs text-muted-foreground">
                  Make sure the backend is running and reachable.
                </p>
              </div>
            </div>
          )}

          {/* Content */}
          {!loading && !error && data && (
            <>
              {/* Metadata card */}
              <div className="rounded-xl corner-squircle ring-1 ring-border/60 bg-muted/30 px-5 py-4 mb-4 space-y-2">
                <MetaRow label="Source" value={sourceLabel} />
                <MetaRow
                  label="Format"
                  value={data.detected_format || "--"}
                />
                <MetaRow
                  label="Total Rows"
                  value={
                    data.total_rows != null
                      ? data.total_rows.toLocaleString()
                      : "--"
                  }
                />
                <MetaRow
                  label="Columns"
                  value={
                    <span className="flex items-center gap-1.5 flex-wrap">
                      {columns.map((col) => (
                        <Badge
                          key={col}
                          variant="outline"
                          className="text-[11px] font-mono h-5"
                        >
                          {col}
                        </Badge>
                      ))}
                    </span>
                  }
                />
              </div>

              {data.warning && (
                <div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3 text-xs text-amber-700 dark:border-amber-800 dark:bg-amber-950 dark:text-amber-400 mb-4 flex items-start gap-2.5">
                  <HugeiconsIcon icon={AlertCircleIcon} className="size-4 shrink-0 mt-0.5" />
                  <span>{data.warning}</span>
                </div>
              )}

              {mappingEnabled && (
                <DatasetMappingCard
                  mapping={manualMapping}
                  mappingOk={mappingOk}
                  autoDetected={hasHeuristicMapping}
                  isVlm={effectiveIsVlm}
                  isAudio={effectiveIsAudio}
                  format={datasetFormat}
                  onAiAssist={handleAiAssist}
                  isAiLoading={isAiLoading}
                  aiError={aiError}
                  advisorNotification={datasetAdvisorNotification}
                  advisorSystemPrompt={datasetSystemPrompt || undefined}
                />
              )}

              {/* Data table */}
              <div className="flex-1 min-h-[250px] rounded-xl corner-squircle ring-1 ring-border/60 overflow-auto">
                <DataTable columns={tableColumns} data={rows} />
              </div>

              {/* Footer */}
              <div className="mt-3">
                <p className="text-[11px] text-muted-foreground/60 text-center tabular-nums">
                  Showing {rows.length}
                  {data.total_rows != null &&
                    ` of ${data.total_rows.toLocaleString()}`}{" "}
                  rows
                </p>

                {mode === "preview" && mappingEnabled && (
                  <p className="mt-2 text-[11px] text-muted-foreground/70 text-center">
                    Mapping is saved automatically. You can start training anytime.
                  </p>
                )}

                {showMappingFooter && (
                  <DatasetMappingFooter
                    mappingOk={mappingOk}
                    isStarting={isStarting}
                    startError={startError}
                    onCancel={() => onOpenChange(false)}
                    onStartTraining={async () => {
                      const ok = await startTrainingRun();
                      if (ok) onOpenChange(false);
                    }}
                  />
                )}
              </div>
            </>
          )}
        </div>
      </DialogContent>
    </Dialog>
  );
}

// ---------------------------------------------------------------------------
// Metadata row
// ---------------------------------------------------------------------------

/** One "label: value" line of the dataset metadata card. */
function MetaRow(props: { label: string; value: ReactNode }) {
  const { label, value } = props;

  return (
    <div className="flex items-baseline gap-3 text-sm">
      <span className="text-muted-foreground font-medium text-xs w-24 shrink-0">
        {label}:
      </span>
      <span className="text-foreground text-[13px] min-w-0">{value}</span>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/dataset-section.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { SectionCard } from "@/components/section-card";
import { Button } from "@/components/ui/button";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import { Input } from "@/components/ui/input";
import { InputGroupAddon } from "@/components/ui/input-group";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Spinner } from "@/components/ui/spinner";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import {
  useDebouncedValue,
  useHfDatasetSearch,
  useHfTokenValidation,
  useInfiniteScroll,
} from "@/hooks";
import {
  HfDatasetSubsetSplitSelectors,
  uploadTrainingDataset,
  useDatasetPreviewDialogStore,
  useTrainingConfigStore,
} from "@/features/training";
import { listLocalDatasets } from "@/features/training/api/datasets-api";
import type { LocalDatasetInfo } from "@/features/training/types/datasets";
import { useNavigate } from "@tanstack/react-router";
import {
  ArrowDown01Icon,
  Cancel01Icon,
  CloudUploadIcon,
  Database02Icon,
  FileAttachmentIcon,
  InformationCircleIcon,
  Search01Icon,
  ViewIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ChangeEvent, useCallback, useEffect, useMemo, useRef, useState } from "react";
import { toast } from "sonner";
import { useShallow } from "zustand/react/shallow";
import { DocumentUploadRedirectDialog } from "./document-upload-redirect-dialog";

// Lower-cased file extensions that are not uploaded as datasets; picking such
// a file opens the document-upload redirect dialog instead.
const DOCUMENT_REDIRECT_EXTENSIONS = new Set([".pdf", ".docx", ".txt"]);

// Combobox input-change reasons that are allowed to update the search query;
// changes reported with any other reason are ignored.
const SEARCH_INPUT_REASONS = new Set(["input-change", "input-paste", "input-clear"]);
// sessionStorage key written just before navigating to the data-recipes route.
const OPEN_LEARNING_RECIPES_ON_ARRIVAL_KEY =
  "data-recipes:open-learning-recipes";

/**
 * Heuristic: does `value` look like a filesystem reference rather than a hub
 * dataset id? True for absolute or dot-relative paths, anything containing a
 * backslash, or a name ending in a known data-file extension.
 */
function isLikelyLocalDatasetRef(value: string) {
  const pathPrefixes = ["/", "./", "../"];
  if (pathPrefixes.some((prefix) => value.startsWith(prefix))) return true;
  if (value.includes("\\")) return true;
  return /\.(jsonl|json|csv|parquet)$/i.test(value);
}

/**
 * Produce a display name for a local dataset path.
 *
 * If the path contains a `parquet-files` segment (not in first position), the
 * segment right before the last such occurrence is used. Otherwise the final
 * path segment is used, with a leading `{32 hex chars}_` upload prefix removed.
 * Both `/` and `\` are treated as separators.
 */
function deriveLocalDatasetName(path: string): string {
  const segments = path.split(/[\\/]/).filter((segment) => segment.length > 0);

  const parquetDirAt = segments.lastIndexOf("parquet-files");
  if (parquetDirAt >= 1) return segments[parquetDirAt - 1];

  // With no usable segments (e.g. an empty path) fall back to the raw input.
  const leaf = segments.length > 0 ? segments[segments.length - 1] : path;

  // Strip UUID prefix from uploaded files (format: {32hex}_{original})
  const uploaded = /^[a-f0-9]{32}_(.+)$/.exec(leaf);
  return uploaded ? uploaded[1] : leaf;
}

/**
 * Format a numeric timestamp as a locale date string, or "--" when absent.
 * The value is multiplied by 1000 before building the `Date` (presumably it
 * is expressed in seconds; confirm against the API).
 */
function formatUpdatedDate(timestamp: number | null): string {
  if (typeof timestamp === "number") {
    const millis = timestamp * 1000;
    return new Date(millis).toLocaleDateString();
  }
  return "--";
}

/**
 * Validate a slice-bound text input: returns the trimmed text when it consists
 * solely of decimal digits, otherwise null (including blank input).
 */
function normalizeSliceInput(value: string): string | null {
  const candidate = value.trim();
  // The pattern requires at least one digit, so blank input is rejected too.
  return /^\d+$/.test(candidate) ? candidate : null;
}

export function DatasetSection() {
  const navigate = useNavigate();
  const {
    dataset,
    datasetSource,
    selectHfDataset,
    selectLocalDataset,
    datasetFormat,
    setDatasetFormat,
    datasetSubset,
    setDatasetSubset,
    datasetSplit,
    setDatasetSplit,
    datasetEvalSplit,
    setDatasetEvalSplit,
    uploadedFile,
    uploadedEvalFile,
    setUploadedEvalFile,
    hfToken,
    modelType,
    datasetSliceStart,
    setDatasetSliceStart,
    datasetSliceEnd,
    setDatasetSliceEnd,
  } = useTrainingConfigStore(
    useShallow((s) => ({
      dataset: s.dataset,
      datasetSource: s.datasetSource,
      selectHfDataset: s.selectHfDataset,
      selectLocalDataset: s.selectLocalDataset,
      datasetFormat: s.datasetFormat,
      setDatasetFormat: s.setDatasetFormat,
      datasetSubset: s.datasetSubset,
      setDatasetSubset: s.setDatasetSubset,
      datasetSplit: s.datasetSplit,
      setDatasetSplit: s.setDatasetSplit,
      datasetEvalSplit: s.datasetEvalSplit,
      setDatasetEvalSplit: s.setDatasetEvalSplit,
      uploadedFile: s.uploadedFile,
      uploadedEvalFile: s.uploadedEvalFile,
      setUploadedEvalFile: s.setUploadedEvalFile,
      hfToken: s.hfToken,
      modelType: s.modelType,
      datasetSliceStart: s.datasetSliceStart,
      setDatasetSliceStart: s.setDatasetSliceStart,
      datasetSliceEnd: s.datasetSliceEnd,
      setDatasetSliceEnd: s.setDatasetSliceEnd,
    })),
  );

  const [searchQuery, setSearchQuery] = useState("");
  const [advancedOpen, setAdvancedOpen] = useState(false);
  const [pickerTab, setPickerTab] = useState<"huggingface" | "local">(
    datasetSource === "upload" ? "local" : "huggingface",
  );
  const [localDatasets, setLocalDatasets] = useState<LocalDatasetInfo[]>([]);
  const [hasLoadedLocalDatasets, setHasLoadedLocalDatasets] = useState(false);
  const [localLoading, setLocalLoading] = useState(false);
  const [localError, setLocalError] = useState<string | null>(null);
  const openPreview = useDatasetPreviewDialogStore((s) => s.openPreview);
  const selectingRef = useRef(false);
  const pendingSourceTabRef = useRef<"huggingface" | "local" | null>(null);
  const debouncedQuery = useDebouncedValue(searchQuery);

  useEffect(() => {
    setPickerTab(datasetSource === "upload" ? "local" : "huggingface");
  }, [datasetSource]);

  const refreshLocalDatasets = useCallback(async () => {
    setLocalLoading(true);
    setLocalError(null);
    try {
      const response = await listLocalDatasets();
      setLocalDatasets(response.datasets ?? []);
    } catch (error) {
      setLocalError(
        error instanceof Error ? error.message : "Failed to load local datasets.",
      );
    } finally {
      setHasLoadedLocalDatasets(true);
      setLocalLoading(false);
    }
  }, []);

  useEffect(() => {
    if (pickerTab !== "local") return;
    void refreshLocalDatasets();
  }, [pickerTab, refreshLocalDatasets]);

  useEffect(() => {
    const handleRefresh = () => {
      if (document.hidden) return;
      if (pickerTab !== "local" && datasetSource !== "upload") return;
      void refreshLocalDatasets();
    };

    window.addEventListener("focus", handleRefresh);
    document.addEventListener("visibilitychange", handleRefresh);
    return () => {
      window.removeEventListener("focus", handleRefresh);
      document.removeEventListener("visibilitychange", handleRefresh);
    };
  }, [datasetSource, pickerTab, refreshLocalDatasets]);

  function handleDatasetSelect(id: string | null) {
    selectingRef.current = true;
    pendingSourceTabRef.current = "huggingface";
    selectHfDataset(id);
  }

  function handleLocalDatasetSelect(path: string) {
    selectingRef.current = true;
    pendingSourceTabRef.current = "local";
    selectLocalDataset(path);
  }

  function clearSelectionForTab(tab: "huggingface" | "local") {
    pendingSourceTabRef.current = tab;
    if (tab === "huggingface") {
      handleDatasetSelect(null);
      return;
    }
    selectingRef.current = true;
    selectLocalDataset(null);
  }

  function handleInputChange(
    val: string,
    eventDetails?: {
      reason?: string;
    },
  ) {
    if (selectingRef.current) {
      selectingRef.current = false;
      return;
    }
    if (!SEARCH_INPUT_REASONS.has(eventDetails?.reason ?? "")) {
      return;
    }
    setSearchQuery(val);
  }

  const effectiveModelType = modelType ?? "text";

  const {
    results: hfResults,
    isLoading,
    isLoadingMore,
    fetchMore,
    error: hfSearchError,
  } = useHfDatasetSearch(pickerTab === "huggingface" ? debouncedQuery : "", {
    modelType: effectiveModelType,
    accessToken: hfToken || undefined,
    enabled: pickerTab === "huggingface",
  });

  const { error: tokenValidationError, isChecking: isCheckingToken } =
    useHfTokenValidation(hfToken);

  const hfResultIds = useMemo(() => {
    const ids = hfResults.map((r) => r.id);
    if (dataset && !ids.includes(dataset)) {
      ids.push(dataset);
    }
    return ids;
  }, [hfResults, dataset]);

  const localFilteredDatasets = useMemo(() => {
    const query = searchQuery.trim().toLowerCase();
    if (!query) return localDatasets;
    return localDatasets.filter(
      (item) =>
        item.label.toLowerCase().includes(query) ||
        item.path.toLowerCase().includes(query),
    );
  }, [localDatasets, searchQuery]);

  const localPathById = useMemo(() => {
    return new Map(localDatasets.map((item) => [item.id, item.path]));
  }, [localDatasets]);

  const localLabelById = useMemo(() => {
    return new Map(localDatasets.map((item) => [item.id, item.label]));
  }, [localDatasets]);

  const selectedLocalDataset = useMemo(() => {
    if (!uploadedFile) return null;
    return localDatasets.find((item) => item.path === uploadedFile) ?? null;
  }, [localDatasets, uploadedFile]);

  const selectedLocalId = selectedLocalDataset?.id ?? null;

  const localResultIds = useMemo(() => {
    const ids = localFilteredDatasets.map((item) => item.id);
    if (selectedLocalDataset && selectedLocalId && !ids.includes(selectedLocalId)) {
      ids.push(selectedLocalId);
    }
    return ids;
  }, [localFilteredDatasets, selectedLocalDataset, selectedLocalId]);

  useEffect(() => {
    if (!hasLoadedLocalDatasets) return;
    if (localLoading) return;
    if (localError) return;
    if (datasetSource !== "upload") return;
    if (!uploadedFile) return;
    if (selectedLocalDataset) return;
    // Don't clear if this is a direct file upload (e.g. user uploaded a .jsonl/.csv)
    if (/\.(jsonl|json|csv|parquet|arrow)$/i.test(uploadedFile)) return;
    selectLocalDataset(null);
  }, [
    datasetSource,
    hasLoadedLocalDatasets,
    localError,
    localLoading,
    uploadedFile,
    selectedLocalDataset,
    selectLocalDataset,
  ]);

  const activeSourceTab = datasetSource === "upload" ? "local" : "huggingface";
  const comboboxItems = pickerTab === "huggingface" ? hfResultIds : localResultIds;
  const comboboxValue =
    pickerTab === "huggingface"
      ? datasetSource === "huggingface"
        ? dataset
        : null
      : datasetSource === "upload"
        ? selectedLocalId
        : null;
  const isHfDatasetSelected =
    datasetSource === "huggingface" &&
    !!dataset &&
    !isLikelyLocalDatasetRef(dataset);

  const selectedDatasetName = datasetSource === "upload" ? uploadedFile : dataset;
  const selectedLocalMetadata = selectedLocalDataset?.metadata ?? null;
  const selectedLocalColumns = selectedLocalMetadata?.columns ?? [];
  const selectedLocalRows =
    selectedLocalDataset?.rows ?? selectedLocalMetadata?.actual_num_records ?? null;
  const selectedLocalUpdatedAt = selectedLocalDataset?.updated_at ?? null;

  const comboboxAnchorRef = useRef<HTMLDivElement>(null);
  const fileInputRef = useRef<HTMLInputElement>(null);
  const evalFileInputRef = useRef<HTMLInputElement>(null);
  const { scrollRef, sentinelRef } = useInfiniteScroll(
    fetchMore,
    hfResults.length,
  );

  const [isUploading, setIsUploading] = useState(false);
  const [documentRedirectOpen, setDocumentRedirectOpen] = useState(false);
  const [redirectFileName, setRedirectFileName] = useState<string | null>(null);

  const handleUploadButtonClick = () => {
    fileInputRef.current?.click();
  };

  const handleFileUpload = async (
    file: File,
    onSuccess: (storedPath: string) => void,
    successMessage: string,
  ) => {
    setIsUploading(true);
    try {
      const uploaded = await uploadTrainingDataset(file);
      onSuccess(uploaded.stored_path);
      toast.success(successMessage, { description: uploaded.filename });
    } catch (error) {
      toast.error("Upload failed", {
        description: error instanceof Error ? error.message : "Unknown error",
      });
    } finally {
      setIsUploading(false);
    }
  };

  const handleDatasetFileChange = async (event: ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    event.target.value = "";
    if (!file) return;

    const extension = file.name.slice(file.name.lastIndexOf(".")).toLowerCase();
    if (DOCUMENT_REDIRECT_EXTENSIONS.has(extension)) {
      setRedirectFileName(file.name);
      setDocumentRedirectOpen(true);
      return;
    }

    await handleFileUpload(file, selectLocalDataset, "Dataset uploaded");
  };

  const handleEvalFileChange = async (event: ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    event.target.value = "";
    if (!file) return;

    await handleFileUpload(file, setUploadedEvalFile, "Eval dataset uploaded");
  };

  const handleOpenLearningRecipes = useCallback(() => {
    sessionStorage.setItem(OPEN_LEARNING_RECIPES_ON_ARRIVAL_KEY, "1");
    setDocumentRedirectOpen(false);
    void navigate({ to: "/data-recipes" });
  }, [navigate]);

  return (
    <div data-tour="studio-dataset" className="col-span-1 xl:col-span-4">
      <SectionCard
        icon={<HugeiconsIcon icon={Database02Icon} className="size-5" />}
        title="Dataset"
        description="Select or upload training data"
        accent="indigo"
        className="dark:shadow-border"
      >
        <div className="flex flex-col gap-4">
          <div className="flex flex-col gap-2">
            <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
              Choose dataset
              <span className="rounded-full border border-border/70 bg-muted/40 px-2 py-0.5 text-[10px] font-medium text-foreground/80">
                {datasetSource === "upload" ? "Local" : "Hugging Face"}
              </span>
              <Tooltip>
                <TooltipTrigger asChild={true}>
                  <button
                    type="button"
                    className="text-foreground/70 hover:text-foreground"
                  >
                    <HugeiconsIcon
                      icon={InformationCircleIcon}
                      className="size-3"
                    />
                  </button>
                </TooltipTrigger>
                <TooltipContent>
                  Use the popup tabs to switch between Hugging Face and local
                  recipe outputs.{" "}
                  <a
                    href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/datasets-guide"
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-primary underline"
                  >
                    Read more
                  </a>
                </TooltipContent>
              </Tooltip>
            </span>
            <div
              ref={comboboxAnchorRef}
              onKeyDown={(event) => {
                if (event.key !== "Enter") return;
                if (!(event.target instanceof HTMLInputElement)) return;
                event.preventDefault();
                if (pickerTab === "huggingface") {
                  if (hfResults.length > 0) {
                    handleDatasetSelect(hfResults[0].id);
                  } else {
                    const text = event.target.value.trim();
                    if (text) handleDatasetSelect(text);
                  }
                  return;
                }

                if (localResultIds.length > 0) {
                  const selectedId = localResultIds[0];
                  const path = localPathById.get(selectedId);
                  if (path) {
                    handleLocalDatasetSelect(path);
                  }
                }
              }}
            >
              <Combobox
                items={comboboxItems}
                filteredItems={comboboxItems}
                filter={null}
                value={comboboxValue}
                onOpenChange={(open) => {
                  setSearchQuery("");
                  if (open && (pickerTab === "local" || activeSourceTab === "local")) {
                    void refreshLocalDatasets();
                  }
                  if (!open) {
                    setPickerTab(pendingSourceTabRef.current ?? activeSourceTab);
                    pendingSourceTabRef.current = null;
                  }
                }}
                onValueChange={(value) => {
                  if (!value) {
                    clearSelectionForTab(pickerTab);
                    return;
                  }
                  if (pickerTab === "huggingface") {
                    handleDatasetSelect(value);
                    return;
                  }
                  const path = localPathById.get(value);
                  if (path) {
                    handleLocalDatasetSelect(path);
                  }
                }}
                onInputValueChange={(value, eventDetails) =>
                  handleInputChange(value, eventDetails)
                }
                itemToStringValue={(id) =>
                  pickerTab === "local"
                    ? localLabelById.get(id) ?? id
                    : id
                }
                autoHighlight={true}
              >
                <ComboboxInput
                  placeholder={
                    pickerTab === "huggingface"
                      ? "Search Hugging Face datasets..."
                      : "Search local datasets..."
                  }
                  className="w-full"
                  showClear={true}
                >
                  <InputGroupAddon>
                    <HugeiconsIcon icon={Search01Icon} className="size-4" />
                  </InputGroupAddon>
                </ComboboxInput>
                <ComboboxContent anchor={comboboxAnchorRef}>
                  <div className="px-2 pt-2 pb-2">
                    <Tabs
                      value={pickerTab}
                      onValueChange={(value) => {
                        setPickerTab(value as "huggingface" | "local");
                        setSearchQuery("");
                      }}
                      className="w-full"
                    >
                      <TabsList className=" w-full">
                        <TabsTrigger value="huggingface">Hugging Face</TabsTrigger>
                        <TabsTrigger value="local">Local</TabsTrigger>
                      </TabsList>

                      <TabsContent value="huggingface" className="m-0">
                        {isLoading ? (
                          <div className="flex items-center justify-center py-4 gap-2 text-xs text-muted-foreground">
                            <Spinner className="size-4" /> Searching...
                          </div>
                        ) : (
                          <ComboboxEmpty>No datasets found</ComboboxEmpty>
                        )}
                        <div
                          ref={scrollRef}
                          className="max-h-64 overflow-y-auto overscroll-contain [scrollbar-width:thin]"
                        >
                          <ComboboxList className="p-1 !max-h-none !overflow-visible">
                            {(id: string) => {
                              return (
                                <ComboboxItem key={id} value={id} className="gap-2">
                                  <Tooltip>
                                    <TooltipTrigger asChild={true}>
                                      <span className="block min-w-0 flex-1 truncate">
                                        {id}
                                      </span>
                                    </TooltipTrigger>
                                    <TooltipContent
                                      side="left"
                                      className="max-w-xs break-all"
                                    >
                                      {id}
                                    </TooltipContent>
                                  </Tooltip>
                                </ComboboxItem>
                              );
                            }}
                          </ComboboxList>
                          <div ref={sentinelRef} className="h-px" />
                          {isLoadingMore && (
                            <div className="flex items-center justify-center py-2">
                              <Spinner className="size-3.5 text-muted-foreground" />
                            </div>
                          )}
                        </div>
                      </TabsContent>

                      <TabsContent value="local" className="m-0">
                        {localLoading ? (
                          <div className="flex items-center justify-center py-4 gap-2 text-xs text-muted-foreground">
                            <Spinner className="size-4" /> Loading local datasets...
                          </div>
                        ) : (
                          <>
                            {localError ? (
                              <p className="px-2 py-2 text-xs text-destructive">{localError}</p>
                            ) : (
                              <ComboboxEmpty className="px-2 py-3">
                                <div className="flex w-full flex-col items-center gap-2 text-center">
                                  <p className="text-xs text-muted-foreground">
                                    {localDatasets.length === 0
                                      ? "No local datasets yet."
                                      : "No local datasets match search."}
                                  </p>
                                  {localDatasets.length === 0 ? (
                                    <Button asChild={true} size="sm" variant="outline">
                                      <a href="/data-recipes">Open Data Recipes</a>
                                    </Button>
                                  ) : null}
                                </div>
                              </ComboboxEmpty>
                            )}
                            <div className="max-h-64 overflow-y-auto overscroll-contain [scrollbar-width:thin]">
                              <ComboboxList className="p-1 !max-h-none !overflow-visible">
                                {(id: string) => {
                                  const label = localLabelById.get(id) ?? id;
                                  return (
                                    <ComboboxItem key={id} value={id} className="gap-2">
                                      <Tooltip>
                                        <TooltipTrigger asChild={true}>
                                          <span className="block min-w-0 flex-1 truncate">
                                            {label}
                                          </span>
                                        </TooltipTrigger>
                                        <TooltipContent
                                          side="left"
                                          className="max-w-xs break-all"
                                        >
                                          {label}
                                        </TooltipContent>
                                      </Tooltip>
                                    </ComboboxItem>
                                  );
                                }}
                              </ComboboxList>
                            </div>
                          </>
                        )}
                      </TabsContent>
                    </Tabs>
                  </div>
                </ComboboxContent>
              </Combobox>
            </div>
            {(tokenValidationError ?? hfSearchError) && (
              <p className="text-xs text-destructive">
                {tokenValidationError ?? hfSearchError}
                {" — "}
                <a
                  href="https://huggingface.co/settings/tokens"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="underline"
                >
                  Get or update token
                </a>
              </p>
            )}
            {isCheckingToken && (
              <p className="text-xs text-muted-foreground">Checking token…</p>
            )}
            {pickerTab !== activeSourceTab && (
              <p className="text-[11px] text-muted-foreground">
                Browsing {pickerTab === "local" ? "Local datasets" : "Hugging Face"}.
                Current selection stays {datasetSource === "upload" ? "Local" : "Hugging Face"}.
              </p>
            )}
          </div>

          {isHfDatasetSelected ? (
            <HfDatasetSubsetSplitSelectors
              variant="studio"
              enabled={true}
              datasetName={dataset}
              accessToken={hfToken || undefined}
              datasetSubset={datasetSubset}
              setDatasetSubset={setDatasetSubset}
              datasetSplit={datasetSplit}
              setDatasetSplit={setDatasetSplit}
              datasetEvalSplit={datasetEvalSplit}
              setDatasetEvalSplit={setDatasetEvalSplit}
            />
          ) : !selectedDatasetName ? (
            <HfDatasetSubsetSplitSelectors
              variant="studio"
              enabled={false}
              datasetName={null}
              accessToken={hfToken || undefined}
              datasetSubset={datasetSubset}
              setDatasetSubset={setDatasetSubset}
              datasetSplit={datasetSplit}
              setDatasetSplit={setDatasetSplit}
              datasetEvalSplit={datasetEvalSplit}
              setDatasetEvalSplit={setDatasetEvalSplit}
            />
          ) : datasetSource === "upload" && selectedLocalDataset ? (
            <div className="rounded-lg border bg-muted/20 px-3.5 py-3">
              <div className="mb-3 flex items-center justify-between gap-3">
                <div>
                  <p className="text-xs font-medium text-muted-foreground">
                    Local dataset metadata
                  </p>
                  <p className="text-[10px] text-muted-foreground/80">
                    Data Recipe output.
                  </p>
                </div>
              </div>

              <div className="flex flex-col gap-3">
                <div className="grid grid-cols-2 gap-x-4 gap-y-2 text-xs">
                  <MetadataRow
                    label="Rows"
                    value={
                      typeof selectedLocalRows === "number"
                        ? selectedLocalRows.toLocaleString()
                        : "--"
                    }
                  />
                  <MetadataRow
                    label="Columns"
                    value={
                      selectedLocalColumns.length > 0
                        ? String(selectedLocalColumns.length)
                        : "--"
                    }
                  />
                  <MetadataRow
                    label="Batches"
                    value={
                      typeof selectedLocalMetadata?.num_completed_batches === "number" &&
                      typeof selectedLocalMetadata?.total_num_batches === "number"
                        ? `${selectedLocalMetadata.num_completed_batches}/${selectedLocalMetadata.total_num_batches}`
                        : "--"
                    }
                  />
                  <MetadataRow
                    label="Updated"
                    value={formatUpdatedDate(selectedLocalUpdatedAt)}
                  />
                </div>
              </div>
            </div>
          ) : null}

          {datasetSource === "upload" && uploadedFile && (
            <div className="rounded-lg border bg-muted/20 px-3.5 py-3">
              <p className="mb-2 text-xs font-medium text-muted-foreground">
                Eval dataset
              </p>
              {uploadedEvalFile ? (
                <div className="flex items-center justify-between gap-2">
                  <div className="flex items-center gap-1.5 overflow-hidden">
                    <HugeiconsIcon icon={FileAttachmentIcon} className="size-3.5 shrink-0 text-muted-foreground" />
                    <span className="truncate text-xs">
                      {deriveLocalDatasetName(uploadedEvalFile)}
                    </span>
                  </div>
                  <Button
                    variant="ghost"
                    size="sm"
                    className="h-6 w-6 shrink-0 cursor-pointer p-0"
                    onClick={() => setUploadedEvalFile(null)}
                  >
                    <HugeiconsIcon icon={Cancel01Icon} className="size-3.5" />
                  </Button>
                </div>
              ) : (
                <div className="flex flex-col gap-1.5">
                  <Button
                    variant="outline"
                    size="sm"
                    className="w-full cursor-pointer gap-1.5"
                    disabled={isUploading}
                    onClick={() => evalFileInputRef.current?.click()}
                  >
                    {isUploading ? (
                      <Spinner className="size-3.5" />
                    ) : (
                      <HugeiconsIcon icon={CloudUploadIcon} className="size-3.5" />
                    )}
                    {isUploading ? "Uploading..." : "Upload eval file"}
                  </Button>
                  <p className="text-[10px] text-muted-foreground/80">
                    Optional. If not provided, a small portion will be split from the training data.
                  </p>
                </div>
              )}
            </div>
          )}

          <Collapsible open={advancedOpen} onOpenChange={setAdvancedOpen}>
            <CollapsibleTrigger className="flex w-full cursor-pointer items-center gap-1.5 text-xs text-muted-foreground">
              <HugeiconsIcon
                icon={ArrowDown01Icon}
                className={`size-3.5 transition-transform ${advancedOpen ? "rotate-180" : ""}`}
              />
              Advanced
            </CollapsibleTrigger>
            <CollapsibleContent className="mt-3 data-[state=open]:overflow-visible">
              <div className="flex flex-col gap-4">
                <div className="flex flex-col gap-2">
                  <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
                    Target Format
                    <Tooltip>
                      <TooltipTrigger asChild={true}>
                        <button
                          type="button"
                          className="text-foreground/70 hover:text-foreground"
                        >
                          <HugeiconsIcon
                            icon={InformationCircleIcon}
                            className="size-3"
                          />
                        </button>
                      </TooltipTrigger>
                      <TooltipContent>
                        Format of your training data. Auto-detect works for most
                        datasets.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/datasets-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </TooltipContent>
                    </Tooltip>
                  </span>
                  <Select
                    value={datasetFormat}
                    onValueChange={(v) =>
                      setDatasetFormat(v as typeof datasetFormat)
                    }
                  >
                    <SelectTrigger className="w-full">
                      <SelectValue />
                    </SelectTrigger>
                    <SelectContent>
                      <SelectItem value="auto">Auto</SelectItem>
                      <SelectItem value="alpaca">Alpaca</SelectItem>
                      <SelectItem value="chatml">ChatML</SelectItem>
                      <SelectItem value="sharegpt">ShareGPT</SelectItem>
                    </SelectContent>
                  </Select>
                </div>
                <div className="grid grid-cols-2 gap-3">
                  <div className="flex flex-col gap-1.5">
                    <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
                      Train Split Start
                      <Tooltip>
                        <TooltipTrigger asChild={true}>
                          <button
                            type="button"
                            className="text-foreground/70 hover:text-foreground"
                          >
                            <HugeiconsIcon
                              icon={InformationCircleIcon}
                              className="size-3"
                            />
                          </button>
                        </TooltipTrigger>
                        <TooltipContent>
                          Only train on a subset of your training split by
                          specifying a start row index (inclusive, 0-based).
                          Leave empty to start from the first row.
                        </TooltipContent>
                      </Tooltip>
                    </span>
                    <Input
                      type="number"
                      inputMode="numeric"
                      min={0}
                      step={1}
                      placeholder="0"
                      value={datasetSliceStart ?? ""}
                      onChange={(e) =>
                        setDatasetSliceStart(normalizeSliceInput(e.target.value))
                      }
                    />
                  </div>
                  <div className="flex flex-col gap-1.5">
                    <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
                      Train Split End
                      <Tooltip>
                        <TooltipTrigger asChild={true}>
                          <button
                            type="button"
                            className="text-foreground/70 hover:text-foreground"
                          >
                            <HugeiconsIcon
                              icon={InformationCircleIcon}
                              className="size-3"
                            />
                          </button>
                        </TooltipTrigger>
                        <TooltipContent>
                          Last row index to include from the training split
                          (inclusive, 0-based). For example, set Start to 0 and
                          End to 99 to train on the first 100 rows. Leave empty
                          to use all remaining rows.
                        </TooltipContent>
                      </Tooltip>
                    </span>
                    <Input
                      type="number"
                      inputMode="numeric"
                      min={0}
                      step={1}
                      placeholder="End"
                      value={datasetSliceEnd ?? ""}
                      onChange={(e) =>
                        setDatasetSliceEnd(normalizeSliceInput(e.target.value))
                      }
                    />
                  </div>
                </div>
              </div>
            </CollapsibleContent>
          </Collapsible>

          <div className="flex flex-col gap-4 pt-1">
            {selectedDatasetName ? (
              <div className="flex items-center gap-3 rounded-lg border bg-muted/40 px-3.5 py-3">
                <div className="rounded-md bg-indigo-500/10 p-1.5">
                  <HugeiconsIcon
                    icon={FileAttachmentIcon}
                    className="size-4 text-indigo-500"
                  />
                </div>
                <div className="flex-1 min-w-0">
                  <p className="font-mono text-sm font-medium truncate">
                    {datasetSource === "upload"
                      ? selectedLocalDataset?.label ??
                        deriveLocalDatasetName(selectedDatasetName)
                      : selectedDatasetName}
                  </p>
                  <p className="text-[10px] text-muted-foreground">
                    {datasetSource === "upload" ? (
                      uploadedFile ? (
                        <>
                          Local dataset
                          {selectedLocalRows != null
                            ? ` / ${selectedLocalRows.toLocaleString()} rows`
                            : ""}
                        </>
                      ) : (
                        "Local dataset"
                      )
                    ) : (
                      <>
                        Hugging Face Dataset
                        {datasetSubset && ` / ${datasetSubset}`}
                        {datasetSplit && ` / ${datasetSplit}`}
                      </>
                    )}
                  </p>
                </div>
                <Button
                  variant="ghost"
                  size="sm"
                  className="shrink-0 text-xs"
                  onClick={() => clearSelectionForTab(activeSourceTab)}
                >
                  Clear
                </Button>
              </div>
            ) : (
              <div className="flex items-center gap-3 rounded-lg border border-dashed bg-muted/20 px-3.5 py-3">
                <HugeiconsIcon
                  icon={Database02Icon}
                  className="size-4 text-muted-foreground/40"
                />
                <span className="text-xs text-muted-foreground">
                  No dataset selected
                </span>
              </div>
            )}

            <div className="grid grid-cols-2 gap-2">
              <Button
                variant="outline"
                size="sm"
                className="cursor-pointer gap-1.5"
                disabled={isUploading}
                onClick={handleUploadButtonClick}
              >
                {isUploading ? (
                  <Spinner className="size-3.5" />
                ) : (
                  <HugeiconsIcon icon={CloudUploadIcon} className="size-3.5" />
                )}
                {isUploading ? "Uploading..." : "Upload"}
              </Button>
              <Button
                variant="outline"
                size="sm"
                className="cursor-pointer gap-1.5"
                disabled={!selectedDatasetName}
                onClick={() => openPreview()}
              >
                <HugeiconsIcon icon={ViewIcon} className="size-3.5" />
                View dataset
              </Button>
            </div>
          </div>
          <input
            ref={fileInputRef}
            type="file"
            accept=".json,.jsonl,.csv,.parquet,.pdf,.docx,.txt"
            className="hidden"
            onChange={(event) => {
              void handleDatasetFileChange(event);
            }}
          />
          <input
            ref={evalFileInputRef}
            type="file"
            accept=".json,.jsonl,.csv,.parquet"
            className="hidden"
            onChange={(event) => {
              void handleEvalFileChange(event);
            }}
          />
          <DocumentUploadRedirectDialog
            open={documentRedirectOpen}
            onOpenChange={setDocumentRedirectOpen}
            fileName={redirectFileName}
            onOpenLearningRecipes={handleOpenLearningRecipes}
          />
      </div>
      </SectionCard>
    </div>
  );
}

/**
 * Single label/value row: the label is rendered in muted text on the left and
 * the already-formatted value in medium weight on the right.
 */
function MetadataRow(props: { label: string; value: string }) {
  const { label, value } = props;

  return (
    <div className="flex items-center justify-between gap-2 rounded-md bg-background/60 px-2 py-1.5">
      <span className="text-muted-foreground">{label}</span>
      <span className="font-medium text-foreground">{value}</span>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/document-upload-redirect-dialog.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { Badge } from "@/components/ui/badge";
import {
  ArrowRight01Icon,
  DocumentAttachmentIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import type { ReactElement } from "react";

/** Props accepted by `DocumentUploadRedirectDialog`. */
type DocumentUploadRedirectDialogProps = {
  /** Whether the dialog is open; forwarded to the underlying `Dialog`. */
  open: boolean;
  /** Forwarded to `Dialog`; the Cancel button also calls it with `false`. */
  onOpenChange: (open: boolean) => void;
  /** File name shown in the description; a generic sentence is used when null or empty. */
  fileName: string | null;
  /** Called when the "Open Learning Recipes" button is clicked. */
  onOpenLearningRecipes: () => void;
};

/**
 * Dialog telling the user that the chosen file is source material that has to
 * be converted with Data Recipes before it can be used for fine-tuning.
 *
 * Offers two actions: Cancel (requests the dialog to close through
 * `onOpenChange(false)`) and "Open Learning Recipes" (delegates to
 * `onOpenLearningRecipes`).
 */
export function DocumentUploadRedirectDialog(
  props: DocumentUploadRedirectDialogProps,
): ReactElement {
  const { open, onOpenChange, fileName, onOpenLearningRecipes } = props;

  const dismiss = () => onOpenChange(false);

  // First sentence of the description: names the file when one is known,
  // otherwise falls back to a generic wording.
  const sourceMaterialNotice = fileName ? (
    <>
      <span className="font-medium text-foreground">{fileName}</span>{" "}
      is source material, not a ready-to-train dataset.
    </>
  ) : (
    "This file is source material, not a ready-to-train dataset."
  );

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent
        className="sm:max-w-lg"
        overlayClassName="bg-background/45 supports-backdrop-filter:backdrop-blur-[1px]"
      >
        <DialogHeader className="gap-3">
          <div className="flex items-center gap-2">
            <div className="flex size-10 items-center justify-center rounded-2xl border border-border/70 bg-muted/30">
              <HugeiconsIcon
                icon={DocumentAttachmentIcon}
                className="size-5 text-foreground/90"
              />
            </div>
            <Badge variant="outline">Recipe Studio</Badge>
          </div>
          <div className="space-y-1">
            <DialogTitle>This file needs conversion first</DialogTitle>
            <DialogDescription>
              {sourceMaterialNotice}{" "}
              Use Data Recipes to turn documents into a dataset, then bring the
              result back here for fine-tuning.
            </DialogDescription>
          </div>
        </DialogHeader>

        <div className="corner-squircle rounded-2xl border border-border/70 bg-muted/20 p-4">
          <p className="text-sm font-medium text-foreground">
            Best next step
          </p>
          <p className="mt-1 text-sm text-muted-foreground">
            Open Learning Recipes and start from a document-based recipe like PDF
            grounded QA.
          </p>
        </div>

        <DialogFooter className="sm:justify-between">
          <Button type="button" variant="outline" onClick={dismiss}>
            Cancel
          </Button>
          <Button type="button" onClick={onOpenLearningRecipes}>
            Open Learning Recipes
            <HugeiconsIcon icon={ArrowRight01Icon} className="size-4" />
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/model-section.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { SectionCard } from "@/components/section-card";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  InputGroup,
  InputGroupAddon,
  InputGroupInput,
} from "@/components/ui/input-group";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Spinner } from "@/components/ui/spinner";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import { MODEL_TYPE_TO_HF_TASK, PRIORITY_TRAINING_MODELS, applyPriorityOrdering } from "@/config/training";
import {
  useDebouncedValue,
  useGpuInfo,
  useHfModelSearch,
  useHfTokenValidation,
  useInfiniteScroll,
} from "@/hooks";
import { formatCompact } from "@/lib/utils";
import {
  type TrainingMethod as VramTrainingMethod,
  type VramFitStatus,
  buildModelVramMap,
} from "@/lib/vram";
import {
  listLocalModels,
  type LocalModelInfo,
  useTrainingConfigStore,
} from "@/features/training";
import type { TrainingMethod } from "@/types/training";
import {
  ChipIcon,
  FolderSearchIcon,
  InformationCircleIcon,
  Key01Icon,
  Search01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useEffect, useMemo, useRef, useState } from "react";
import { useShallow } from "zustand/react/shallow";

// Tailwind background class for the small colored dot rendered next to each
// option of the training-method select, keyed by the option's value.
const METHOD_DOTS: Record<string, string> = {
  qlora: "bg-emerald-400",
  lora: "bg-blue-400",
  full: "bg-amber-400",
};

// Inverted color scheme (foreground color used as background and vice versa).
// DARK_TRIGGER is applied to the training-method select trigger.
const DARK_TRIGGER =
  "w-full bg-foreground text-background hover:bg-foreground/90 dark:bg-foreground dark:text-background dark:hover:bg-foreground [&_svg]:text-background/50";
// DARK_CONTENT is applied to the training-method select dropdown; it also
// overrides the accent variables and the select item / scroll button colors.
const DARK_CONTENT =
  "bg-foreground text-background shadow-xl border-background/10 [--accent:rgba(255,255,255,0.1)] [--accent-foreground:white] dark:[--accent:rgba(2,6,23,0.08)] dark:[--accent-foreground:rgb(2,6,23)] [&_[data-slot=select-item]]:text-white/80 dark:[&_[data-slot=select-item]]:text-slate-900 [&_[data-slot=select-scroll-up-button]]:bg-foreground [&_[data-slot=select-scroll-down-button]]:bg-foreground";
// DARK_COMBOBOX_CONTENT is applied to the local-model combobox dropdown.
const DARK_COMBOBOX_CONTENT =
  "bg-foreground text-background shadow-xl border-background/10 dark:[--accent:rgba(2,6,23,0.08)] dark:[--accent-foreground:rgb(2,6,23)] dark:[&_[data-slot=combobox-item]]:text-slate-900 dark:[&_.text-muted-foreground]:text-slate-500";

/**
 * "Model" card of the studio training form.
 *
 * Renders four controls bound to the training config store:
 *  - a local-model combobox (ids returned by `listLocalModels`, or a manually
 *    typed path / repo id),
 *  - a Hugging Face model search combobox with per-model VRAM fit hints,
 *  - the training-method select (QLoRA / LoRA / Full),
 *  - an optional Hugging Face token input with validation feedback.
 */
export function ModelSection() {
  const gpu = useGpuInfo();

  const {
    modelType,
    selectedModel,
    setSelectedModel,
    trainingMethod,
    setTrainingMethod,
    hfToken,
    setHfToken,
  } = useTrainingConfigStore(
    useShallow(
      ({
        modelType,
        selectedModel,
        setSelectedModel,
        trainingMethod,
        setTrainingMethod,
        hfToken,
        setHfToken,
      }) => ({
        modelType,
        selectedModel,
        setSelectedModel,
        trainingMethod,
        setTrainingMethod,
        hfToken,
        setHfToken,
      }),
    ),
  );

  // Raw text typed into the Hugging Face search box (debounced below).
  const [inputValue, setInputValue] = useState("");
  // Text typed into / selected in the local-model combobox.
  const [localModelInput, setLocalModelInput] = useState("");
  const [localModels, setLocalModels] = useState<LocalModelInfo[]>([]);
  const [isLoadingLocalModels, setIsLoadingLocalModels] = useState(true);
  const [localModelsError, setLocalModelsError] = useState<string | null>(null);
  // Set when a model is picked so the next input-value change is ignored
  // instead of being treated as a new search query (see handleInputChange).
  const selectingRef = useRef(false);
  const debouncedQuery = useDebouncedValue(inputValue);

  // Stores the picked model id and flags the selection for handleInputChange.
  function handleModelSelect(id: string | null) {
    selectingRef.current = true;
    setSelectedModel(id);
  }

  // Updates the search text, except for the single change that directly
  // follows a selection (that one only clears the flag).
  function handleInputChange(val: string) {
    if (selectingRef.current) {
      selectingRef.current = false;
      return;
    }
    setInputValue(val);
  }

  // Commits a manually typed local path / repo id; blank input is ignored.
  function applyLocalModel(value: string) {
    const next = value.trim();
    if (!next) return;
    setSelectedModel(next);
  }

  // Load the local model list once on mount. The abort signal is handed to
  // the request, and every state update is skipped once cleanup has aborted.
  useEffect(() => {
    const controller = new AbortController();
    void listLocalModels(controller.signal)
      .then((models) => {
        if (controller.signal.aborted) return;
        setLocalModels(models);
      })
      .catch((error) => {
        if (controller.signal.aborted) return;
        setLocalModelsError(
          error instanceof Error ? error.message : "Failed to load local models",
        );
      })
      .finally(() => {
        if (controller.signal.aborted) return;
        setIsLoadingLocalModels(false);
      });
    return () => controller.abort();
  }, []);
  const task = modelType ? MODEL_TYPE_TO_HF_TASK[modelType] : undefined;
  const {
    results: hfResults,
    isLoading,
    isLoadingMore,
    fetchMore,
    error: hfSearchError,
  } = useHfModelSearch(debouncedQuery, {
    task,
    accessToken: hfToken || undefined,
    excludeGguf: true,
    priorityIds: PRIORITY_TRAINING_MODELS,
  });

  const { error: tokenValidationError, isChecking: isCheckingToken } =
    useHfTokenValidation(hfToken);

  // Ids offered by the Hugging Face combobox. The currently selected model is
  // appended when the search results do not contain it, so it stays listed.
  const resultIds = useMemo(() => {
    const ids = hfResults.map((r) => r.id);
    if (selectedModel && !ids.includes(selectedModel)) {
      ids.push(selectedModel);
    }

    return applyPriorityOrdering(ids);
  }, [hfResults, selectedModel]);

  // Filter out GGUF models — they can't be used for training
  const trainableLocalModels = useMemo(
    () =>
      localModels.filter((m) => {
        if (m.path.endsWith(".gguf")) return false;
        if (m.id.toLowerCase().includes("-gguf")) return false;
        return true;
      }),
    [localModels],
  );

  // Lookup of local model metadata by id, used for filtering and rendering.
  const localMetaById = useMemo(() => {
    const map = new Map<string, LocalModelInfo>();
    for (const model of trainableLocalModels) map.set(model.id, model);
    return map;
  }, [trainableLocalModels]);

  // Ids offered by the local combobox. Manually typed text that matches no
  // known id is placed first so it can be picked as-is.
  // The dependency list names `trainableLocalModels` (the value actually read)
  // rather than the upstream `localModels`, so the memo cannot go stale if the
  // trainable filter ever gains additional inputs.
  const localResultIds = useMemo(() => {
    const ids = trainableLocalModels.map((model) => model.id);
    const manual = localModelInput.trim();
    if (manual && !ids.includes(manual)) {
      ids.unshift(manual);
    }
    return ids;
  }, [localModelInput, trainableLocalModels]);

  // Case-insensitive substring match of the typed text against the id, the
  // display name, and the path of each local model.
  const localFilteredIds = useMemo(() => {
    const q = localModelInput.trim().toLowerCase();
    if (!q) return localResultIds;
    return localResultIds.filter((id) => {
      const meta = localMetaById.get(id);
      if (id.toLowerCase().includes(q)) return true;
      if (meta?.display_name.toLowerCase().includes(q)) return true;
      if (meta?.path.toLowerCase().includes(q)) return true;
      return false;
    });
  }, [localMetaById, localModelInput, localResultIds]);

  // Pre-compute VRAM fit status for every model in the current result set.
  // Keyed by model id so the render callback is a simple O(1) lookup.
  // Re-computes when the training method changes (QLoRA=4-bit vs LoRA/Full=fp16).
  const vramMap = useMemo(() => {
    const fitMap = buildModelVramMap(
      hfResults,
      trainingMethod as VramTrainingMethod,
      gpu,
    );
    const map = new Map<
      string,
      { est: number; status: VramFitStatus | null; detail: string | null }
    >();
    for (const r of hfResults) {
      const detail = r.totalParams ? formatCompact(r.totalParams) : null;
      const fit = fitMap.get(r.id);
      map.set(r.id, {
        est: fit?.est ?? 0,
        status: fit?.status ?? null,
        detail,
      });
    }
    return map;
  }, [hfResults, gpu, trainingMethod]);

  const comboboxAnchorRef = useRef<HTMLDivElement>(null);
  const localComboboxAnchorRef = useRef<HTMLDivElement>(null);
  const { scrollRef, sentinelRef } = useInfiniteScroll(
    fetchMore,
    hfResults.length,
  );

  return (
    <div data-tour="studio-model" className="col-span-1 md:col-span-2 xl:col-span-12">
      <SectionCard
        icon={<HugeiconsIcon icon={ChipIcon} className="size-5" />}
        title="Model"
        description="Select base model and training method"
        accent="emerald"
        featured={true}
        badge="2x Faster Training"
        className="shadow-border ring-1 ring-border"
      >
        <div className="grid gap-4 md:grid-cols-2 xl:grid-cols-4">
          {/* Local model: pick a scanned model or type a path / repo id */}
          <div data-tour="studio-local-model" className="flex flex-col gap-2">
            <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
              Local Model
            <Tooltip>
              <TooltipTrigger asChild={true}>
                <button
                  type="button"
                  className="text-foreground/70 hover:text-foreground"
                >
                  <HugeiconsIcon
                    icon={InformationCircleIcon}
                    className="size-3"
                  />
                </button>
              </TooltipTrigger>
              <TooltipContent>
                Path to a locally downloaded model or a custom HF repo.
              </TooltipContent>
            </Tooltip>
          </span>
          <div ref={localComboboxAnchorRef}>
            <Combobox
              items={localResultIds}
              filteredItems={localFilteredIds}
              filter={null}
              value={localModelInput || null}
              onValueChange={(id) => {
                const next = id ?? "";
                setLocalModelInput(next);
                if (next) setSelectedModel(next);
              }}
              onInputValueChange={setLocalModelInput}
              itemToStringValue={(id) => id}
              autoHighlight={true}
            >
              <ComboboxInput
                placeholder={
                  isLoadingLocalModels
                    ? "Scanning local and cached models..."
                    : "./models/my-model"
                }
                className="w-full bg-foreground text-background [&_input]:text-background [&_input]:placeholder:text-background/40 [&_svg]:text-background/50 hover:bg-foreground/90"
                onBlur={() => applyLocalModel(localModelInput)}
                onKeyDown={(event) => {
                  if (event.key !== "Enter") return;
                  event.preventDefault();
                  applyLocalModel(localModelInput);
                }}
              >
                <InputGroupAddon>
                  <HugeiconsIcon icon={FolderSearchIcon} className="size-4" />
                </InputGroupAddon>
              </ComboboxInput>
              <ComboboxContent
                anchor={localComboboxAnchorRef}
                className={DARK_COMBOBOX_CONTENT}
              >
                {isLoadingLocalModels ? (
                  <div className="flex items-center justify-center gap-2 py-4 text-xs text-muted-foreground">
                    <Spinner className="size-4" /> Scanning...
                  </div>
                ) : localModelsError ? (
                  <div className="px-3 py-2 text-xs text-red-500">
                    {localModelsError}
                  </div>
                ) : (
                  <ComboboxEmpty>No local models found</ComboboxEmpty>
                )}
                <ComboboxList className="p-1">
                  {(id: string) => {
                    const model = localMetaById.get(id);
                    const source =
                      model?.source === "hf_cache" ? "HF cache" : "Local dir";
                    return (
                      <ComboboxItem key={id} value={id} className="gap-2">
                        <Tooltip>
                          <TooltipTrigger asChild={true}>
                            <span className="block min-w-0 flex-1 truncate">
                              {model?.display_name ?? id}
                            </span>
                          </TooltipTrigger>
                          <TooltipContent side="left" className="max-w-xs break-all">
                            {model?.path ?? id}
                          </TooltipContent>
                        </Tooltip>
                        <span className="ml-auto shrink-0 text-[10px] text-muted-foreground">
                          {source}
                        </span>
                      </ComboboxItem>
                    );
                  }}
                </ComboboxList>
              </ComboboxContent>
            </Combobox>
          </div>
          {isLoadingLocalModels ? (
            <p className="text-[10px] text-muted-foreground">Scanning local models...</p>
          ) : localModelsError ? (
            <p className="text-[10px] text-red-500">{localModelsError}</p>
          ) : (
            <p className="text-[10px] text-muted-foreground">
              {trainableLocalModels.length > 0
                ? `${trainableLocalModels.length} local/cached models found`
                : "No local models found. Enter path manually."}
            </p>
          )}
        </div>

          {/* Hugging Face model search with infinite scroll and VRAM hints */}
          <div data-tour="studio-base-model" className="flex flex-col gap-2">
          <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
            Hugging Face Model
            <Tooltip>
              <TooltipTrigger asChild={true}>
                <button
                  type="button"
                  className="text-foreground/70 hover:text-foreground"
                >
                  <HugeiconsIcon
                    icon={InformationCircleIcon}
                    className="size-3"
                  />
                </button>
              </TooltipTrigger>
              <TooltipContent>
                Search Hugging Face models or pick from our recommended list.{" "}
                <a
                  href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/what-model-should-i-use"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-primary underline"
                >
                  Read more
                </a>
              </TooltipContent>
            </Tooltip>
          </span>
          {/* Enter inside the search input selects the first fetched result,
              or the raw typed text when there are no results. */}
          <div
            ref={comboboxAnchorRef}
            onKeyDown={(event) => {
              if (event.key !== "Enter") return;
              if (!(event.target instanceof HTMLInputElement)) return;
              event.preventDefault();
              if (hfResults.length > 0) {
                handleModelSelect(hfResults[0].id);
              } else {
                const text = event.target.value.trim();
                if (text) handleModelSelect(text);
              }
            }}
          >
            <Combobox
              items={resultIds}
              filteredItems={resultIds}
              filter={null}
              value={selectedModel}
              onValueChange={handleModelSelect}
              onInputValueChange={handleInputChange}
              itemToStringValue={(id) => id}
              autoHighlight={true}
            >
              <ComboboxInput placeholder="Search models..." className="w-full">
                <InputGroupAddon>
                  <HugeiconsIcon icon={Search01Icon} className="size-4" />
                </InputGroupAddon>
              </ComboboxInput>
              <ComboboxContent anchor={comboboxAnchorRef}>
                {isLoading ? (
                  <div className="flex items-center justify-center py-4 gap-2 text-xs text-muted-foreground">
                    <Spinner className="size-4" /> Searching…
                  </div>
                ) : (
                  <ComboboxEmpty>No models found</ComboboxEmpty>
                )}
                <div
                  ref={scrollRef}
                  className="max-h-64 overflow-y-auto overscroll-contain [scrollbar-width:thin]"
                >
                  <ComboboxList className="p-1 !max-h-none !overflow-visible">
                    {(id: string) => {
                      // Entries exist only for ids present in hfResults; other
                      // ids (e.g. an appended selected model) get no hints.
                      const entry = vramMap.get(id);
                      const detail = entry?.detail ?? null;
                      const fitStatus = entry?.status ?? null;
                      const vramEst = entry?.est ?? null;
                      const exceeds = fitStatus === "exceeds";

                      return (
                        <ComboboxItem
                          key={id}
                          value={id}
                          className={`gap-2 ${exceeds ? "opacity-50" : ""}`}
                        >
                          <Tooltip>
                            <TooltipTrigger asChild={true}>
                              <span className={`block min-w-0 flex-1 truncate ${exceeds ? "line-through decoration-muted-foreground/50" : ""}`}>
                                {id}
                              </span>
                            </TooltipTrigger>
                            <TooltipContent
                              side="left"
                              className="max-w-xs break-all"
                            >
                              {id}
                              {vramEst != null && vramEst > 0 && gpu.available && (
                                <span className="block text-[10px] mt-1">
                                  {exceeds
                                    ? `Needs ~${vramEst}GB VRAM (GPU: ${gpu.memoryTotalGb}GB)`
                                    : fitStatus === "tight"
                                      ? `~${vramEst}GB VRAM (tight fit on ${gpu.memoryTotalGb}GB)`
                                      : `~${vramEst}GB VRAM`}
                                </span>
                              )}
                            </TooltipContent>
                          </Tooltip>
                          <span className="ml-auto flex items-center gap-1.5 shrink-0">
                            {fitStatus === "exceeds" && (
                              <span className="text-[9px] font-medium text-red-400">
                                OOM
                              </span>
                            )}
                            {fitStatus === "tight" && (
                              <span className="text-[9px] font-medium text-amber-400">
                                TIGHT
                              </span>
                            )}
                            {detail && (
                              <span className="text-[10px] text-muted-foreground">
                                {detail}
                              </span>
                            )}
                          </span>
                        </ComboboxItem>
                      );
                    }}
                  </ComboboxList>
                  <div ref={sentinelRef} className="h-px" />
                  {isLoadingMore && (
                    <div className="flex items-center justify-center py-2">
                      <Spinner className="size-3.5 text-muted-foreground" />
                    </div>
                  )}
                </div>
              </ComboboxContent>
            </Combobox>
          </div>
        </div>

          {/* Training method select */}
          <div data-tour="studio-method" className="flex flex-col gap-2">
          <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
            Method
            <Tooltip>
              <TooltipTrigger asChild={true}>
                <button
                  type="button"
                  className="text-foreground/70 hover:text-foreground"
                >
                  <HugeiconsIcon
                    icon={InformationCircleIcon}
                    className="size-3"
                  />
                </button>
              </TooltipTrigger>
              <TooltipContent className="max-w-xs">
                QLoRA uses 4-bit quantization for lowest VRAM. LoRA uses 16-bit.
                Full updates all weights.{" "}
                <a
                  href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-primary underline"
                >
                  Read more
                </a>
              </TooltipContent>
            </Tooltip>
          </span>
          <Select
            value={trainingMethod}
            onValueChange={(v) => setTrainingMethod(v as TrainingMethod)}
          >
            <SelectTrigger className={DARK_TRIGGER}>
              <SelectValue />
            </SelectTrigger>
            <SelectContent
              position="popper"
              className={`${DARK_CONTENT} w-[var(--radix-select-trigger-width)]`}
            >
              <SelectItem value="qlora">
                <span className="flex items-center gap-2">
                  <span
                    className={`size-2 shrink-0 rounded-full ${METHOD_DOTS.qlora}`}
                  />
                  QLoRA (4-bit)
                </span>
              </SelectItem>
              <SelectItem value="lora">
                <span className="flex items-center gap-2">
                  <span
                    className={`size-2 shrink-0 rounded-full ${METHOD_DOTS.lora}`}
                  />
                  LoRA (16-bit)
                </span>
              </SelectItem>
              <SelectItem value="full">
                <span className="flex items-center gap-2">
                  <span
                    className={`size-2 shrink-0 rounded-full ${METHOD_DOTS.full}`}
                  />
                  Full Fine-tune
                </span>
              </SelectItem>
            </SelectContent>
          </Select>
        </div>

        {/* Optional Hugging Face token; shows validation or search errors */}
        <div className="flex flex-col gap-2">
          <span className="text-xs font-medium text-muted-foreground">
            Hugging Face Token (Optional)
          </span>
          <InputGroup>
            <InputGroupAddon>
              <HugeiconsIcon icon={Key01Icon} className="size-4" />
            </InputGroupAddon>
            <InputGroupInput
              type="password"
              autoComplete="new-password"
              name="hf-token"
              placeholder="hf_..."
              value={hfToken}
              onChange={(e) => setHfToken(e.target.value)}
            />
          </InputGroup>
          {(tokenValidationError ?? hfSearchError) && (
            <p className="text-xs text-destructive">
              {tokenValidationError ?? hfSearchError}
              {" — "}
              <a
                href="https://huggingface.co/settings/tokens"
                target="_blank"
                rel="noopener noreferrer"
                className="underline"
              >
                Get or update token
              </a>
            </p>
          )}
          {isCheckingToken && (
            <p className="text-xs text-muted-foreground">Checking token…</p>
          )}
        </div>
        </div>
      </SectionCard>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/params-section.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { SectionCard } from "@/components/section-card";
import { Checkbox } from "@/components/ui/checkbox";
import {
  Collapsible,
  CollapsibleContent,
  CollapsibleTrigger,
} from "@/components/ui/collapsible";
import { Input } from "@/components/ui/input";
import {
  Combobox,
  ComboboxContent,
  ComboboxEmpty,
  ComboboxInput,
  ComboboxItem,
  ComboboxList,
} from "@/components/ui/combobox";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Slider } from "@/components/ui/slider";
import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import {
  CONTEXT_LENGTHS,
  LR_SCHEDULER_OPTIONS,
  OPTIMIZER_OPTIONS,
  TARGET_MODULES,
} from "@/config/training";
import { useMaxStepsEpochsToggle, useTrainingConfigStore } from "@/features/training";
import type { GradientCheckpointing } from "@/types/training";
import {
  ArrowDown01Icon,
  InformationCircleIcon,
  Settings04Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, type ReactNode, useEffect, useRef, useState } from "react";

/**
 * One labelled row of the parameters form: the label (with an optional info
 * tooltip button) on the left and the supplied control on the right.
 */
function Row(props: {
  label: string;
  tooltip?: ReactNode;
  children: ReactNode;
}): ReactElement {
  const { label, tooltip, children } = props;

  // Short-circuit on purpose: a falsy `tooltip` is rendered as-is, exactly as
  // an inline `{tooltip && ...}` expression would be.
  const infoHint = tooltip && (
    <Tooltip>
      <TooltipTrigger asChild={true}>
        <button
          type="button"
          className="text-foreground/70 hover:text-foreground"
        >
          <HugeiconsIcon icon={InformationCircleIcon} className="size-3" />
        </button>
      </TooltipTrigger>
      <TooltipContent>{tooltip}</TooltipContent>
    </Tooltip>
  );

  return (
    <div className="flex items-center justify-between">
      <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
        {label}
        {infoHint}
      </span>
      {children}
    </div>
  );
}

/**
 * A `Row` holding a slider and a numeric text input that edit the same value.
 *
 * Both controls share `min`, `max` and `step` and report changes through
 * `onChange`. When `format` is given, its result is what the numeric input
 * displays; otherwise the raw number is shown.
 */
function SliderRow(props: {
  label: string;
  tooltip?: ReactNode;
  value: number;
  onChange: (v: number) => void;
  min: number;
  max: number;
  step: number;
  format?: (v: number) => string;
}): ReactElement {
  const { label, tooltip, value, onChange, min, max, step, format } = props;

  // Value shown in the numeric input: formatted text when a formatter exists.
  const shownValue = format ? format(value) : value;

  return (
    <Row label={label} tooltip={tooltip}>
      <div className="flex items-center gap-3">
        <Slider
          className="w-32"
          min={min}
          max={max}
          step={step}
          value={[value]}
          onValueChange={([next]) => onChange(next)}
        />
        <input
          type="number"
          className="w-12 text-right font-mono text-xs font-medium bg-muted/50 border border-border rounded-lg px-1.5 py-0.5 focus:outline-none focus:ring-1 focus:ring-primary/30 [&::-webkit-inner-spin-button]:appearance-none"
          min={min}
          max={max}
          step={step}
          value={shownValue}
          onChange={(event) => {
            // The typed text is converted with Number() and forwarded as-is.
            const typed = Number(event.target.value);
            onChange(typed);
          }}
        />
      </div>
    </Row>
  );
}

/**
 * Training "Parameters" card.
 *
 * Every field is read from and written to the store returned by
 * `useTrainingConfigStore()`. Rendered top to bottom:
 *   - a Max Steps / Epochs control (number box + slider, with a toggle link),
 *   - a context-length combobox that also accepts custom values,
 *   - a learning-rate input,
 *   - a collapsible "LoRA Settings" group, shown only when the training
 *     method is not "full",
 *   - a collapsible "Training Hyperparameters" group with Optimization,
 *     Schedule and Memory tabs.
 */
export function ParamsSection(): ReactElement {
  const store = useTrainingConfigStore();
  // Any training method other than "full" gets the LoRA settings group.
  const isLora = store.trainingMethod !== "full";
  // When true, the LoRA group shows the vision layer/module checkboxes instead
  // of the target-module chips, and the packing checkbox is hidden.
  // NOTE(review): the strict `=== true` suggests `isDatasetImage` can be
  // unset/nullable — confirm against the store type.
  const showVisionLora = store.isVisionModel && store.isDatasetImage === true;
  // Open/closed state of the two collapsible groups (both start closed).
  const [loraOpen, setLoraOpen] = useState(false);
  const [hyperOpen, setHyperOpen] = useState(false);
  // Text currently in the context-length input; it can differ from the store
  // while the user is typing.
  const [ctxInput, setCtxInput] = useState(String(store.contextLength));
  // Wrapper element the combobox popup is anchored to.
  const ctxAnchorRef = useRef<HTMLDivElement>(null);
  // Preset context lengths as strings, the form the combobox items use here.
  const ctxItems = CONTEXT_LENGTHS.map(String);

  // Keep input in sync when the store value changes externally
  // (e.g. model defaults being applied after model selection).
  useEffect(() => {
    setCtxInput(String(store.contextLength));
  }, [store.contextLength]);

  // Writes `input` to the store when it parses (via `Number`) to a positive
  // integer and returns that number; otherwise leaves the store untouched and
  // returns null. Empty text parses to 0 and is therefore rejected, as is any
  // text `Number` cannot parse (e.g. one containing a thousands separator).
  const trySetContextLength = (input: string): number | null => {
    const n = Number(input);
    if (Number.isInteger(n) && n > 0) {
      store.setContextLength(n);
      return n;
    }
    return null;
  };

  // NOTE(review): presumably this hook decides whether training is bounded by
  // epochs or by max steps and adjusts the passed store fields when toggled —
  // its behaviour is defined in `useMaxStepsEpochsToggle`; confirm there.
  const { useEpochs, toggleUseEpochs } = useMaxStepsEpochsToggle({
    maxSteps: store.maxSteps,
    epochs: store.epochs,
    saveSteps: store.saveSteps,
    setMaxSteps: store.setMaxSteps,
    setEpochs: store.setEpochs,
    setSaveSteps: store.setSaveSteps,
  });

  // Slider upper bounds: at least 500 steps / 20 epochs, growing with the
  // stored value so a larger number typed into the box still fits on the
  // slider. (The trailing 30 / 1 arguments can never exceed 500 / 20.)
  const maxStepsSliderMax = Math.max(500, store.maxSteps, 30);
  const epochsSliderMax = Math.max(20, store.epochs, 1);

  return (
    <div data-tour="studio-params" className="col-span-1 xl:col-span-4">
      <SectionCard
        icon={<HugeiconsIcon icon={Settings04Icon} className="size-5" />}
        title="Parameters"
        description="Configure training hyperparameters"
        accent="orange"
        className="md:min-h-[470px]"
      >
        <div className="flex flex-col gap-4">
          {/* Max Steps / Epochs */}
          <div className="flex flex-col gap-2">
            <div
              key={useEpochs ? "epochs" : "steps"}
              className="flex flex-col gap-2 animate-in fade-in-0 slide-in-from-bottom-1 duration-200"
            >
              <div className="flex items-center justify-between">
                <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
                  {useEpochs ? "Epochs" : "Max Steps"}
                  <Tooltip>
                    <TooltipTrigger asChild={true}>
                      <button
                        type="button"
                        className="text-foreground/70 hover:text-foreground"
                      >
                        <HugeiconsIcon
                          icon={InformationCircleIcon}
                          className="size-3"
                        />
                      </button>
                    </TooltipTrigger>
                    <TooltipContent>
                      {useEpochs
                        ? "Number of full passes over the dataset."
                        : "Override total optimizer steps."}{" "}
                      <a
                        href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-primary underline"
                      >
                        Read more
                      </a>
                    </TooltipContent>
                  </Tooltip>
                </span>
                <div className="flex items-center gap-3">
                  <button
                    type="button"
                    onClick={toggleUseEpochs}
                    className="text-xs text-primary underline cursor-pointer"
                  >
                    {useEpochs ? "Use Max Steps" : "Use Epochs"}
                  </button>
                  <input
                    type="number"
                    value={useEpochs ? store.epochs : store.maxSteps}
                    onChange={(e) => {
                      const raw = e.target.value;
                      // A cleared box is ignored rather than stored as 0.
                      if (raw === "") return;

                      const value = Number(raw);
                      // Reject non-numeric input and anything below 1. Values
                      // above the `max` attribute and non-integers are not
                      // filtered here.
                      if (!Number.isFinite(value) || value < 1) return;

                      if (useEpochs) {
                        store.setEpochs(value);
                      } else {
                        store.setMaxSteps(value);
                      }
                    }}
                    min={1}
                    max={useEpochs ? epochsSliderMax : maxStepsSliderMax}
                    step={1}
                    className="w-16 text-right font-mono text-xs font-medium bg-muted/50 border border-border rounded-lg px-1.5 py-0.5 focus:outline-none focus:ring-1 focus:ring-primary/30 [&::-webkit-inner-spin-button]:appearance-none"
                  />
                </div>
              </div>
              <Slider
                value={[
                  // Clamp the displayed thumb into the slider's [1, max]
                  // range; the stored value itself is not modified here.
                  useEpochs
                    ? Math.min(epochsSliderMax, Math.max(1, store.epochs))
                    : Math.min(maxStepsSliderMax, Math.max(1, store.maxSteps)),
                ]}
                onValueChange={([v]) =>
                  useEpochs ? store.setEpochs(v) : store.setMaxSteps(v)
                }
                min={1}
                max={useEpochs ? epochsSliderMax : maxStepsSliderMax}
                step={1}
              />
              <p className="text-[10px] text-muted-foreground">
                {useEpochs
                  ? "Each epoch is one full pass over your dataset."
                  : "Limits training to a fixed number of optimizer steps."}
              </p>
            </div>
          </div>

          {/* Context length */}
          <div className="flex flex-col gap-2">
            <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
              Context Length
              <Tooltip>
                <TooltipTrigger asChild={true}>
                  <button
                    type="button"
                    className="text-foreground/70 hover:text-foreground"
                  >
                    <HugeiconsIcon
                      icon={InformationCircleIcon}
                      className="size-3"
                    />
                  </button>
                </TooltipTrigger>
                <TooltipContent>
                  Maximum number of tokens per training sample.{" "}
                  <a
                    href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-primary underline"
                  >
                    Read more
                  </a>
                </TooltipContent>
              </Tooltip>
            </span>
            <div ref={ctxAnchorRef}>
              <Combobox
                items={ctxItems}
                filteredItems={ctxItems}
                filter={null}
                value={String(store.contextLength)}
                onValueChange={(v) => {
                  // Mirror the picked item into the input text only when it
                  // was accepted (a successful commit returns a positive
                  // number, which is truthy).
                  if (v && trySetContextLength(v)) {
                    setCtxInput(v);
                  }
                }}
                onInputValueChange={setCtxInput}
                itemToStringValue={(id) => Number(id).toLocaleString()}
                autoHighlight={false}
              >
                <ComboboxInput
                  placeholder={String(store.contextLength)}
                  className="w-full font-mono"
                  onBlur={() => {
                    // Commit the typed text if it is valid, then reset the
                    // input to `store.contextLength` as read by this render.
                    // NOTE(review): if that is still the pre-commit value,
                    // the sync effect above replaces it once the store value
                    // changes — confirm against the store hook's semantics.
                    trySetContextLength(ctxInput);
                    setCtxInput(String(store.contextLength));
                  }}
                  onKeyDown={(e) => {
                    // Only Enter is handled; invalid text is left as typed.
                    if (e.key !== "Enter") { return; }
                    const n = trySetContextLength(ctxInput);
                    if (n === null) { return; }
                    // For a value that is not one of the presets the event is
                    // swallowed — presumably so the combobox does not run its
                    // own Enter handling for a non-existent item; confirm.
                    if (!ctxItems.includes(ctxInput.trim())) {
                      e.stopPropagation();
                      e.preventDefault();
                    }
                    setCtxInput(String(n));
                  }}
                />
                <ComboboxContent anchor={ctxAnchorRef}>
                  <ComboboxEmpty>Enter a custom value</ComboboxEmpty>
                  <ComboboxList className="p-1">
                    {(id: string) => (
                      <ComboboxItem key={id} value={id} className="font-mono">
                        {Number(id).toLocaleString()}
                      </ComboboxItem>
                    )}
                  </ComboboxList>
                </ComboboxContent>
              </Combobox>
            </div>
            <p className="text-[10px] text-muted-foreground">
              Max sequence length for training samples
            </p>
          </div>

          {/* Learning Rate */}
          <div className="flex flex-col gap-2">
            <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
              Learning Rate
              <Tooltip>
                <TooltipTrigger asChild={true}>
                  <button
                    type="button"
                    className="text-foreground/70 hover:text-foreground"
                  >
                    <HugeiconsIcon
                      icon={InformationCircleIcon}
                      className="size-3"
                    />
                  </button>
                </TooltipTrigger>
                <TooltipContent>
                  Step size for weight updates. Lower values train slower but more
                  stably.{" "}
                  <a
                    href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-primary underline"
                  >
                    Read more
                  </a>
                </TooltipContent>
              </Tooltip>
            </span>
            <Input
              type="number"
              step="0.00001"
              value={store.learningRate}
              onChange={(e) => store.setLearningRate(Number(e.target.value))}
              className="w-full font-mono"
            />
            <p className="text-[10px] text-muted-foreground">
              Recommended: 2e-4 for LoRA, 2e-5 for full fine-tune
            </p>
          </div>

          {/* LoRA Settings */}
          {isLora && (
            <div>
              <button
                type="button"
                onClick={() => setLoraOpen(!loraOpen)}
                className="flex w-full cursor-pointer items-center gap-1.5 text-xs text-muted-foreground"
              >
                <HugeiconsIcon
                  icon={ArrowDown01Icon}
                  className={`size-3.5 transition-transform ${loraOpen ? "rotate-180" : ""}`}
                />
                LoRA Settings
              </button>
              <div
                className={`${loraOpen ? "" : "hidden"} pt-1.5 mt-4 flex flex-col gap-4`}
              >
                <SliderRow
                  label="Rank"
                  tooltip={
                    <>
                      Dimension of the low-rank matrices. Higher = more capacity.{" "}
                      <a
                        href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-primary underline"
                      >
                        Read more
                      </a>
                    </>
                  }
                  value={store.loraRank}
                  onChange={store.setLoraRank}
                  min={4}
                  max={128}
                  step={4}
                />
                <SliderRow
                  label="Alpha"
                  tooltip={
                    <>
                      Scaling factor for LoRA updates. Usually 2x rank.{" "}
                      <a
                        href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-primary underline"
                      >
                        Read more
                      </a>
                    </>
                  }
                  value={store.loraAlpha}
                  onChange={store.setLoraAlpha}
                  min={4}
                  max={256}
                  step={4}
                />
                <SliderRow
                  label="Dropout"
                  tooltip={
                    <>
                      Dropout probability for LoRA layers to reduce overfitting.{" "}
                      <a
                        href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                        target="_blank"
                        rel="noopener noreferrer"
                        className="text-primary underline"
                      >
                        Read more
                      </a>
                    </>
                  }
                  value={store.loraDropout}
                  onChange={store.setLoraDropout}
                  min={0}
                  max={0.5}
                  step={0.01}
                  format={(v) => v.toFixed(2)}
                />

                {/* Vision checkboxes */}
                {showVisionLora && (
                  <div className="flex flex-col gap-2 pt-1">
                    {(
                      [
                        // Each entry: [checkbox id / React key, label,
                        // current value, setter].
                        [
                          "finetuneVisionLayers",
                          "Vision layers",
                          store.finetuneVisionLayers,
                          store.setFinetuneVisionLayers,
                        ],
                        [
                          "finetuneLanguageLayers",
                          "Language layers",
                          store.finetuneLanguageLayers,
                          store.setFinetuneLanguageLayers,
                        ],
                        [
                          "finetuneAttentionModules",
                          "Attention modules",
                          store.finetuneAttentionModules,
                          store.setFinetuneAttentionModules,
                        ],
                        [
                          "finetuneMLPModules",
                          "MLP modules",
                          store.finetuneMLPModules,
                          store.setFinetuneMLPModules,
                        ],
                      ] as const
                    ).map(([key, label, value, setter]) => (
                      <div key={key} className="flex items-center gap-2">
                        <Checkbox
                          id={key}
                          checked={value as boolean}
                          onCheckedChange={(v) =>
                            (setter as (v: boolean) => void)(!!v)
                          }
                        />
                        <label
                          htmlFor={key}
                          className="text-xs cursor-pointer text-muted-foreground"
                        >
                          {label}
                        </label>
                      </div>
                    ))}
                  </div>
                )}

                {/* Text target modules */}
                {!showVisionLora && (
                  <div className="flex flex-col gap-2 pt-1">
                    <span className="text-xs font-medium text-muted-foreground">
                      Target Modules
                    </span>
                    <div className="flex flex-wrap gap-1.5">
                      {TARGET_MODULES.map((mod) => {
                        const active = store.targetModules.includes(mod);
                        return (
                          <button
                            key={mod}
                            type="button"
                            onClick={() => {
                              // Toggle `mod`: drop it when already selected,
                              // append it otherwise.
                              store.setTargetModules(
                                active
                                  ? store.targetModules.filter((m) => m !== mod)
                                  : [...store.targetModules, mod],
                              );
                            }}
                            className={`cursor-pointer rounded-full border px-2.5 py-0.5 text-[11px] font-mono transition-colors ${active
                                ? "border-orange-300 bg-orange-50 text-orange-700 dark:border-orange-700 dark:bg-orange-950 dark:text-orange-300"
                                : "text-muted-foreground hover:bg-muted/50"
                              }`}
                          >
                            {mod}
                          </button>
                        );
                      })}
                    </div>
                  </div>
                )}

                {/* LoRA variant */}
                <div className="flex gap-2">
                  {(
                    [
                      {
                        value: "lora",
                        label: "Enable LoRA",
                        desc: "Train with LoRA",
                      },
                      { value: "rslora", label: "RS-LoRA", desc: "Stable Rank" },
                      {
                        value: "loftq",
                        label: "LoftQ",
                        desc: "Memory Efficient",
                      },
                    ] as const
                  ).map((opt) => (
                    <button
                      key={opt.value}
                      type="button"
                      onClick={() => store.setLoraVariant(opt.value)}
                      className={`flex-1 corner-squircle rounded-xl border px-3 py-2 text-left transition-colors cursor-pointer ${store.loraVariant === opt.value
                          ? "border-primary/50 bg-primary/5 ring-1 ring-primary/20"
                          : "border-border hover:border-foreground/20"
                        }`}
                    >
                      <p className="text-xs font-medium">{opt.label}</p>
                      <p className="text-[10px] text-muted-foreground">
                        {opt.desc}
                      </p>
                    </button>
                  ))}
                </div>
              </div>
            </div>
          )}

          {/* Training Hyperparams */}
          <Collapsible open={hyperOpen} onOpenChange={setHyperOpen}>
            <CollapsibleTrigger className="flex w-full cursor-pointer items-center gap-1.5 text-xs text-muted-foreground">
              <HugeiconsIcon
                icon={ArrowDown01Icon}
                className={`size-3.5 transition-transform ${hyperOpen ? "rotate-180" : ""}`}
              />
              Training Hyperparameters
            </CollapsibleTrigger>
            <CollapsibleContent className="mt-3 data-[state=open]:overflow-visible">
              <Tabs defaultValue="optimization" className="w-full">
                <TabsList className="w-full">
                  <TabsTrigger
                    value="optimization"
                    className="flex-1 !corner-squircle text-xs cursor-pointer"
                  >
                    Optimization
                  </TabsTrigger>
                  <TabsTrigger
                    value="schedule"
                    className="flex-1 text-xs cursor-pointer"
                  >
                    Schedule
                  </TabsTrigger>
                  <TabsTrigger
                    value="memory"
                    className="flex-1 text-xs cursor-pointer"
                  >
                    Memory
                  </TabsTrigger>
                </TabsList>

                <TabsContent
                  value="optimization"
                  className="mt-3 flex flex-col gap-3"
                >
                  <Row
                    label="Optimizer"
                    tooltip={
                      <>
                        Optimization algorithm. 8-bit variants reduce memory usage.
                        Fused is recommended for vision models.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                  >
                    <Select
                      value={store.optimizerType}
                      onValueChange={(v) => store.setOptimizerType(v)}
                    >
                      <SelectTrigger className="w-48">
                        <SelectValue />
                      </SelectTrigger>
                      <SelectContent>
                        {OPTIMIZER_OPTIONS.map((opt) => (
                          <SelectItem
                            key={opt.value}
                            value={opt.value}
                          >
                            {opt.label}
                          </SelectItem>
                        ))}
                      </SelectContent>
                    </Select>
                  </Row>
                  <Row
                    label="LR scheduler"
                    tooltip={
                      <>
                        How the learning rate changes over training. Linear decays
                        steadily; cosine decays in a curve.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                  >
                    <Select
                      value={store.lrSchedulerType}
                      onValueChange={(v) => store.setLrSchedulerType(v)}
                    >
                      <SelectTrigger className="w-48">
                        <SelectValue />
                      </SelectTrigger>
                      <SelectContent>
                        {LR_SCHEDULER_OPTIONS.map((opt) => (
                          <SelectItem
                            key={opt.value}
                            value={opt.value}
                          >
                            {opt.label}
                          </SelectItem>
                        ))}
                      </SelectContent>
                    </Select>
                  </Row>
                  <SliderRow
                    label="Batch Size"
                    tooltip={
                      <>
                        Samples processed per step. Higher uses more VRAM.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                    value={store.batchSize}
                    onChange={store.setBatchSize}
                    min={1}
                    max={32}
                    step={1}
                  />
                  <SliderRow
                    label="Grad Accum"
                    tooltip={
                      <>
                        Simulates larger batch sizes without extra VRAM.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                    value={store.gradientAccumulation}
                    onChange={store.setGradientAccumulation}
                    min={1}
                    max={64}
                    step={1}
                  />
                  <Row
                    label="Weight Decay"
                    tooltip={
                      <>
                        L2 regularization to prevent overfitting.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                  >
                    <Input
                      type="number"
                      step="0.001"
                      value={store.weightDecay}
                      onChange={(e) =>
                        store.setWeightDecay(Number(e.target.value))
                      }
                      className="w-28 font-mono"
                    />
                  </Row>
                </TabsContent>

                <TabsContent
                  value="schedule"
                  className="mt-3 flex flex-col gap-3"
                >
                  <SliderRow
                    label="Warmup Steps"
                    tooltip={
                      <>
                        Gradually increase LR at training start for stability.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                    value={store.warmupSteps}
                    onChange={store.setWarmupSteps}
                    min={0}
                    max={100}
                    step={1}
                  />
                  {!useEpochs && (
                    <SliderRow
                      label="Epochs"
                      tooltip={
                        <>
                          Number of full passes over the dataset. Set 0 to run by
                          max steps.{" "}
                          <a
                            href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                            target="_blank"
                            rel="noopener noreferrer"
                            className="text-primary underline"
                          >
                            Read more
                          </a>
                        </>
                      }
                      value={store.epochs}
                      onChange={store.setEpochs}
                      min={0}
                      max={epochsSliderMax}
                      step={1}
                    />
                  )}
                  <Row
                    label="Save Steps"
                    tooltip={
                      <>
                        Save a checkpoint every N steps. 0 to disable.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                  >
                    <Input
                      type="number"
                      value={store.saveSteps}
                      onChange={(e) => store.setSaveSteps(Number(e.target.value))}
                      className="w-28 font-mono"
                    />
                  </Row>
                  <Row
                    label="Eval Steps"
                    tooltip="Fraction of total training steps between evaluations (0-1). Set to 0 to disable evaluation. E.g. 0.01 = evaluate every 1% of steps."
                  >
                    <Input
                      type="number"
                      step="0.01"
                      min="0.0"
                      max="1.0"
                      value={store.evalSteps}
                      onChange={(e) => store.setEvalSteps(Number(e.target.value))}
                      className="w-28 font-mono"
                    />
                  </Row>
                  <Row label="Seed" tooltip="Random seed for reproducibility.">
                    <Input
                      type="number"
                      value={store.randomSeed}
                      onChange={(e) =>
                        store.setRandomSeed(Number(e.target.value))
                      }
                      className="w-28 font-mono"
                    />
                  </Row>
                </TabsContent>

                <TabsContent value="memory" className="mt-3 flex flex-col gap-3">
                  <Row
                    label="Grad Checkpoint"
                    tooltip={
                      <>
                        Trade compute for memory by recomputing activations.{" "}
                        <a
                          href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/lora-hyperparameters-guide"
                          target="_blank"
                          rel="noopener noreferrer"
                          className="text-primary underline"
                        >
                          Read more
                        </a>
                      </>
                    }
                  >
                    <Select
                      value={store.gradientCheckpointing}
                      onValueChange={(v) =>
                        store.setGradientCheckpointing(v as GradientCheckpointing)
                      }
                    >
                      <SelectTrigger className="w-32">
                        <SelectValue />
                      </SelectTrigger>
                      <SelectContent>
                        <SelectItem value="none">None</SelectItem>
                        <SelectItem value="true">Standard</SelectItem>
                        <SelectItem value="unsloth">Unsloth</SelectItem>
                      </SelectContent>
                    </Select>
                  </Row>
                  {!showVisionLora && !store.isEmbeddingModel && (
                    <div className="flex items-center gap-2">
                      <Checkbox
                        id="packing"
                        checked={store.packing}
                        onCheckedChange={(v) => store.setPacking(!!v)}
                      />
                      <label
                        htmlFor="packing"
                        className="text-xs cursor-pointer text-muted-foreground"
                      >
                        Enable packing
                      </label>
                    </div>
                  )}
                  {!store.isEmbeddingModel && (
                    <div className="flex items-center gap-2">
                      <Checkbox
                        id="trainOnCompletions"
                        checked={store.trainOnCompletions}
                        onCheckedChange={(v) => store.setTrainOnCompletions(!!v)}
                      />
                      <label
                        htmlFor="trainOnCompletions"
                        className="text-xs cursor-pointer text-muted-foreground"
                      >
                        Assistant completions only
                      </label>
                    </div>
                  )}
                </TabsContent>
              </Tabs>
            </CollapsibleContent>
          </Collapsible>
        </div>
      </SectionCard>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/progress-section-lib.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TrainingPhase } from "@/features/training";

/**
 * Human-readable display label for every `TrainingPhase` value.
 *
 * Typed as a `Record` over `TrainingPhase`, so each phase must have an entry.
 */
export const phaseLabel: Record<TrainingPhase, string> = {
  idle: "Idle",
  downloading_model: "Downloading model",
  downloading_dataset: "Downloading dataset",
  loading_model: "Loading model",
  loading_dataset: "Loading dataset",
  configuring: "Configuring",
  training: "Training",
  completed: "Completed",
  error: "Error",
  stopped: "Stopped",
};

/**
 * CSS utility classes (background + text colour, with `dark:` variants) for
 * every `TrainingPhase` value.
 *
 * Related phases share a palette: both download phases are sky, both load
 * phases are amber, training/completed are emerald, idle/stopped are muted.
 */
export const phaseColors: Record<TrainingPhase, string> = {
  idle: "bg-muted text-muted-foreground",
  downloading_model:
    "bg-sky-100 text-sky-700 dark:bg-sky-900 dark:text-sky-300",
  downloading_dataset:
    "bg-sky-100 text-sky-700 dark:bg-sky-900 dark:text-sky-300",
  loading_model:
    "bg-amber-100 text-amber-700 dark:bg-amber-900 dark:text-amber-300",
  loading_dataset:
    "bg-amber-100 text-amber-700 dark:bg-amber-900 dark:text-amber-300",
  configuring: "bg-blue-100 text-blue-700 dark:bg-blue-900 dark:text-blue-300",
  training:
    "bg-emerald-100 text-emerald-700 dark:bg-emerald-900 dark:text-emerald-300",
  completed:
    "bg-emerald-100 text-emerald-700 dark:bg-emerald-900 dark:text-emerald-300",
  error: "bg-red-100 text-red-700 dark:bg-red-900 dark:text-red-300",
  stopped: "bg-muted text-muted-foreground",
};

/**
 * Format a duration given in seconds as `"<minutes>m <seconds>s"`.
 *
 * Fractional seconds are truncated. Minutes are not rolled over into hours,
 * so 3700 seconds is rendered as `"61m 40s"`.
 *
 * @param seconds Duration in seconds, or `null` when unknown.
 * @returns The formatted duration, or `"--"` when the value is missing,
 *   negative, or not a finite number (NaN / ±Infinity).
 */
export function formatDuration(seconds: number | null): string {
  // NaN and ±Infinity are not caught by a plain `< 0` comparison and would
  // otherwise render as "NaNm NaNs" / "Infinitym NaNs".
  if (seconds == null || !Number.isFinite(seconds) || seconds < 0) return "--";
  const total = Math.floor(seconds);
  const min = Math.floor(total / 60);
  const sec = total % 60;
  return `${min}m ${sec}s`;
}

/**
 * Render a numeric metric with a fixed number of decimal places.
 *
 * @param value Number to format; may be absent.
 * @param digits Number of digits after the decimal point (passed to `toFixed`).
 * @returns The fixed-point string, or `"--"` when `value` is `null`,
 *   `undefined`, NaN or ±Infinity.
 */
export function formatNumber(value: number | null | undefined, digits: number): string {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value.toFixed(digits);
  }
  return "--";
}


================================================
FILE: studio/frontend/src/features/studio/sections/progress-section.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { SectionCard } from "@/components/section-card";
import {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { Button } from "@/components/ui/button";
import {
  Popover,
  PopoverContent,
  PopoverTrigger,
} from "@/components/ui/popover";
import { Progress } from "@/components/ui/progress";
import { OPTIMIZER_OPTIONS } from "@/config/training";
import { setTrainingCompareHandoff } from "@/features/chat";
import {
  useTrainingActions,
  useTrainingConfigStore,
  useTrainingRuntimeStore,
} from "@/features/training";
import { useGpuUtilization } from "@/hooks";
import { cn } from "@/lib/utils";
import {
  ChartAverageIcon,
  DashboardSpeed01Icon,
  Notebook01Icon,
  RamMemoryIcon,
  StopIcon,
  TemperatureIcon,
  ZapIcon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { Link, useNavigate } from "@tanstack/react-router";
import { type ReactElement, type ReactNode, useEffect, useState } from "react";
import { useShallow } from "zustand/react/shallow";
import { ChartSettingsSheet } from "./charts/chart-settings-sheet";
import {
  formatDuration,
  formatNumber,
  phaseColors,
  phaseLabel,
} from "./progress-section-lib";

/**
 * One titled group of rows in the training-config popover.
 *
 * `section` is the group heading; each entry of `rows` is a
 * `[label, value]` pair, where the value may be absent (`null`/`undefined`).
 */
type ConfigGroup = {
  section: string;
  rows: [string, string | number | null | undefined][];
};

/**
 * Build a single `[label, value]` row tuple.
 *
 * The explicit tuple return type keeps array literals from being widened to
 * a plain `(string | number | null | undefined)[]`.
 */
function configRow(
  label: string,
  value: string | number | null | undefined,
): [string, string | number | null | undefined] {
  const row: [string, string | number | null | undefined] = [label, value];
  return row;
}

/**
 * "Training Progress" card.
 *
 * Reads the training runtime store and the training config store and renders:
 * the current phase badge, step/percent progress, a milestone callout, the
 * latest loss / learning-rate / grad-norm values, elapsed time and ETA, and a
 * GPU monitor (utilization, temperature, VRAM, power). The card header hosts
 * the config popover, chart settings and the stop-training dialog.
 */
export function ProgressSection(): ReactElement {
  const navigate = useNavigate();
  // Pick only the runtime fields rendered below (wrapped in useShallow).
  const runtime = useTrainingRuntimeStore(
    useShallow((state) => ({
      phase: state.phase,
      message: state.message,
      error: state.error,
      currentStep: state.currentStep,
      totalSteps: state.totalSteps,
      currentEpoch: state.currentEpoch,
      currentLoss: state.currentLoss,
      currentLearningRate: state.currentLearningRate,
      currentGradNorm: state.currentGradNorm,
      progressPercent: state.progressPercent,
      elapsedSeconds: state.elapsedSeconds,
      etaSeconds: state.etaSeconds,
      currentNumTokens: state.currentNumTokens,
      isTrainingRunning: state.isTrainingRunning,
      lossHistory: state.lossHistory,
      lrHistory: state.lrHistory,
      gradNormHistory: state.gradNormHistory,
    })),
  );

  // Config values shown in the header popover and the Model / Method stats.
  const config = useTrainingConfigStore(
    useShallow((state) => ({
      selectedModel: state.selectedModel,
      trainingMethod: state.trainingMethod,
      epochs: state.epochs,
      batchSize: state.batchSize,
      learningRate: state.learningRate,
      maxSteps: state.maxSteps,
      contextLength: state.contextLength,
      warmupSteps: state.warmupSteps,
      optimizerType: state.optimizerType,
      loraRank: state.loraRank,
      loraAlpha: state.loraAlpha,
      loraDropout: state.loraDropout,
      loraVariant: state.loraVariant,
    })),
  );

  const { stopTrainingRun } = useTrainingActions();
  const gpu = useGpuUtilization(runtime.isTrainingRunning);
  const [stopDialogOpen, setStopDialogOpen] = useState(false);
  // Local flag driving the disabled / "Stopping…" state of the stop button.
  const [stopRequested, setStopRequested] = useState(false);

  // Clear the local stop flag whenever the run is not (or no longer) running.
  useEffect(() => {
    if (!runtime.isTrainingRunning) {
      setStopRequested(false);
    }
  }, [runtime.isTrainingRunning]);

  // Prefer step-based progress, rounded and clamped to 0–100. When totalSteps
  // is not positive, fall back to the store's progressPercent (rounded only,
  // not clamped).
  const pct =
    runtime.totalSteps > 0
      ? Math.min(
          100,
          Math.max(
            0,
            Math.round((runtime.currentStep / runtime.totalSteps) * 100),
          ),
        )
      : Math.round(runtime.progressPercent);

  const elapsed = runtime.elapsedSeconds;
  // Linear extrapolation: remaining = elapsed * (100 - pct) / pct.
  // Only used when the store does not provide etaSeconds.
  const derivedEta =
    elapsed != null && pct > 0
      ? Math.round((elapsed * (100 - pct)) / Math.max(pct, 1))
      : null;
  const eta = runtime.etaSeconds ?? derivedEta;

  const stepsPerSecond =
    elapsed != null && elapsed > 0 ? runtime.currentStep / elapsed : null;
  const showHalfwayHint =
    runtime.phase === "training" && pct >= 50 && pct < 100;
  const showCompletedHint = runtime.phase === "completed";
  // Hand the selected model over to the chat feature, then navigate to /chat.
  const handleCompareInChat = async () => {
    setTrainingCompareHandoff(config.selectedModel);
    await navigate({ to: "/chat" });
  };
  // Close the dialog, mark the stop as requested (locally and in the runtime
  // store), then ask the backend to stop. The local flag is rolled back when
  // stopTrainingRun resolves falsy or throws.
  // NOTE(review): the store-level stopRequested flag is not reset here on
  // failure — confirm whether stopTrainingRun takes care of that.
  const requestStop = async (saveCheckpoint: boolean) => {
    setStopRequested(true);
    setStopDialogOpen(false);
    useTrainingRuntimeStore.getState().setStopRequested(true);
    try {
      const ok = await stopTrainingRun(saveCheckpoint);
      if (!ok) {
        setStopRequested(false);
      }
    } catch {
      setStopRequested(false);
    }
  };

  // While running, show the live value; otherwise prefer the last finite,
  // non-zero point from the matching history (falling back to the live value).
  const stoppedLoss = getDisplayMetric(
    runtime.isTrainingRunning,
    runtime.currentLoss,
    runtime.lossHistory,
  );
  const stoppedLr = getDisplayMetric(
    runtime.isTrainingRunning,
    runtime.currentLearningRate,
    runtime.lrHistory,
  );
  // Same rule as above, written inline; the result is rendered through
  // formatNumber, which tolerates null/undefined.
  const stoppedGradNorm = runtime.isTrainingRunning
    ? runtime.currentGradNorm
    : (lastNonZeroValue(runtime.gradNormHistory) ?? runtime.currentGradNorm);

  // Show the optimizer's display label when known, else the raw identifier.
  const optimizerLabel =
    OPTIMIZER_OPTIONS.find((o) => o.value === config.optimizerType)?.label ??
    config.optimizerType;

  // Rows for the config popover; the LoRA group is omitted for "full" training.
  const configItems: ConfigGroup[] = [
    {
      section: "Hyperparams",
      rows: [
        configRow("Epochs", config.epochs),
        configRow("Batch size", config.batchSize),
        configRow("Learning rate", config.learningRate),
        configRow("Optimizer", optimizerLabel),
        configRow("Max steps", config.maxSteps),
        configRow("Context length", config.contextLength),
        configRow("Warmup steps", config.warmupSteps),
      ],
    },
    ...(config.trainingMethod !== "full"
      ? [
          {
            section: "LoRA",
            rows: [
              configRow("Rank", config.loraRank),
              configRow("Alpha", config.loraAlpha),
              configRow("Dropout", config.loraDropout),
              configRow("Variant", config.loraVariant),
            ],
          },
        ]
      : []),
  ];

  return (
    <SectionCard
      icon={<HugeiconsIcon icon={ChartAverageIcon} className="size-5" />}
      title="Training Progress"
      description={runtime.message || "Live training metrics"}
      accent="emerald"
      className="shadow-border border border-border/60 bg-card/90 ring-0 backdrop-blur-sm"
      headerAction={
        <TrainingHeaderActions
          configItems={configItems}
          isTrainingRunning={runtime.isTrainingRunning}
          onOpenStopDialog={setStopDialogOpen}
          onRequestStop={requestStop}
          stopDialogOpen={stopDialogOpen}
          stopRequested={stopRequested}
        />
      }
    >
      <div className="grid grid-cols-1 gap-5 lg:grid-cols-[minmax(0,1.2fr)_minmax(18rem,0.8fr)]">
        <div className="flex flex-col gap-4">
          <div className="flex flex-wrap items-center gap-2">
            <span
              className={`rounded-full px-2.5 py-1 text-[10px] font-semibold ${phaseColors[runtime.phase]}`}
            >
              {phaseLabel[runtime.phase]}
            </span>
            <span className="text-[10px] tabular-nums text-muted-foreground">
              Epoch {runtime.currentEpoch.toFixed(2)}
            </span>
            <span className="rounded-full border border-border/60 px-2.5 py-1 text-[10px] font-medium tabular-nums text-muted-foreground">
              {pct}% complete
            </span>
          </div>

          <div className="flex flex-col gap-2">
            <div className="flex justify-between text-xs text-muted-foreground">
              <span>
                Step {runtime.currentStep} / {runtime.totalSteps || "--"}
              </span>
              <span>{pct}%</span>
            </div>
            <Progress value={pct} className="h-2 bg-foreground/[0.05]" />
          </div>

          <MilestoneCallout
            showCompletedHint={showCompletedHint}
            showHalfwayHint={showHalfwayHint}
            onCompareInChat={handleCompareInChat}
          />

          {runtime.error && (
            <p className="rounded-2xl border border-destructive/30 bg-destructive/5 px-3 py-2 text-xs text-red-500 leading-relaxed">
              {runtime.error}
            </p>
          )}

          <div className="grid gap-x-4 gap-y-3 pt-1 sm:grid-cols-2 xl:grid-cols-5">
            <MetricStat
              label="Loss"
              valueClassName="text-2xl font-bold tracking-tight"
            >
              {stoppedLoss.toFixed(4)}
            </MetricStat>
            <MetricStat label="LR">{stoppedLr.toExponential(2)}</MetricStat>
            <MetricStat label="Grad Norm">
              {formatNumber(stoppedGradNorm, 3)}
            </MetricStat>
            <MetricStat label="Model" valueClassName="truncate">
              {config.selectedModel ?? "--"}
            </MetricStat>
            <MetricStat label="Method">
              {config.trainingMethod === "qlora" ? "QLoRA" : config.trainingMethod === "lora" ? "LoRA" : "Full"}
            </MetricStat>
          </div>

          <div className="flex flex-wrap gap-x-4 gap-y-1 text-xs text-muted-foreground">
            <span>Elapsed: {formatDuration(elapsed)}</span>
            <span>ETA: {formatDuration(eta)}</span>
            <span>
              {stepsPerSecond == null
                ? "-- steps/s"
                : `${stepsPerSecond.toFixed(2)} steps/s`}
            </span>
            {runtime.currentNumTokens != null && (
              <span>Tokens: {runtime.currentNumTokens}</span>
            )}
          </div>
        </div>

        <div className="flex flex-col gap-3">
          <div className="flex items-center justify-between">
            <p className="text-xs font-medium text-muted-foreground">
              GPU Monitor
            </p>
            <span className="text-[11px] text-muted-foreground">Live</span>
          </div>
          <div className="grid grid-cols-2 gap-2.5">
            <GpuStat
              label="Utilization"
              icon={
                <HugeiconsIcon
                  icon={DashboardSpeed01Icon}
                  className="size-3.5"
                />
              }
              value={
                gpu.gpu_utilization_pct != null
                  ? `${gpu.gpu_utilization_pct}%`
                  : "--"
              }
              pct={gpu.gpu_utilization_pct ?? 0}
            />
            <GpuStat
              label="Temperature"
              icon={
                <HugeiconsIcon icon={TemperatureIcon} className="size-3.5" />
              }
              value={
                gpu.temperature_c != null ? `${gpu.temperature_c}°C` : "--"
              }
              pct={gpu.temperature_c ?? 0}
              max={100}
            />
            <GpuStat
              label="VRAM"
              icon={<HugeiconsIcon icon={RamMemoryIcon} className="size-3.5" />}
              value={
                gpu.vram_used_gb != null && gpu.vram_total_gb != null
                  ? `${gpu.vram_used_gb} / ${gpu.vram_total_gb} GB`
                  : "--"
              }
              pct={gpu.vram_utilization_pct ?? 0}
            />
            <GpuStat
              label="Power"
              icon={<HugeiconsIcon icon={ZapIcon} className="size-3.5" />}
              value={
                gpu.power_draw_w != null
                  ? gpu.power_limit_w != null
                    ? `${gpu.power_draw_w} / ${gpu.power_limit_w} W`
                    : `${gpu.power_draw_w} W`
                  : "--"
              }
              pct={gpu.power_utilization_pct ?? 0}
            />
          </div>
        </div>
      </div>
    </SectionCard>
  );
}

/**
 * Header controls for the training progress card.
 *
 * Renders, left to right: a popover listing the training config groups, the
 * chart settings sheet, and the Stop button with its confirmation dialog.
 *
 * @param configItems Groups of `[label, value]` rows shown in the popover.
 *   Missing values (`null`/`undefined`) are shown as `"--"`.
 * @param isTrainingRunning Whether a run is active; the Stop button is
 *   disabled otherwise.
 * @param onOpenStopDialog Called with the desired open state of the dialog.
 * @param onRequestStop Called with `true` for "Stop and Save" and `false`
 *   for "Cancel Training".
 * @param stopDialogOpen Controlled open state of the confirmation dialog.
 * @param stopRequested Whether a stop is already in flight; disables the
 *   button and switches its label to "Stopping…".
 */
function TrainingHeaderActions({
  configItems,
  isTrainingRunning,
  onOpenStopDialog,
  onRequestStop,
  stopDialogOpen,
  stopRequested,
}: {
  configItems: ConfigGroup[];
  isTrainingRunning: boolean;
  onOpenStopDialog: (open: boolean) => void;
  onRequestStop: (saveCheckpoint: boolean) => Promise<void>;
  stopDialogOpen: boolean;
  stopRequested: boolean;
}): ReactElement {
  return (
    <div className="flex items-center gap-2">
      <Popover>
        <PopoverTrigger asChild={true}>
          <Button
            type="button"
            variant="ghost"
            size="icon-sm"
            className="rounded-full text-muted-foreground hover:bg-muted hover:text-foreground"
            aria-label="Open training config"
          >
            <HugeiconsIcon icon={Notebook01Icon} className="size-4" />
          </Button>
        </PopoverTrigger>
        <PopoverContent className="w-72" align="end">
          <div className="flex flex-col gap-3">
            <p className="text-xs font-semibold">Training Config</p>
            {configItems.map((group) => (
              <div key={group.section} className="flex flex-col gap-1">
                <p className="text-[10px] font-semibold uppercase tracking-wider text-muted-foreground">
                  {group.section}
                </p>
                {group.rows.map(([label, value]) => (
                  <div key={label} className="flex justify-between text-xs">
                    <span className="text-muted-foreground">{label}</span>
                    <span className="font-medium tabular-nums">
                      {value == null ? "--" : String(value)}
                    </span>
                  </div>
                ))}
              </div>
            ))}
          </div>
        </PopoverContent>
      </Popover>
      <ChartSettingsSheet />
      <AlertDialog open={stopDialogOpen} onOpenChange={onOpenStopDialog}>
        <Button
          data-tour="studio-training-stop"
          variant="destructive"
          size="sm"
          className={cn(
            "h-8 rounded-full px-3.5 text-xs shadow-sm",
            stopRequested ? "cursor-not-allowed opacity-60" : "cursor-pointer",
          )}
          onClick={() => onOpenStopDialog(true)}
          disabled={!isTrainingRunning || stopRequested}
        >
          <HugeiconsIcon icon={StopIcon} className="size-3" />
          {stopRequested ? "Stopping…" : "Stop"}
        </Button>
        <AlertDialogContent overlayClassName="bg-background/40 supports-backdrop-filter:backdrop-blur-[1px]">
          <AlertDialogHeader>
            <AlertDialogTitle>Stop Training</AlertDialogTitle>
            <AlertDialogDescription>
              Choose how you want to stop the current training run.
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel>Continue Training</AlertDialogCancel>
            <AlertDialogAction
              variant="destructive"
              onClick={() => onRequestStop(false)}
            >
              Cancel Training
            </AlertDialogAction>
            <AlertDialogAction onClick={() => onRequestStop(true)}>
              Stop and Save
            </AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </div>
  );
}

/**
 * Small callout shown once training passes the halfway mark or completes.
 *
 * Renders nothing when neither hint is requested. The completed hint takes
 * precedence: it shows the follow-up actions ("Compare in Chat", "Export
 * Model") instead of the "Milestone" heading and "50%+" badge.
 */
function MilestoneCallout({
  showCompletedHint,
  showHalfwayHint,
  onCompareInChat,
}: {
  showCompletedHint: boolean;
  showHalfwayHint: boolean;
  onCompareInChat: () => Promise<void>;
}): ReactElement | null {
  if (!showHalfwayHint && !showCompletedHint) {
    return null;
  }

  // Everything that is not the completed state uses the halfway presentation.
  const isHalfwayView = !showCompletedHint;
  const message = showCompletedHint
    ? "Training done. Next step: compare base vs fine-tuned outputs."
    : "Halfway done. Training is past 50%.";

  return (
    <div className="corner-squircle rounded-2xl border border-border/60 bg-muted/30 px-3 py-2.5">
      <div className="flex items-start justify-between gap-3">
        <div className="min-w-0">
          {isHalfwayView && (
            <p className="text-[10px] font-medium uppercase tracking-[0.12em] text-muted-foreground">
              Milestone
            </p>
          )}
          <p className={cn("text-xs text-foreground/85", isHalfwayView && "mt-1")}>
            {message}
          </p>
        </div>
        {isHalfwayView && (
          <span className="rounded-full border border-border/60 bg-background/80 px-2 py-0.5 text-[10px] font-medium text-muted-foreground">
            50%+
          </span>
        )}
      </div>
      {showCompletedHint && (
        <div className="mt-2 flex flex-wrap gap-2">
          <Button size="xs" onClick={onCompareInChat}>
            Compare in Chat
          </Button>
          <Button asChild={true} size="xs" variant="outline">
            <Link to="/export">Export Model</Link>
          </Button>
        </div>
      )}
    </div>
  );
}

/**
 * Labelled metric: a small muted caption with the value beneath it.
 *
 * `valueClassName`, when given, is appended to the value's base classes.
 */
function MetricStat({
  label,
  children,
  valueClassName,
}: {
  label: string;
  children: ReactNode;
  valueClassName?: string;
}): ReactElement {
  const extraClasses = valueClassName ?? "";
  const valueClasses = `mt-1 text-base font-semibold tabular-nums ${extraClasses}`;

  return (
    <div className="min-w-0">
      <p className="text-[11px] text-muted-foreground">{label}</p>
      <p className={valueClasses}>{children}</p>
    </div>
  );
}

/**
 * Scan `points` from the end and return the first `value` that is a finite,
 * non-zero number.
 *
 * @returns That value, or `null` when no point qualifies (including an empty
 *   array).
 */
function lastNonZeroValue(points: { value: number }[]): number | null {
  let index = points.length;
  while (index > 0) {
    index -= 1;
    const candidate = points[index]?.value;
    if (candidate !== 0 && Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return null;
}

/**
 * Choose which metric value to display.
 *
 * While training is running the live `currentValue` is returned. Otherwise
 * the last finite, non-zero entry of `history` is preferred, falling back to
 * `currentValue` when the history has no such entry.
 */
function getDisplayMetric(
  isTrainingRunning: boolean,
  currentValue: number,
  history: { value: number }[],
): number {
  const fromHistory = isTrainingRunning ? null : lastNonZeroValue(history);
  return fromHistory ?? currentValue;
}

/**
 * Single GPU gauge: icon + label, a textual value, and a coloured fill bar.
 *
 * @param label Caption shown next to the icon.
 * @param icon Icon element rendered before the label.
 * @param value Pre-formatted text shown on the right (e.g. `"42%"`).
 * @param pct Numeric reading that drives the bar. Non-finite readings are
 *   treated as 0.
 * @param max Upper end of the reading's scale; defaults to 100. Non-positive
 *   or non-finite values fall back to 100.
 *
 * The reading is clamped to `[0, max]` and converted to a percentage of `max`
 * for both the bar width and the colour: green below 60%, amber below 95%,
 * red otherwise.
 */
function GpuStat({
  label,
  icon,
  value,
  pct,
  max,
}: {
  label: string;
  icon: ReactNode;
  value: string;
  pct: number;
  max?: number;
}): ReactElement {
  const scaleMax = max != null && Number.isFinite(max) && max > 0 ? max : 100;
  // Guard against NaN/Infinity so the inline width is always valid CSS.
  const reading = Number.isFinite(pct) ? pct : 0;
  const clamped = Math.max(0, Math.min(reading, scaleMax));
  // Express the reading as a share of the scale. On the default 0–100 scale
  // the clamped value already is the percentage, so it is used unchanged.
  const fillPct = scaleMax === 100 ? clamped : (clamped * 100) / scaleMax;

  let barColor = "bg-red-500";
  if (fillPct < 60) {
    barColor = "bg-emerald-500";
  } else if (fillPct < 95) {
    barColor = "bg-amber-500";
  }

  return (
    <div className="corner-squircle flex flex-col gap-2 rounded-2xl border border-border/50 bg-background/60 p-3">
      <div className="flex items-center justify-between text-xs">
        <span className="flex items-center gap-1.5 text-muted-foreground">
          {icon}
          {label}
        </span>
        <span className="font-medium tabular-nums">{value}</span>
      </div>
      <div className="h-2 w-full overflow-hidden rounded-full bg-muted/80">
        <div
          className={`h-full rounded-full ${barColor} transition-all duration-300`}
          style={{ width: `${fillPct}%` }}
        />
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/sections/training-section.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { SectionCard } from "@/components/section-card";
import { Button } from "@/components/ui/button";
import { ChartContainer } from "@/components/ui/chart";
import type { ChartConfig } from "@/components/ui/chart";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import {
  parseYamlConfig,
  serializeConfigToYaml,
  useTrainingActions,
  useTrainingConfigStore,
  validateTrainingConfig,
} from "@/features/training";
import {
  Archive04Icon,
  ChartAverageIcon,
  CleanIcon,
  CloudUploadIcon,
  Rocket01Icon,
} from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useRef } from "react";
import { toast } from "sonner";
import { CartesianGrid, Line, LineChart, XAxis, YAxis } from "recharts";

// Chart series configuration for the placeholder chart: a single "loss"
// series with its display label and line colour.
const chartConfig = {
  loss: { label: "Loss", color: "#3b82f6" },
} satisfies ChartConfig;

// Hard-coded sample loss curve drawn (blurred) behind the
// "No training data yet" overlay; it is not real training output.
const placeholderData = [
  { step: 0, loss: 2.5 },
  { step: 10, loss: 2.1 },
  { step: 20, loss: 1.7 },
  { step: 30, loss: 1.3 },
  { step: 40, loss: 1.0 },
  { step: 50, loss: 0.8 },
];

/**
 * "Training" card shown before a run starts.
 *
 * Renders a blurred placeholder loss chart with an empty-state overlay, the
 * Start Training button together with any start / compatibility / validation
 * error text, and Upload / Save / Reset controls for the training config
 * (YAML import and export, reset to model defaults).
 */
export function TrainingSection() {
  const store = useTrainingConfigStore();
  const { isStarting, startError, startTrainingRun } = useTrainingActions();
  // The dataset is flagged as image (or audio) but the selected model is not
  // flagged as a vision (or audio) model.
  const isIncompatible =
    (!store.isVisionModel && store.isDatasetImage === true) ||
    (!store.isAudioModel && store.isDatasetAudio === true);
  const configValidation = validateTrainingConfig(store);
  // Hidden <input type="file"> that the Upload button clicks programmatically.
  const fileInputRef = useRef<HTMLInputElement>(null);

  // Read the chosen file as text, parse it as a YAML config and apply it to
  // the store; success and failure are reported via toasts.
  const handleFileUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
    const file = e.target.files?.[0];
    if (!file) return;
    // Clear the input so choosing the same file again still fires onChange.
    e.target.value = "";

    const reader = new FileReader();
    reader.onload = () => {
      try {
        const config = parseYamlConfig(reader.result as string);
        store.applyConfigPatch(config);
        toast.success("Config loaded", { description: file.name });
      } catch (err) {
        toast.error("Failed to load config", {
          description:
            err instanceof Error ? err.message : "Invalid YAML file",
        });
      }
    };
    reader.onerror = () => {
      toast.error("Failed to read file");
    };
    reader.readAsText(file);
  };

  // Serialize the current config to YAML and download it through a temporary
  // object URL and a detached <a> element.
  const handleSaveConfig = () => {
    const yamlStr = serializeConfigToYaml(store, store.isVisionModel);
    const blob = new Blob([yamlStr], { type: "text/yaml" });
    const url = URL.createObjectURL(blob);
    const a = document.createElement("a");
    a.href = url;

    // File name: <model>_<method>_<dataset>_<timestamp>.yaml, using only the
    // last path segment of the model and dataset identifiers.
    const model = (store.selectedModel ?? "model").split("/").pop();
    const method = store.trainingMethod ?? "qlora";
    const dataset = (store.dataset ?? "dataset").split("/").pop();
    // ISO timestamp (UTC) with ":" and "T" replaced by "-", cut to seconds:
    // YYYY-MM-DD-HH-MM-SS.
    const timestamp = new Date().toISOString().replace(/[:T]/g, "-").slice(0, 19);
    a.download = `${model}_${method}_${dataset}_${timestamp}.yaml`;

    a.click();
    URL.revokeObjectURL(url);
  };

  const handleResetConfig = () => {
    store.resetToModelDefaults();
    toast.success("Parameters reset to model defaults");
  };

  return (
    <div data-tour="studio-training" className="col-span-1 xl:col-span-4">
      <SectionCard
        icon={<HugeiconsIcon icon={ChartAverageIcon} className="size-5" />}
        title="Training"
        description="Monitor and control training"
        accent="blue"
        className="md:min-h-[470px]"
      >
        <div className="flex flex-col gap-4">
        {/* Loss chart */}
        <div className="relative  ">
          <ChartContainer
            config={chartConfig}
            className="h-[180px] w-full relative right-8 blur"
          >
            <LineChart data={placeholderData} accessibilityLayer={true}>
              <CartesianGrid vertical={false} strokeDasharray="3 3" />
              <XAxis
                dataKey="step"
                tickLine={false}
                axisLine={false}
                tickMargin={8}
                fontSize={10}
              />
              <YAxis
                tickLine={false}
                axisLine={false}
                tickMargin={8}
                fontSize={10}
              />
              <Line
                type="monotone"
                dataKey="loss"
                stroke="var(--color-loss)"
                strokeWidth={2}
                dot={false}
              />
            </LineChart>
          </ChartContainer>
          <div className="absolute inset-0 flex flex-col items-center justify-center gap-1">
            <HugeiconsIcon
              icon={ChartAverageIcon}
              className="size-5 text-muted-foreground/50"
            />
            <p className="text-sm font-medium text-muted-foreground">
              No training data yet
            </p>
            <p className="text-xs text-muted-foreground/60">
              Start training to see loss progress
            </p>
          </div>
        </div>

        {/* Start/Stop */}
        <Button
          data-tour="studio-start"
          className="w-full cursor-pointer bg-gradient-to-r from-emerald-500 to-teal-500 text-white hover:from-emerald-600 hover:to-teal-600"
          onClick={() => void startTrainingRun()}
          disabled={isStarting || isIncompatible || store.isCheckingDataset || !configValidation.ok}
        >
          <HugeiconsIcon icon={Rocket01Icon} className="size-4" />
          {isStarting ? "Starting..." : store.isCheckingDataset ? "Checking dataset..." : "Start Training"}
        </Button>
        {startError && (
          <p className="text-xs text-red-500 leading-relaxed">{startError}</p>
        )}
        {isIncompatible && (
          <p className="text-xs text-red-500 leading-relaxed">
            Text model is not compatible with a multimodal dataset. Switch to a vision model or choose a text-only dataset.
          </p>
        )}
        {!configValidation.ok && configValidation.message && !isIncompatible && (
          <p className="text-xs text-red-500 leading-relaxed">{configValidation.message}</p>
        )}

        {/* Upload / Save / Reset */}
        <p className="text-xs text-muted-foreground">Training Config</p>
        <div className="grid grid-cols-3 gap-2">
          <Tooltip>
            <TooltipTrigger asChild>
              <Button
                variant="outline"
                size="sm"
                className="cursor-pointer"
                onClick={() => fileInputRef.current?.click()}
              >
                <HugeiconsIcon icon={CloudUploadIcon} className="size-3.5" />
                Upload
              </Button>
            </TooltipTrigger>
            <TooltipContent>Load a saved YAML config</TooltipContent>
          </Tooltip>
          <Tooltip>
            <TooltipTrigger asChild>
              <Button
                data-tour="studio-save"
                variant="outline"
                size="sm"
                className="cursor-pointer"
                onClick={handleSaveConfig}
              >
                <HugeiconsIcon icon={Archive04Icon} className="size-3.5" />
                Save
              </Button>
            </TooltipTrigger>
            <TooltipContent>Download current config as YAML</TooltipContent>
          </Tooltip>
          <Tooltip>
            <TooltipTrigger asChild>
              <Button
                variant="outline"
                size="sm"
                className="cursor-pointer"
                onClick={handleResetConfig}
                disabled={!store.selectedModel}
              >
                <HugeiconsIcon icon={CleanIcon} className="size-3.5" />
                Reset
              </Button>
            </TooltipTrigger>
            <TooltipContent>Reset to model defaults</TooltipContent>
          </Tooltip>
        </div>
        <input
          ref={fileInputRef}
          type="file"
          accept=".yaml,.yml"
          className="hidden"
          onChange={handleFileUpload}
        />
        </div>
      </SectionCard>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/studio-page.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import {
  shouldShowTrainingView,
  useDatasetPreviewDialogStore,
  useTrainingActions,
  useTrainingConfigStore,
  useTrainingRuntimeLifecycle,
  useTrainingRuntimeStore,
} from "@/features/training";
import { GuidedTour, useGuidedTourController } from "@/features/tour";
import { studioTourSteps, studioTrainingTourSteps } from "./tour";
import { ArrowLeft01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { type ReactElement, useEffect } from "react";
import { DatasetPreviewDialog } from "./sections/dataset-preview-dialog";
import { DatasetSection } from "./sections/dataset-section";
import { ModelSection } from "./sections/model-section";
import { ParamsSection } from "./sections/params-section";
import { TrainingSection } from "./sections/training-section";
import { TrainingView } from "./training-view";

// Versioned key passed as `autoKey` to the guided-tour controller for the
// config view. NOTE(review): presumably identifies whether the tour has
// already auto-started — confirm in useGuidedTourController.
const STUDIO_TOUR_KEY = "tour:studio:v1";

export function StudioPage(): ReactElement {
  useTrainingRuntimeLifecycle();
  const showTrainingView = useTrainingRuntimeStore(shouldShowTrainingView);
  const isTrainingRunning = useTrainingRuntimeStore((state) => state.isTrainingRunning);
  const runtimeMessage = useTrainingRuntimeStore((state) => state.message);
  const runtimePhase = useTrainingRuntimeStore((state) => state.phase);
  const isHydratingRuntime = useTrainingRuntimeStore((state) => state.isHydrating);
  const hasHydratedRuntime = useTrainingRuntimeStore((state) => state.hasHydrated);
  const { dismissTrainingRun } = useTrainingActions();

  const config = useTrainingConfigStore();
  const selectedModel = useTrainingConfigStore((s) => s.selectedModel);
  const ensureModelDefaultsLoaded = useTrainingConfigStore(
    (s) => s.ensureModelDefaultsLoaded,
  );
  const ensureDatasetChecked = useTrainingConfigStore(
    (s) => s.ensureDatasetChecked,
  );
  const dialogOpen = useDatasetPreviewDialogStore((s) => s.open);
  const dialogMode = useDatasetPreviewDialogStore((s) => s.mode);
  const dialogInitial = useDatasetPreviewDialogStore((s) => s.initialData);
  const closeDialog = useDatasetPreviewDialogStore((s) => s.close);

  const stopRequested = useTrainingRuntimeStore((state) => state.stopRequested);
  const canGoBack =
    showTrainingView &&
    !isHydratingRuntime &&
    (stopRequested ||
      (!isTrainingRunning &&
        (runtimePhase === "stopped" ||
          runtimePhase === "error" ||
          runtimePhase === "completed" ||
          runtimePhase === "idle")));
  const tourEnabled = hasHydratedRuntime && !isHydratingRuntime;
  const isConfigTour = !showTrainingView;
  const tourSteps = showTrainingView ? studioTrainingTourSteps : studioTourSteps;
  const tour = useGuidedTourController({
    id: "studio",
    steps: tourSteps,
    enabled: tourEnabled,
    autoKey: isConfigTour ? STUDIO_TOUR_KEY : undefined,
    autoWhen: isConfigTour,
  });

  const setTourOpen = tour.setOpen;
  useEffect(() => {
    setTourOpen(false);
  }, [showTrainingView, setTourOpen]);

  useEffect(() => {
    ensureModelDefaultsLoaded();
    ensureDatasetChecked();
  }, [selectedModel, ensureModelDefaultsLoaded, ensureDatasetChecked]);

  return (
    <div className="relative min-h-screen overflow-hidden bg-background">
      <main className="relative z-10 mx-auto max-w-7xl px-4 py-4 sm:px-6">
        <GuidedTour {...tour.tourProps} celebrate={isConfigTour} />

        <DatasetPreviewDialog
          open={dialogOpen}
          onOpenChange={(open) => {
            if (!open) closeDialog();
          }}
          datasetSource={config.datasetSource}
          datasetName={
            config.datasetSource === "huggingface" ? config.dataset : config.uploadedFile
          }
          hfToken={config.hfToken.trim() || null}
          datasetSubset={config.datasetSubset}
          datasetSplit={config.datasetSplit}
          mode={dialogMode}
          initialData={dialogInitial}
          isVlm={config.isVisionModel && config.isDatasetImage === true}
        />

        {canGoBack && (
          <Button
            variant="ghost"
            size="sm"
            className="mb-2 cursor-pointer gap-1.5 text-muted-foreground"
            onClick={() => void dismissTrainingRun()}
          >
            <HugeiconsIcon icon={ArrowLeft01Icon} className="size-4" />
            Back to configuration
          </Button>
        )}

        <div className="mb-6 flex flex-col gap-0.5 sm:mb-8">
          <h1 className="text-2xl font-semibold tracking-tight">
            Fine-tuning Studio
          </h1>
          <p className="text-sm text-muted-foreground">
            {showTrainingView
              ? runtimeMessage || "Training in progress"
              : "Configure and start training"}
          </p>
        </div>

        {!hasHydratedRuntime && isHydratingRuntime ? (
          <div className="rounded-xl border bg-card p-8 text-sm text-muted-foreground">
            Loading training runtime...
          </div>
        ) : showTrainingView ? (
          <TrainingView />
        ) : (
          <div className="grid grid-cols-1 items-start gap-4 md:grid-cols-2 md:gap-6 xl:grid-cols-12">
            <ModelSection />
            <DatasetSection />
            <ParamsSection />
            <TrainingSection />
          </div>
        )}
      </main>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/tour/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { studioTourSteps } from "./steps";
export { studioTrainingTourSteps } from "./training";


================================================
FILE: studio/frontend/src/features/studio/tour/steps/base-model.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { ReadMore, type TourStep } from "@/features/tour";

/**
 * Configuration-tour step for the Hugging Face model picker.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights for this step; the body explains how to choose a base model and
 * links to the model-selection guide.
 */
export const studioBaseModelStep: TourStep = {
  id: "base-model",
  target: "studio-base-model",
  title: "Hugging Face Model",
  body: (
    <>
      Paste <span className="font-mono">org/model</span> or search. Pick a base
      model close to your task (chat/instruct vs base). Smaller models iterate
      faster; scale up once prompts + data look good.{" "}
      <ReadMore href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/what-model-should-i-use" />
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/dataset.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { ReadMore, type TourStep } from "@/features/tour";

/**
 * Configuration-tour step for the dataset picker.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights; the body covers dataset search/preview and format conversion,
 * and links to the datasets guide.
 */
export const studioDatasetStep: TourStep = {
  id: "dataset",
  target: "studio-dataset",
  title: "Dataset",
  body: (
    <>
      Search Hub or paste <span className="font-mono">user/dataset</span>. Preview
      a few rows: formatting matters more than size. We’ll try to auto-convert
      your dataset into a supported training format. If we can’t infer it
      cleanly, we’ll prompt you to map the fields manually. If outputs look off
      in Chat later, dataset formatting/template is the first thing to check.{" "}
      <ReadMore href="https://unsloth.ai/docs/get-started/fine-tuning-llms-guide/datasets-guide" />
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/index.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TourStep } from "@/features/tour";
import { studioBaseModelStep } from "./base-model";
import { studioDatasetStep } from "./dataset";
import { studioLocalModelStep } from "./local-model";
import { studioMethodStep } from "./method";
import { studioNavStep } from "./nav";
import { studioParamsStep } from "./params";
import { studioSaveStep } from "./save";
import { studioStartStep } from "./start";

/**
 * Ordered steps of the Studio configuration tour.
 *
 * The array order is the order in which the steps are presented: the tour
 * walks the list by index and shows "index + 1 / total" on each card.
 */
export const studioTourSteps: TourStep[] = [
  studioNavStep,
  studioLocalModelStep,
  studioBaseModelStep,
  studioMethodStep,
  studioDatasetStep,
  studioParamsStep,
  studioStartStep,
  studioSaveStep,
];


================================================
FILE: studio/frontend/src/features/studio/tour/steps/local-model.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { ReadMore, type TourStep } from "@/features/tour";

/**
 * Configuration-tour step for the local model path input.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights; the body explains when to point at locally downloaded weights.
 *
 * NOTE(review): the ReadMore link here lives under `/docs/basics/...` while the
 * base-model and dataset steps link under
 * `/docs/get-started/fine-tuning-llms-guide/...` — confirm this URL resolves.
 */
export const studioLocalModelStep: TourStep = {
  id: "local-model",
  target: "studio-local-model",
  title: "Local model path",
  body: (
    <>
      Use this if you already downloaded weights locally (eg{" "}
      <span className="font-mono">./models/...</span>) to avoid re-downloading.
      Folder should look like a Hugging Face model (config + tokenizer + weights).{" "}
      <ReadMore href="https://unsloth.ai/docs/basics/fine-tuning-llms-guide" />
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/method.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { ReadMore, type TourStep } from "@/features/tour";

/**
 * Configuration-tour step for the training-method selector.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights; the body contrasts LoRA, QLoRA and full fine-tuning and links to
 * the LoRA hyperparameters guide.
 */
export const studioMethodStep: TourStep = {
  id: "method",
  target: "studio-method",
  title: "Method: QLoRA vs LoRA vs Full",
  body: (
    <>
      LoRA: trains small adapter weights (fast, common default). QLoRA: LoRA on
      4-bit base weights (much lower VRAM). Full: updates all weights (highest
      cost, usually needs more data to be worth it).{" "}
      <ReadMore href="https://unsloth.ai/docs/basics/lora-hyperparameters-guide" />
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/nav.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { ReadMore, type TourStep } from "@/features/tour";

/**
 * First step of the configuration tour: a general orientation anchored to the
 * navbar.
 *
 * The `"navbar"` target is special-cased by the guided tour component: it gets
 * a smaller spotlight padding and is not scrolled into view.
 */
export const studioNavStep: TourStep = {
  id: "nav",
  target: "navbar",
  title: "Quick orientation",
  body: (
    <>
      Studio: pick base model, dataset, hyperparams, then start training. After
      you start, you’ll see a Training view with live loss/metrics. Chat is for
      testing base vs LoRA adapters. Export packages checkpoints for deployment.{" "}
      <ReadMore href="https://unsloth.ai/docs/get-started/fine-tuning-for-beginners" />
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/params.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { ReadMore, type TourStep } from "@/features/tour";

/**
 * Configuration-tour step for the hyperparameter section.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights; the body gives starting advice on epochs and iteration and links
 * to the LoRA hyperparameters guide.
 */
export const studioParamsStep: TourStep = {
  id: "params",
  target: "studio-params",
  title: "Dial hyperparams",
  body: (
    <>
      Start boring, then iterate. We usually recommend starting with 1-3 epochs
      (higher can overfit fast). If you’re unsure, change 1 knob at a time, and
      watch train vs eval loss.{" "}
      <ReadMore href="https://unsloth.ai/docs/basics/lora-hyperparameters-guide" />
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/save.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TourStep } from "@/features/tour";

/**
 * Configuration-tour step for saving the training config.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights; this step has no "read more" link.
 */
export const studioSaveStep: TourStep = {
  id: "save",
  target: "studio-save",
  title: "Save config",
  body: (
    <>
      Save your training config as a YAML file. Re-running the same baseline
      makes it obvious if a change helped (or if you just got lucky).
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/steps/start.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TourStep } from "@/features/tour";

/**
 * Configuration-tour step for the start-training control.
 *
 * `target` names the `data-tour` attribute value of the element the tour
 * highlights; the body lists the first things to check if a run fails
 * immediately.
 */
export const studioStartStep: TourStep = {
  id: "start",
  target: "studio-start",
  title: "Start training",
  body: (
    <>
      Kick off training. If it errors immediately, check HF token / local paths
      / dataset access first. Start with a small run to sanity-check loss + sample
      outputs before burning hours.
    </>
  ),
};


================================================
FILE: studio/frontend/src/features/studio/tour/training/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { studioTrainingTourSteps } from "./steps";


================================================
FILE: studio/frontend/src/features/studio/tour/training/steps.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TourStep } from "@/features/tour";

/**
 * Ordered steps of the tour shown while the Studio training view is active.
 *
 * Each step's `target` names the `data-tour` attribute value of the element to
 * highlight. The array order is the presentation order.
 */
export const studioTrainingTourSteps: TourStep[] = [
  {
    id: "nav",
    target: "navbar",
    title: "Training view",
    body: (
      <>
        This view updates live as training runs. Watch loss, speed, and ETA, and
        use Stop if you need to bail out or save.
      </>
    ),
  },
  {
    id: "progress",
    target: "studio-training-progress",
    title: "Progress + ETA",
    body: (
      <>
        Phase shows what we’re doing (loading model/dataset, configuring,
        training). ETA is rough early on; it stabilizes after a few steps.
      </>
    ),
  },
  {
    id: "train-loss",
    target: "studio-training-loss",
    title: "Training loss",
    body: (
      <>
        Training loss should generally trend down. Absolute values vary by
        dataset + tokenizer, so use it for direction more than “a magic number”.
        If loss goes very low (eg below ~0.2), that can be a sign you’re
        overfitting. If loss plateaus high, you likely need better data
        formatting, more data, or different hyperparams.
      </>
    ),
  },
  {
    id: "eval-loss",
    target: "studio-eval-loss",
    title: "Eval loss (validation)",
    body: (
      <>
        {/* JSX text is not Markdown: backticks would render literally, so
            inline code uses the same font-mono span as the other tour steps. */}
        Eval loss is your sanity check. If training loss keeps dropping but eval
        loss goes up, you’re likely overfitting. To track it, set an eval dataset
        and <span className="font-mono">eval_steps</span> (setting{" "}
        <span className="font-mono">eval_steps=1</span> can be very slow).
      </>
    ),
  },
  {
    id: "stop",
    target: "studio-training-stop",
    title: "Stop / save",
    body: (
      <>
        Stop training any time. “Stop and Save” keeps the checkpoint/adapters so
        you can export or compare later.
      </>
    ),
  },
];


================================================
FILE: studio/frontend/src/features/studio/training-start-overlay.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  AlertDialog,
  AlertDialogAction,
  AlertDialogCancel,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { Button } from "@/components/ui/button";
import {
  AnimatedSpan,
  Terminal,
  TypingAnimation,
} from "@/components/ui/terminal";
import { useTrainingActions, useTrainingRuntimeStore } from "@/features/training";
import { Cancel01Icon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useEffect, useState, type ReactElement } from "react";

/** Props for {@link TrainingStartOverlay}. */
type TrainingStartOverlayProps = {
  /** Status text shown on the last terminal line; falls back to "starting training..." when empty. */
  message: string;
  /** Step counter echoed in parentheses at the end of the last terminal line. */
  currentStep: number;
};

/**
 * Overlay rendered on top of the training view while a run is starting.
 *
 * Shows an animated terminal with the current status message and a cancel
 * button. Cancelling asks for confirmation, then requests a stop and, if the
 * stop succeeds, dismisses the training run.
 */
export function TrainingStartOverlay({
  message,
  currentStep,
}: TrainingStartOverlayProps): ReactElement {
  const { stopTrainingRun, dismissTrainingRun } = useTrainingActions();
  const isStarting = useTrainingRuntimeStore((s) => s.isStarting);
  const [cancelDialogOpen, setCancelDialogOpen] = useState(false);
  const [cancelRequested, setCancelRequested] = useState(false);

  // Re-enable the cancel button once the runtime leaves the "starting" state.
  useEffect(() => {
    if (!isStarting) {
      setCancelRequested(false);
    }
  }, [isStarting]);

  /** Confirmed cancel: flag the stop, request it, and dismiss the run on success. */
  function handleConfirmCancel(): void {
    setCancelRequested(true);
    setCancelDialogOpen(false);
    useTrainingRuntimeStore.getState().setStopRequested(true);
    void stopTrainingRun(false)
      .then((ok) => {
        if (ok) {
          void dismissTrainingRun();
        } else {
          setCancelRequested(false);
        }
      })
      .catch((error: unknown) => {
        // A rejected stop request must not leave the cancel button disabled
        // (and must not surface as an unhandled promise rejection).
        console.error("Failed to cancel training run", error);
        setCancelRequested(false);
      });
  }

  return (
    <div className="pointer-events-none absolute inset-0 z-30 flex items-center justify-center rounded-2xl bg-background/45 backdrop-blur-[1px]">
      <div className="pointer-events-auto relative flex w-[860px] max-w-[calc(100%-2rem)] flex-col items-center gap-4">
        <img
          src="/unsloth-gem.png"
          alt="Unsloth mascot"
          className="size-24 object-contain"
        />
        <div className="relative w-full">
          <AlertDialog open={cancelDialogOpen} onOpenChange={setCancelDialogOpen}>
            <Button
              variant="ghost"
              size="icon"
              className="absolute right-3 top-3 z-10 size-7 cursor-pointer rounded-full text-muted-foreground/60 hover:bg-destructive/10 hover:text-destructive"
              onClick={() => setCancelDialogOpen(true)}
              disabled={cancelRequested}
              aria-label="Cancel training"
            >
              <HugeiconsIcon icon={Cancel01Icon} className="size-3.5" />
            </Button>
            <AlertDialogContent overlayClassName="bg-background/40 supports-backdrop-filter:backdrop-blur-[1px]">
              <AlertDialogHeader>
                <AlertDialogTitle>Cancel Training</AlertDialogTitle>
                <AlertDialogDescription>
                  Do you want to cancel the current training run?
                </AlertDialogDescription>
              </AlertDialogHeader>
              <AlertDialogFooter>
                <AlertDialogCancel>Continue Training</AlertDialogCancel>
                <AlertDialogAction
                  variant="destructive"
                  onClick={handleConfirmCancel}
                >
                  Cancel Training
                </AlertDialogAction>
              </AlertDialogFooter>
            </AlertDialogContent>
          </AlertDialog>
          <Terminal
            className="w-full min-h-[390px] rounded-2xl px-7 py-6 text-left"
            startOnView={false}
          >
            <TypingAnimation
              duration={36}
              className="bg-gradient-to-r from-emerald-300 via-lime-300 to-teal-300 bg-clip-text font-semibold text-transparent"
            >
              {"> unsloth training starts..."}
            </TypingAnimation>
            <AnimatedSpan className="my-2">
              <pre className="whitespace-pre text-muted-foreground inline-block">{`==((====))==\n   \\\\   /|\nO^O/ \\_/ \\\n\\        /\n "-____-"`}</pre>
            </AnimatedSpan>
            <TypingAnimation duration={44}>
              {"> Preparing model and dataset..."}
            </TypingAnimation>
            <TypingAnimation duration={44}>
              {"> We are getting everything ready for your run..."}
            </TypingAnimation>
            <AnimatedSpan className="mt-2 text-muted-foreground">
              {`> ${message || "starting training..."} | waiting for first step... (${currentStep})`}
            </AnimatedSpan>
          </Terminal>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/studio/training-view.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { cn } from "@/lib/utils";
import { useTrainingRuntimeStore } from "@/features/training";
import type { ReactElement } from "react";
import { useShallow } from "zustand/react/shallow";
import { ChartsSection } from "./sections/charts-section";
import { ProgressSection } from "./sections/progress-section";
import { TrainingStartOverlay } from "./training-start-overlay";

/**
 * Live training view: progress and charts, with a start-up overlay on top.
 *
 * The overlay is shown (and the content behind it blurred) while the runtime
 * is starting, while it is in one of the preparation phases, or while it is in
 * the "training" phase but no first step has been received and the step
 * counter is still at or below zero.
 */
export function TrainingView(): ReactElement {
  const { phase, message, currentStep, firstStepReceived, isStarting } =
    useTrainingRuntimeStore(
      useShallow((s) => ({
        phase: s.phase,
        message: s.message,
        currentStep: s.currentStep,
        firstStepReceived: s.firstStepReceived,
        isStarting: s.isStarting,
      })),
    );

  // Phases that precede actual training.
  const isPreparingPhase = [
    "downloading_model",
    "downloading_dataset",
    "loading_model",
    "loading_dataset",
    "configuring",
  ].some((candidate) => candidate === phase);
  const awaitingFirstStep = phase === "training" && !firstStepReceived;
  const showOverlay =
    isStarting || isPreparingPhase || (awaitingFirstStep && currentStep <= 0);

  const overlay = showOverlay ? (
    <TrainingStartOverlay message={message} currentStep={currentStep} />
  ) : null;

  const wrapperClassName = cn("relative", showOverlay && "min-h-[72vh]");
  const contentClassName = cn(
    "relative z-10 flex flex-col gap-6 transition-[filter]",
    showOverlay && "blur",
  );

  return (
    <div className={wrapperClassName}>
      <div className={contentClassName}>
        <div data-tour="studio-training-progress">
          <ProgressSection />
        </div>
        <ChartsSection />
      </div>
      {overlay}
    </div>
  );
}


================================================
FILE: studio/frontend/src/features/tour/components/guided-tour.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { Button } from "@/components/ui/button";
import { cn } from "@/lib/utils";
import { HugeiconsIcon } from "@hugeicons/react";
import { ArrowLeft01Icon, ArrowRight01Icon, Cancel01Icon, CheckmarkCircle01Icon } from "@hugeicons/core-free-icons";
import { Dialog as DialogPrimitive } from "radix-ui";
import { AnimatePresence, motion } from "motion/react";
import { useEffect, useId, useLayoutEffect, useMemo, useRef, useState } from "react";
import { cssEscape, toRect } from "../lib/dom";
import { fireConfettiFireworks } from "../lib/confetti-fireworks";
import { computeCardPos, padded, pickPlacement } from "../lib/layout";
import { SpotlightOverlay } from "./spotlight-overlay";
import type { Placement, Rect, TourStep } from "../types";

/** Props accepted by the GuidedTour component. */
type GuidedTourProps = {
  /** Whether the tour dialog is currently open. */
  open: boolean;
  /** Called when the tour asks to open or close. */
  onOpenChange: (open: boolean) => void;
  /** Steps to walk through, in presentation order. */
  steps: TourStep[];
  /** Called when the tour is closed via skip. */
  onSkip: () => void;
  /** Called when the tour is closed via completion. */
  onComplete: () => void;
  /** Fire confetti on completion only (never on skip). */
  celebrate?: boolean;
};

export function GuidedTour({
  open,
  onOpenChange,
  steps,
  onSkip,
  onComplete,
  celebrate = false,
}: GuidedTourProps) {
  const maskId = `${useId()}-tour-mask`;
  const [idx, setIdx] = useState(0);
  const [vw, setVw] = useState(0);
  const [vh, setVh] = useState(0);
  const [targetRect, setTargetRect] = useState<Rect | null>(null);
  const [placement, setPlacement] = useState<Placement>("right");
  const [cardPos, setCardPos] = useState<{ left: number; top: number }>({
    left: 12,
    top: 12,
  });
  const cardRef = useRef<HTMLDivElement>(null);
  const closeLockRef = useRef(false);
  const rafRef = useRef<number | null>(null);
  const lastRectRef = useRef<Rect | null>(null);
  const activeStepRef = useRef<TourStep | null>(null);

  const step = steps[idx] ?? null;
  const total = steps.length;
  const isLast = idx === total - 1;

  const spotlightRect = useMemo(() => {
    if (!targetRect || !vw || !vh) return null;
    const pad = step?.target === "navbar" ? 4 : 14;
    return padded(targetRect, pad, vw, vh);
  }, [step?.target, targetRect, vw, vh]);

  useEffect(() => {
    if (!open) return;
    const prev = activeStepRef.current;
    if (prev && prev.id !== step?.id) {
      void prev.onExit?.();
    }
    activeStepRef.current = step;
    if (step) {
      void step.onEnter?.();
    }
  }, [open, step?.id]); // run before target lookup effect below

  useEffect(() => {
    if (open) return;
    const prev = activeStepRef.current;
    activeStepRef.current = null;
    if (prev) {
      void prev.onExit?.();
    }
  }, [open]);

  useEffect(() => {
    if (!open) return;
    setIdx(0);
    setTargetRect(null);
    closeLockRef.current = false;
    lastRectRef.current = null;
  }, [open]);

  useEffect(() => {
    if (!open) return;
    function onResize() {
      setVw(window.innerWidth);
      setVh(window.innerHeight);
    }
    onResize();
    window.addEventListener("resize", onResize);
    return () => window.removeEventListener("resize", onResize);
  }, [open]);

  useEffect(() => {
    if (!open || !step) return;

    const sel = `[data-tour="${cssEscape(step.target)}"]`;
    let el: HTMLElement | null = null;
    let ro: ResizeObserver | null = null;
    let retryTimer = 0;
    let retries = 0;

    let raf = 0;
    let t = 0;

    function findTarget(): HTMLElement | null {
      const found = document.querySelector(sel);
      if (!(found instanceof HTMLElement)) return null;
      return found;
    }

    function isUsableTarget(candidate: HTMLElement): boolean {
      const r = candidate.getBoundingClientRect();
      return r.width >= 6 && r.height >= 6;
    }

    function rectChanged(a: Rect | null, b: Rect): boolean {
      if (!a) return true;
      return (
        Math.abs(a.x - b.x) > 0.5 ||
        Math.abs(a.y - b.y) > 0.5 ||
        Math.abs(a.w - b.w) > 0.5 ||
        Math.abs(a.h - b.h) > 0.5
      );
    }

    function read(candidate: HTMLElement) {
      const r = candidate.getBoundingClientRect();
      const next = toRect(r);
      const prev = lastRectRef.current;
      if (rectChanged(prev, next)) {
        lastRectRef.current = next;
        setTargetRect(next);
      }
    }

    function schedule() {
      if (rafRef.current != null) return;
      rafRef.current = window.requestAnimationFrame(() => {
        rafRef.current = null;
        if (el) read(el);
      });
    }

    function attach(candidate: HTMLElement) {
      el = candidate;

      if (step.target !== "navbar") {
        el.scrollIntoView({
          block: "center",
          inline: "center",
          behavior: "smooth",
        });
      }

      raf = window.requestAnimationFrame(() => read(el!));
      t = window.setTimeout(schedule, 240);

      ro = new ResizeObserver(() => schedule());
      ro.observe(el);
      window.addEventListener("scroll", schedule, { capture: true, passive: true });
      window.addEventListener("resize", schedule, { passive: true });
    }

    function tryAttach(): boolean {
      const candidate = findTarget();
      if (!candidate) return false;
      if (!isUsableTarget(candidate)) return false;
      attach(candidate);
      return true;
    }

    if (!tryAttach()) {
      setTargetRect(null);
      retryTimer = window.setInterval(() => {
        retries += 1;
        if (tryAttach() || retries > 40) {
          window.clearInterval(retryTimer);
        }
      }, 50);
    }

    return () => {
      window.cancelAnimationFrame(raf);
      window.clearTimeout(t);
      if (retryTimer) window.clearInterval(retryTimer);
      ro?.disconnect();
      window.removeEventListener("scroll", schedule, true);
      window.removeEventListener("resize", schedule);
      if (rafRef.current != null) {
        window.cancelAnimationFrame(rafRef.current);
        rafRef.current = null;
      }
    };
  }, [open, step?.id]);

  useLayoutEffect(() => {
    if (!open || !spotlightRect || !vw || !vh) return;
    const card = cardRef.current?.getBoundingClientRect();
    if (!card) return;

    const gap = 14;
    const picked = pickPlacement(spotlightRect, { w: card.width, h: card.height }, vw, vh, gap);
    setPlacement(picked);
    setCardPos(
      computeCardPos(
        picked,
        spotlightRect,
        { w: card.width, h: card.height },
        vw,
        vh,
        gap,
      ),
    );
  }, [open, spotlightRect, vw, vh, idx]);

  function requestClose(reason: "skip" | "complete") {
    if (closeLockRef.current) return;
    closeLockRef.current = true;
    if (reason === "skip") {
      onSkip();
    } else {
      if (celebrate) void fireConfettiFireworks();
      onComplete();
    }
    onOpenChange(false);
  }

  return (
    <DialogPrimitive.Root
      open={open}
      onOpenChange={(v) => {
        if (v) onOpenChange(true);
        else requestClose("skip");
      }}
      modal={true}
    >
      <DialogPrimitive.Portal>
        <AnimatePresence>
          {open && (
            <>
              <DialogPrimitive.Overlay asChild>
                <motion.div
                  className="fixed inset-0 z-50"
                  initial={{ opacity: 0 }}
                  animate={{ opacity: 1 }}
                  exit={{ opacity: 0 }}
                  transition={{ duration: 0.18 }}
                >
                  <SpotlightOverlay rect={spotlightRect} vw={vw} vh={vh} maskId={maskId} />
                  {spotlightRect && (
                    <motion.div
                      className="fixed z-[51] pointer-events-none rounded-[22px] ring-1 ring-white/10"
                      initial={false}
                      animate={{
                        left: spotlightRect.x,
                        top: spotlightRect.y,
                        width: spotlightRect.w,
                        height: spotlightRect.h,
                        boxShadow:
                          "0 0 0 1px rgba(34, 211, 238, 0.12), 0 0 0 6px rgba(16, 185, 129, 0.08), 0 18px 90px rgba(0,0,0,0.55)",
                      }}
                      transition={{ type: "spring", stiffness: 260, damping: 30 }}
                    />
                  )}
                </motion.div>
              </DialogPrimitive.Overlay>

              <DialogPrimitive.Content
                onPointerDownOutside={(e) => e.preventDefault()}
                onInteractOutside={(e) => e.preventDefault()}
                className={cn(
                  "fixed z-[52] outline-none",
                  "w-[min(420px,calc(100vw-1.5rem))]",
                )}
                style={{
                  left: cardPos.left,
                  top: cardPos.top,
                }}
              >
                <motion.div
                  ref={cardRef}
                  initial={{ opacity: 0, scale: 0.985, y: 8 }}
                  animate={{ opacity: 1, scale: 1, y: 0 }}
                  exit={{ opacity: 0, scale: 0.99, y: 10 }}
                  transition={{ duration: 0.22, ease: [0.165, 0.84, 0.44, 1] }}
                  className={cn(
                    "relative overflow-hidden rounded-[28px] corner-squircle",
                    "bg-white/95 text-foreground ring-1 ring-black/10 dark:bg-zinc-900/96 dark:text-zinc-100 dark:ring-white/12",
                    "shadow-[0_30px_120px_rgba(0,0,0,0.35)]",
                  )}
                  style={{
                    fontFamily: "'Figtree Variable', ui-sans-serif, sans-serif",
                  }}
                >
                  <div
                    className={cn(
                      "absolute z-10 size-3 rotate-45 rounded-[3px] bg-white/95 ring-1 ring-black/10 dark:bg-zinc-900/96 dark:ring-white/12",
                      placement === "right" &&
                        "-left-1 top-1/2 -translate-y-1/2",
                      placement === "left" &&
                        "-right-1 top-1/2 -translate-y-1/2",
                      placement === "bottom" &&
                        "left-1/2 -top-1 -translate-x-1/2",
                      placement === "top" &&
                        "left-1/2 -bottom-1 -translate-x-1/2",
                    )}
                    aria-hidden={true}
                  />
                  <div className="absolute inset-x-0 top-0 h-20 bg-gradient-to-b from-emerald-400/18 via-cyan-300/6 to-transparent dark:from-emerald-400/24 dark:via-cyan-300/12" />
                  <div className="absolute -left-14 -top-16 size-44 rounded-full bg-emerald-400/20 blur-2xl dark:bg-emerald-400/26" />
                  <div className="absolute -right-14 -bottom-16 size-44 rounded-full bg-cyan-300/18 blur-2xl dark:bg-cyan-300/24" />

                  <div className="relative p-5">
                    <div className="flex items-start justify-between gap-3">
                      <div className="min-w-0">
                        <div className="inline-flex items-center gap-2 rounded-full bg-black/[0.04] px-2.5 py-1 text-[10px] font-mono text-foreground/60 ring-1 ring-black/10 dark:bg-white/[0.04] dark:text-zinc-200/75 dark:ring-white/14">
                          {idx + 1}/{total}
                          <span className="size-1 rounded-full bg-emerald-500/70" />
                          guided tour
                        </div>
                        <DialogPrimitive.Title
                          className="mt-2 text-[18px] leading-tight"
                          style={{ fontFamily: "var(--font-serif)" }}
                        >
                          {step?.title ?? "Quick tour"}
                        </DialogPrimitive.Title>
                        <DialogPrimitive.Description className="mt-1.5 text-sm leading-relaxed text-foreground/70 dark:text-zinc-200/75">
                          {step?.body ?? "Let’s get you oriented."}
                        </DialogPrimitive.Description>
                      </div>

                      <Button
                        variant="ghost"
                        size="icon-sm"
                        className="text-foreground/60 hover:text-foreground hover:bg-black/[0.05] dark:text-zinc-300/70 dark:hover:text-zinc-100 dark:hover:bg-white/[0.08]"
                        onClick={() => requestClose("skip")}
                        aria-label="Skip tour"
                      >
                        <HugeiconsIcon icon={Cancel01Icon} className="size-4" />
                      </Button>
                    </div>

                    <div className="mt-5 flex items-center justify-between gap-3">
                      <Button
                        variant="ghost"
                        className="text-foreground/60 hover:text-foreground hover:bg-black/[0.05] dark:text-zinc-300/70 dark:hover:text-zinc-100 dark:hover:bg-white/[0.08]"
                        onClick={() => requestClose("skip")}
                      >
                        Skip
                      </Button>

                      <div className="flex items-center gap-2">
                        <Button
                          variant="outline"
                          className="border-black/10 bg-white/70 text-foreground hover:bg-white hover:text-foreground dark:border-white/15 dark:bg-white/[0.07] dark:text-zinc-100 dark:hover:bg-white/[0.12]"
                          disabled={idx === 0}
                          onClick={() => setIdx((i) => Math.max(0, i - 1))}
                        >
                          <HugeiconsIcon icon={ArrowLeft01Icon} className="size-4" />
                          Back
                        </Button>
                        {isLast ? (
                          <Button
                            variant="dark"
                            className="bg-gradient-to-r from-emerald-500 to-cyan-400 text-white hover:from-emerald-600 hover:to-cyan-500"
                            onClick={() => requestClose("complete")}
                          >
                            <HugeiconsIcon icon={CheckmarkCircle01Icon} className="size-4" />
                            Done
                          </Button>
                        ) : (
                          <Button
                            variant="dark"
                            className="bg-gradient-to-r from-emerald-500 to-cyan-400 text-white hover:from-emerald-600 hover:to-cyan-500"
                            onClick={() => setIdx((i) => Math.min(total - 1, i + 1))}
                          >
                            Next
                            <HugeiconsIcon icon={ArrowRight01Icon} className="size-4" />
                          </Button>
                        )}
                      </div>
                    </div>
                  </div>

                  <div className="h-px bg-gradient-to-r from-transparent via-black/10 to-transparent dark:via-white/14" />
                  <div className="px-5 py-3 text-[11px] text-foreground/55 dark:text-zinc-300/65">
                    Tip: `Esc` skips. Tour blocks clicks so you can read.
                  </div>
                </motion.div>
              </DialogPrimitive.Content>
            </>
          )}
        </AnimatePresence>
      </DialogPrimitive.Portal>
    </DialogPrimitive.Root>
  );
}


================================================
FILE: studio/frontend/src/features/tour/components/read-more.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Renders a "Read more" anchor.
 *
 * `href` defaults to "#". While it is still that placeholder the click is
 * cancelled, so the browser does not follow the empty fragment link.
 */
export function ReadMore({ href = "#" }: { href?: string }) {
  const isPlaceholder = href === "#";

  return (
    <a
      href={href}
      onClick={(event) => {
        if (isPlaceholder) {
          event.preventDefault();
        }
      }}
      className="text-emerald-600 underline underline-offset-2 hover:text-emerald-700"
    >
      Read more
    </a>
  );
}


================================================
FILE: studio/frontend/src/features/tour/components/spotlight-overlay.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { motion } from "motion/react";
import type { Rect } from "../types";

/** Props accepted by `SpotlightOverlay`. */
type SpotlightOverlayProps = {
  // Rectangle to cut out of the dimmed layer; when null the overlay falls
  // back to a 280x180 hole centred in the vw x vh area.
  rect: Rect | null;
  // Width used for the SVG viewBox and the full-size backdrop rect.
  vw: number;
  // Height used for the SVG viewBox and the full-size backdrop rect.
  vh: number;
  // DOM id of the <mask>; the gradient id is derived from it as `${maskId}-v`.
  maskId: string;
};

/**
 * Full-size SVG layer that darkens everything except one rounded rectangle.
 *
 * A radial gradient fills the whole viewBox and is drawn through a mask whose
 * white area is the full surface and whose black area is the "hole". Without
 * a `rect`, a 280x180 hole centred in the vw x vh area is used instead.
 */
export function SpotlightOverlay({ rect, vw, vh, maskId }: SpotlightOverlayProps) {
  const cornerRadius = 22;
  const gradientId = `${maskId}-v`;
  // Fallback hole: 280x180, centred in the viewport-sized coordinate space.
  const centredHole = { x: vw / 2 - 140, y: vh / 2 - 90, w: 280, h: 180 };
  const { x, y, w, h } = rect ?? centredHole;

  return (
    <svg
      className="absolute inset-0 size-full"
      viewBox={`0 0 ${vw} ${vh}`}
      preserveAspectRatio="none"
      aria-hidden={true}
    >
      <defs>
        <radialGradient id={gradientId} cx="50%" cy="45%" r="80%">
          <stop offset="0%" stopColor="rgba(6, 9, 15, 0.35)" />
          <stop offset="55%" stopColor="rgba(6, 9, 15, 0.65)" />
          <stop offset="100%" stopColor="rgba(6, 9, 15, 0.88)" />
        </radialGradient>
        <mask id={maskId}>
          {/* White keeps the backdrop visible; black punches the hole. */}
          <rect x="0" y="0" width={vw} height={vh} fill="white" />
          <motion.rect
            x={x}
            y={y}
            width={w}
            height={h}
            rx={cornerRadius}
            fill="black"
            transition={{ type: "spring", stiffness: 260, damping: 30 }}
          />
        </mask>
      </defs>
      <rect
        x="0"
        y="0"
        width={vw}
        height={vh}
        fill={`url(#${gradientId})`}
        mask={`url(#${maskId})`}
      />
    </svg>
  );
}


================================================
FILE: studio/frontend/src/features/tour/hooks/use-guided-tour-controller.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useMemo, useState } from "react";
import type { TourStep } from "../types";

// Name of the window-level event that `useGuidedTourController` listens for
// in order to open a tour.
export const TOUR_OPEN_EVENT = "omx:tour:open";

// `detail` payload carried by a TOUR_OPEN_EVENT CustomEvent.
export type TourOpenDetail = {
  // When set, only the controller whose `id` matches reacts; when omitted,
  // every listening controller reacts.
  id?: string;
};

/**
 * State controller for a guided tour.
 *
 * - Opens automatically once when `autoKey` and `autoWhen` are both set, the
 *   tour has steps, and localStorage holds no value under `autoKey`.
 * - Opens on a window `TOUR_OPEN_EVENT` whose detail has no id or has this
 *   tour's `id`.
 * - `onSkip` / `onComplete` record "skipped" / "done" under `autoKey` so the
 *   automatic open does not repeat.
 *
 * Returns the open state, its setter, both callbacks, and a memoised
 * `tourProps` bundle of `{ open, onOpenChange, steps, onSkip, onComplete }`.
 */
export function useGuidedTourController({
  id,
  steps,
  enabled = true,
  autoKey,
  autoWhen = false,
}: {
  id: string;
  steps: TourStep[];
  enabled?: boolean;
  autoKey?: string;
  autoWhen?: boolean;
}) {
  const [open, setOpen] = useState(false);
  // Flipped to true by the first effect, i.e. only after the component mounted.
  const [hasRuntime, setHasRuntime] = useState(false);

  useEffect(() => {
    setHasRuntime(true);
  }, []);

  // Automatic open, gated on the persisted marker.
  useEffect(() => {
    const ready = hasRuntime && enabled && steps.length > 0;
    if (!ready) return;
    if (!autoKey || !autoWhen) return;
    const alreadySeen = localStorage.getItem(autoKey);
    if (!alreadySeen) {
      setOpen(true);
    }
  }, [autoKey, autoWhen, enabled, hasRuntime, steps.length]);

  // Open on request via the window event.
  useEffect(() => {
    if (!hasRuntime || !enabled) return;

    const handleOpenRequest = (event: Event) => {
      const requestedId = (event as CustomEvent<TourOpenDetail>).detail?.id;
      // A request that names a different tour is not for us.
      if (requestedId && requestedId !== id) return;
      if (steps.length === 0) return;
      setOpen(true);
    };

    window.addEventListener(TOUR_OPEN_EVENT, handleOpenRequest);
    return () => {
      window.removeEventListener(TOUR_OPEN_EVENT, handleOpenRequest);
    };
  }, [enabled, hasRuntime, id, steps.length]);

  const onSkip = useCallback(() => {
    if (autoKey) {
      localStorage.setItem(autoKey, "skipped");
    }
  }, [autoKey]);

  const onComplete = useCallback(() => {
    if (autoKey) {
      localStorage.setItem(autoKey, "done");
    }
  }, [autoKey]);

  const tourProps = useMemo(() => {
    return {
      open,
      onOpenChange: setOpen,
      steps,
      onSkip,
      onComplete,
    };
  }, [onComplete, onSkip, open, steps]);

  return { open, setOpen, onSkip, onComplete, tourProps };
}


================================================
FILE: studio/frontend/src/features/tour/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { GuidedTour } from "./components/guided-tour";
export { ReadMore } from "./components/read-more";
export { TOUR_OPEN_EVENT, useGuidedTourController } from "./hooks/use-guided-tour-controller";
export type { TourStep } from "./types";


================================================
FILE: studio/frontend/src/features/tour/types.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { ReactNode } from "react";

// One step of a guided tour.
export type TourStep = {
  // Identifier of the step.
  id: string;
  // Value of the `data-tour` attribute this step refers to: data-tour="<target>"
  target: string;
  // Heading text for the step.
  title: string;
  // Body content for the step; any renderable React node.
  body: ReactNode;
  // Optional hooks, possibly async. NOTE(review): presumably invoked when the
  // step becomes active / stops being active — confirm against the tour component.
  onEnter?: () => void | Promise<void>;
  onExit?: () => void | Promise<void>;
};

// Axis-aligned rectangle: origin (x, y) plus width `w` and height `h`.
export type Rect = { x: number; y: number; w: number; h: number };

// One of the four sides; NOTE(review): presumably the side of the target on
// which the tour card is placed — confirm against the tour component.
export type Placement = "right" | "left" | "top" | "bottom";


================================================
FILE: studio/frontend/src/features/training/api/datasets-api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  CheckFormatResponse,
  LocalDatasetsResponse,
  UploadDatasetResponse,
} from "../types/datasets";
import { authFetch } from "@/features/auth";

// Arguments for `checkDatasetFormat`.
type CheckDatasetFormatArgs = {
  // Sent to the backend as `dataset_name`.
  datasetName: string;
  // Sent as `hf_token`; null or empty means the field is omitted.
  hfToken: string | null;
  // Sent as `subset`; null, empty or missing means the field is omitted.
  subset?: string | null;
  // Sent as `split`; falls back to "train" when null, empty or missing.
  split?: string | null;
  // Sent as `is_vlm`, coerced to a boolean.
  isVlm?: boolean;
};

/**
 * POST the dataset description to /api/datasets/check-format and return the
 * parsed JSON response.
 *
 * Empty/null `hfToken` and `subset` are omitted from the body, `split`
 * defaults to "train", and `isVlm` is coerced to a boolean.
 *
 * @throws Error carrying the response's `detail` field when available,
 *   otherwise "Request failed (<status>)".
 */
export async function checkDatasetFormat({
  datasetName,
  hfToken,
  subset,
  split,
  isVlm,
}: CheckDatasetFormatArgs): Promise<CheckFormatResponse> {
  const requestBody = {
    dataset_name: datasetName,
    hf_token: hfToken || undefined,
    subset: subset || undefined,
    split: split || "train",
    is_vlm: Boolean(isVlm),
  };

  const res = await authFetch("/api/datasets/check-format", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  });

  if (res.ok) {
    return res.json();
  }

  // The error body may not be JSON at all; treat that as "no details".
  const errorBody = await res.json().catch(() => null);
  throw new Error(errorBody?.detail || `Request failed (${res.status})`);
}

/**
 * Upload a dataset file to /api/datasets/upload as multipart form data
 * (field name "file") and return the parsed JSON response.
 *
 * @throws Error carrying the response's `detail` field when available,
 *   otherwise "Upload failed (<status>)".
 */
export async function uploadTrainingDataset(
  file: File,
): Promise<UploadDatasetResponse> {
  const payload = new FormData();
  payload.append("file", file);

  const res = await authFetch("/api/datasets/upload", {
    method: "POST",
    body: payload,
  });

  if (res.ok) {
    return res.json();
  }

  // The error body may not be JSON at all; treat that as "no details".
  const errorBody = await res.json().catch(() => null);
  throw new Error(errorBody?.detail || `Upload failed (${res.status})`);
}

// ── AI Assist ────────────────────────────────────────────────────────

// Arguments for `aiAssistMapping`.
type AiAssistMappingArgs = {
  // Column names, sent unchanged as `columns`.
  columns: string[];
  // Sample rows; only the first five are sent to the backend.
  samples: Record<string, unknown>[];
  // Optional context fields; each is omitted from the request when null or empty.
  datasetName?: string | null;
  hfToken?: string | null;
  modelName?: string | null;
  modelType?: "text" | "vision" | "audio" | "embeddings" | null;
};

// JSON shape returned by POST /api/datasets/ai-assist-mapping.
export type AiAssistMappingResponse = {
  success: boolean;
  suggested_mapping?: Record<string, string> | null;
  warning?: string | null;
  // Conversion advisor fields
  system_prompt?: string | null;
  label_mapping?: Record<string, Record<string, string>> | null;
  dataset_type?: string | null;
  is_conversational?: boolean | null;
  user_notification?: string | null;
};

/**
 * POST the dataset columns and a few sample rows to
 * /api/datasets/ai-assist-mapping and return the parsed JSON response.
 *
 * At most the first five samples are sent; empty/null optional fields are
 * omitted from the body.
 *
 * @throws Error carrying the response's `detail` field when available,
 *   otherwise "AI assist failed (<status>)".
 */
export async function aiAssistMapping({
  columns,
  samples,
  datasetName,
  hfToken,
  modelName,
  modelType,
}: AiAssistMappingArgs): Promise<AiAssistMappingResponse> {
  const requestBody = {
    columns,
    samples: samples.slice(0, 5),
    dataset_name: datasetName || undefined,
    hf_token: hfToken || undefined,
    model_name: modelName || undefined,
    model_type: modelType || undefined,
  };

  const res = await authFetch("/api/datasets/ai-assist-mapping", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  });

  if (res.ok) {
    return res.json();
  }

  // The error body may not be JSON at all; treat that as "no details".
  const errorBody = await res.json().catch(() => null);
  throw new Error(errorBody?.detail || `AI assist failed (${res.status})`);
}

/**
 * GET /api/datasets/local and return the parsed JSON response.
 *
 * @throws Error carrying the response's `detail` field when available,
 *   otherwise "Request failed (<status>)".
 */
export async function listLocalDatasets(): Promise<LocalDatasetsResponse> {
  const res = await authFetch("/api/datasets/local");
  if (res.ok) {
    return res.json();
  }

  // The error body may not be JSON at all; treat that as "no details".
  const errorBody = await res.json().catch(() => null);
  throw new Error(errorBody?.detail || `Request failed (${res.status})`);
}


================================================
FILE: studio/frontend/src/features/training/api/mappers.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { TrainingConfigState } from "../types/config";
import type { TrainingStartRequest } from "../types/api";

// `training_type` strings sent to the backend; `toBackendTrainingType` picks
// the "full" variant for trainingMethod === "full" and the LoRA/QLoRA one otherwise.
const BACKEND_LORA_TYPE = "LoRA/QLoRA";
const BACKEND_FULL_TYPE = "Full Finetuning";

/**
 * Convert a slice bound typed by the user into a non-negative integer.
 *
 * Returns null for null, blank, non-numeric, fractional or negative input.
 */
function parseSliceValue(value: string | null): number | null {
  const text = value?.trim();
  if (!text) {
    return null;
  }

  const parsed = Number(text);
  // Number.isInteger already rejects NaN and +/-Infinity.
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : null;
}

/**
 * Map the UI training method onto the backend's `training_type` label:
 * "full" becomes the full-finetuning label, anything else the LoRA/QLoRA one.
 */
export function toBackendTrainingType(trainingMethod: string): string {
  if (trainingMethod === "full") {
    return BACKEND_FULL_TYPE;
  }
  return BACKEND_LORA_TYPE;
}

/**
 * Translate the UI training configuration into the request body sent when a
 * training run is started.
 *
 * Notable rules applied here:
 * - Any method other than "full" is an adapter (LoRA) run; 4-bit loading is
 *   enabled only for "qlora".
 * - Hugging Face fields (subset / splits) are sent only for the "huggingface"
 *   source; uploaded files only for the "upload" source.
 * - Advisor metadata (`__system_prompt`, `__label_mapping`) is added to the
 *   custom mapping only when a manual mapping exists.
 * - Embedding models force packing, completion-only training and the
 *   image/audio flags off, and use a fixed warmup ratio of 0.03 instead of
 *   warmup steps.
 * - W&B / TensorBoard details are sent only when the matching toggle is on.
 */
export function buildTrainingStartPayload(
  config: TrainingConfigState,
): TrainingStartRequest {
  const usesAdapter = config.trainingMethod !== "full";
  const usesQlora = config.trainingMethod === "qlora";
  const isEmbedding = config.isEmbeddingModel;

  const fromHub = config.datasetSource === "huggingface";
  const fromUpload = config.datasetSource === "upload";
  const hubDataset = fromHub ? config.dataset : null;
  const trainFiles =
    fromUpload && config.uploadedFile ? [config.uploadedFile] : [];
  const evalFiles =
    fromUpload && config.uploadedEvalFile ? [config.uploadedEvalFile] : [];

  // Copy the manual mapping so the advisor keys below never leak into state.
  const hasManualMapping = Object.keys(config.datasetManualMapping).length > 0;
  let customFormatMapping: Record<string, unknown> | undefined =
    hasManualMapping ? { ...config.datasetManualMapping } : undefined;

  // Inject conversion advisor metadata into the mapping (__ prefix keys)
  const hasLabelMapping = () =>
    Object.keys(config.datasetLabelMapping).length > 0;
  const hasAdvisorMeta = config.datasetSystemPrompt || hasLabelMapping();
  if (customFormatMapping && hasAdvisorMeta) {
    if (config.datasetSystemPrompt) {
      customFormatMapping.__system_prompt = config.datasetSystemPrompt;
    }
    if (hasLabelMapping()) {
      customFormatMapping.__label_mapping = config.datasetLabelMapping;
    }
  }

  const payload: TrainingStartRequest = {
    model_name: config.selectedModel ?? "",
    training_type: toBackendTrainingType(config.trainingMethod),
    hf_token: config.hfToken.trim() || null,
    load_in_4bit: usesAdapter ? usesQlora : false,
    max_seq_length: config.contextLength,
    trust_remote_code: config.trustRemoteCode ?? false,
    hf_dataset: hubDataset,
    subset: hubDataset ? config.datasetSubset : null,
    train_split: hubDataset ? config.datasetSplit : null,
    eval_split: hubDataset ? config.datasetEvalSplit : null,
    dataset_slice_start: parseSliceValue(config.datasetSliceStart),
    dataset_slice_end: parseSliceValue(config.datasetSliceEnd),
    local_datasets: trainFiles,
    local_eval_datasets: evalFiles,
    format_type: config.datasetFormat,
    custom_format_mapping: customFormatMapping,
    num_epochs: config.epochs,
    learning_rate: String(config.learningRate),
    batch_size: config.batchSize,
    gradient_accumulation_steps: config.gradientAccumulation,
    warmup_steps: isEmbedding ? null : config.warmupSteps,
    warmup_ratio: isEmbedding ? 0.03 : null,
    max_steps: config.maxSteps,
    save_steps: config.saveSteps,
    eval_steps: config.evalSteps,
    weight_decay: config.weightDecay,
    random_seed: config.randomSeed,
    packing: isEmbedding ? false : config.packing,
    optim: config.optimizerType,
    lr_scheduler_type: config.lrSchedulerType,
    use_lora: usesAdapter,
    lora_r: config.loraRank,
    lora_alpha: config.loraAlpha,
    lora_dropout: config.loraDropout,
    target_modules: usesAdapter ? config.targetModules : [],
    gradient_checkpointing: config.gradientCheckpointing,
    use_rslora: config.loraVariant === "rslora",
    use_loftq: config.loraVariant === "loftq",
    train_on_completions: isEmbedding ? false : config.trainOnCompletions,
    finetune_vision_layers: config.finetuneVisionLayers,
    finetune_language_layers: config.finetuneLanguageLayers,
    finetune_attention_modules: config.finetuneAttentionModules,
    finetune_mlp_modules: config.finetuneMLPModules,
    is_dataset_image: isEmbedding ? false : !!config.isDatasetImage,
    is_dataset_audio: isEmbedding ? false : config.isDatasetAudio,
    is_embedding: isEmbedding,
    enable_wandb: config.enableWandb,
    wandb_token: config.enableWandb ? config.wandbToken.trim() || null : null,
    wandb_project: config.enableWandb
      ? config.wandbProject.trim() || null
      : null,
    enable_tensorboard: config.enableTensorboard,
    tensorboard_dir: config.enableTensorboard
      ? config.tensorboardDir.trim() || null
      : null,
  };

  return payload;
}


================================================
FILE: studio/frontend/src/features/training/api/models-api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";

// JSON body read by `checkVisionModel` (GET /api/models/check-vision/{model_name}).
interface VisionCheckResponse {
  model_name: string;
  is_vision: boolean;
}

// JSON body read by `checkEmbeddingModel` (GET /api/models/check-embedding/{model_name}).
interface EmbeddingCheckResponse {
  model_name: string;
  is_embedding: boolean;
}

// `training` section of a backend model config. Every field is optional.
// NOTE(review): presumably per-model default hyperparameters — confirm with the backend.
interface BackendTrainingDefaults {
  max_seq_length?: number;
  num_epochs?: number;
  // May arrive either as a number or as a string.
  learning_rate?: number | string;
  optim?: string;
  lr_scheduler_type?: string;
  batch_size?: number;
  gradient_accumulation_steps?: number;
  warmup_steps?: number;
  max_steps?: number;
  save_steps?: number;
  eval_steps?: number;
  weight_decay?: number;
  random_seed?: number;
  packing?: boolean;
  train_on_completions?: boolean;
  // String-valued mode, not a boolean.
  gradient_checkpointing?: "none" | "true" | "unsloth";
  trust_remote_code?: boolean;
}

// `lora` section of a backend model config. Every field is optional.
interface BackendLoraDefaults {
  lora_r?: number;
  lora_alpha?: number;
  lora_dropout?: number;
  target_modules?: string[];
  use_rslora?: boolean;
  use_loftq?: boolean;
  finetune_vision_layers?: boolean;
  finetune_language_layers?: boolean;
  finetune_attention_modules?: boolean;
  finetune_mlp_modules?: boolean;
}

// `logging` section of a backend model config. Every field is optional.
interface BackendLoggingDefaults {
  enable_wandb?: boolean;
  wandb_project?: string;
  enable_tensorboard?: boolean;
  tensorboard_dir?: string;
  log_frequency?: number;
}

// `config` object nested inside ModelConfigResponse; groups the optional
// training / lora / logging sections.
export interface BackendModelConfig {
  audio_type?: string | null;
  training?: BackendTrainingDefaults;
  lora?: BackendLoraDefaults;
  logging?: BackendLoggingDefaults;
}

// JSON body returned by `getModelConfig` (GET /api/models/config/{model_name}).
export interface ModelConfigResponse {
  id: string;
  model_name?: string | null;
  config?: BackendModelConfig | null;
  is_vision: boolean;
  is_embedding?: boolean;
  is_audio: boolean;
  is_lora: boolean;
  base_model?: string | null;
  model_type?: "text" | "vision" | "audio" | "embeddings" | null;
  max_position_embeddings?: number | null;
  model_size_bytes?: number | null;
}

// One entry of the list returned by `listLocalModels`.
export interface LocalModelInfo {
  id: string;
  display_name: string;
  path: string;
  // Which location the model was found in.
  source: "models_dir" | "hf_cache";
  model_id?: string | null;
  // NOTE(review): numeric timestamp; unit (seconds vs milliseconds) is not visible here — confirm.
  updated_at?: number | null;
}

// JSON body of GET /api/models/local; `listLocalModels` returns only `models`.
interface LocalModelListResponse {
  models_dir: string;
  hf_cache_dir?: string | null;
  models: LocalModelInfo[];
}

/**
 * Ask the backend whether `modelName` is a vision model.
 *
 * Issues GET /api/models/check-vision/{model_name} with the name URL-encoded.
 * Any non-OK response is treated as "not a vision model".
 */
export async function checkVisionModel(modelName: string): Promise<boolean> {
  const url = `/api/models/check-vision/${encodeURIComponent(modelName)}`;
  const response = await authFetch(url);

  if (response.ok) {
    const { is_vision } = (await response.json()) as VisionCheckResponse;
    return is_vision;
  }

  // The check itself failed, so default to non-vision.
  return false;
}

/**
 * Ask the backend whether `modelName` is an embedding model.
 *
 * Issues GET /api/models/check-embedding/{model_name} with the name
 * URL-encoded. Any non-OK response is treated as "not an embedding model".
 */
export async function checkEmbeddingModel(
  modelName: string,
): Promise<boolean> {
  const url = `/api/models/check-embedding/${encodeURIComponent(modelName)}`;
  const response = await authFetch(url);

  if (response.ok) {
    const { is_embedding } = (await response.json()) as EmbeddingCheckResponse;
    return is_embedding;
  }

  // The check itself failed, so default to non-embedding.
  return false;
}

/**
 * Fetch the backend configuration for a model.
 *
 * Issues GET /api/models/config/{model_name}; when `hfToken` is non-empty it
 * is appended as the `hf_token` query parameter. `signal` lets the caller
 * abort the request.
 *
 * @throws Error "Failed to fetch model config (<status>)" on a non-OK response.
 */
export async function getModelConfig(
  modelName: string,
  signal?: AbortSignal,
  hfToken?: string,
): Promise<ModelConfigResponse> {
  const query = hfToken ? `?hf_token=${encodeURIComponent(hfToken)}` : "";
  const url = `/api/models/config/${encodeURIComponent(modelName)}${query}`;

  const response = await authFetch(url, { signal });
  if (response.ok) {
    return (await response.json()) as ModelConfigResponse;
  }

  throw new Error(`Failed to fetch model config (${response.status})`);
}

/**
 * Fetch the locally available models from GET /api/models/local and return
 * just the `models` array of the response.
 *
 * @throws Error "Failed to fetch local models (<status>)" on a non-OK response.
 */
export async function listLocalModels(
  signal?: AbortSignal,
): Promise<LocalModelInfo[]> {
  const response = await authFetch("/api/models/local", { signal });
  if (response.ok) {
    const { models } = (await response.json()) as LocalModelListResponse;
    return models;
  }

  throw new Error(`Failed to fetch local models (${response.status})`);
}


================================================
FILE: studio/frontend/src/features/training/api/train-api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";
import type {
  TrainingStartRequest,
  TrainingStartResponse,
  TrainingStopResponse,
} from "../types/api";
import type {
  TrainingMetricsResponse,
  TrainingProgressPayload,
  TrainingStatusResponse,
} from "../types/runtime";

/** True only for a DOMException whose name is "AbortError". */
function isAbortError(error: unknown): boolean {
  if (!(error instanceof DOMException)) {
    return false;
  }
  return error.name === "AbortError";
}

/**
 * Build a human-readable message for a failed response.
 *
 * Reads the JSON body and prefers its `detail` field, then `message`, then
 * the generic "Request failed (<status>)". Unlike a plain `detail || message`
 * this also copes with structured values: a list (for example a list of
 * validation errors with a `msg` field) is flattened into "msg; msg; ...",
 * and any other object is serialised as JSON. Without that, such values would
 * surface to the user as "[object Object]" once wrapped in an Error.
 *
 * Never throws: a missing or non-JSON body yields the generic message.
 */
async function readError(response: Response): Promise<string> {
  const fallback = `Request failed (${response.status})`;

  // Turn one payload field into text, or null when it carries nothing usable.
  const describe = (value: unknown): string | null => {
    // Falsy values ("", null, undefined, 0, false) fall through to the next source.
    if (!value) return null;
    if (typeof value === "string") return value;
    if (Array.isArray(value)) {
      const parts = value.map((item) => {
        if (typeof item === "string") return item;
        const msg = (item as { msg?: unknown } | null)?.msg;
        return typeof msg === "string" ? msg : JSON.stringify(item);
      });
      return parts.join("; ") || null;
    }
    return typeof value === "object" ? JSON.stringify(value) : String(value);
  };

  try {
    const payload = (await response.json()) as
      | { detail?: unknown; message?: unknown }
      | null;
    return describe(payload?.detail) ?? describe(payload?.message) ?? fallback;
  } catch {
    // Body was empty or not JSON.
    return fallback;
  }
}

/**
 * Return the JSON body of a successful response, typed as `T`.
 *
 * @throws Error with the message produced by `readError` on a non-OK response.
 */
async function parseJson<T>(response: Response): Promise<T> {
  if (response.ok) {
    const data = await response.json();
    return data as T;
  }

  const message = await readError(response);
  throw new Error(message);
}

/**
 * POST the training request as JSON to /api/train/start and return the
 * parsed response. Rejects with the backend's error message on failure.
 */
export async function startTraining(
  payload: TrainingStartRequest,
): Promise<TrainingStartResponse> {
  const body = JSON.stringify(payload);
  const response = await authFetch("/api/train/start", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
  });

  return parseJson<TrainingStartResponse>(response);
}

/**
 * POST `{ save }` as JSON to /api/train/stop and return the parsed response.
 * `save` defaults to true. Rejects with the backend's error message on failure.
 */
export async function stopTraining(save = true): Promise<TrainingStopResponse> {
  const body = JSON.stringify({ save });
  const response = await authFetch("/api/train/stop", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
  });

  return parseJson<TrainingStopResponse>(response);
}

/**
 * POST to /api/train/reset. Resolves with nothing on success; the response
 * body is not read.
 *
 * @throws Error with the message produced by `readError` on a non-OK response.
 */
export async function resetTraining(): Promise<void> {
  const response = await authFetch("/api/train/reset", { method: "POST" });
  if (response.ok) {
    return;
  }

  const message = await readError(response);
  throw new Error(message);
}

/**
 * GET /api/train/status and return the parsed response.
 * Rejects with the backend's error message on failure.
 */
export async function getTrainingStatus(): Promise<TrainingStatusResponse> {
  return parseJson<TrainingStatusResponse>(
    await authFetch("/api/train/status"),
  );
}

/**
 * GET /api/train/metrics and return the parsed response.
 * Rejects with the backend's error message on failure.
 */
export async function getTrainingMetrics(): Promise<TrainingMetricsResponse> {
  return parseJson<TrainingMetricsResponse>(
    await authFetch("/api/train/metrics"),
  );
}

// Event names recognised on the progress stream; `parseSseEvent` keeps its
// default of "progress" for any other name.
type ProgressEventName = "progress" | "heartbeat" | "complete" | "error";

// One decoded event from the progress stream.
interface ParsedSseEvent {
  // Value of the `event:` field, or "progress" when absent/unrecognised.
  event: ProgressEventName;
  // JSON-decoded content of the `data:` line(s).
  payload: TrainingProgressPayload;
  // Numeric value of the `id:` field, or null when absent or not a finite number.
  id: number | null;
}

/**
 * Decode one server-sent-event block (the text between two blank lines).
 *
 * - `event:` selects the event name when it is one of the known names;
 *   otherwise the default "progress" is kept.
 * - `id:` is parsed as a number; an empty or non-numeric id yields null.
 *   (An empty id must be special-cased because `Number("")` is 0, which would
 *   otherwise be reported as a real event id.)
 * - `data:` lines are joined with "\n" and parsed as JSON.
 * - Every other line (blank lines, ":" comments, unknown fields) is ignored.
 *
 * @returns the decoded event, or null when the block carries no `data:` line.
 * @throws SyntaxError when the collected data is not valid JSON.
 */
function parseSseEvent(rawEvent: string): ParsedSseEvent | null {
  const lines = rawEvent.split(/\r?\n/);
  let eventName: ProgressEventName = "progress";
  let id: number | null = null;
  const dataLines: string[] = [];

  for (const line of lines) {
    if (!line) {
      continue;
    }
    if (line.startsWith("event:")) {
      const value = line.slice(6).trim();
      if (
        value === "progress" ||
        value === "heartbeat" ||
        value === "complete" ||
        value === "error"
      ) {
        eventName = value;
      }
      continue;
    }
    if (line.startsWith("id:")) {
      const rawId = line.slice(3).trim();
      // Empty id means "no id", not id 0.
      const value = rawId === "" ? Number.NaN : Number(rawId);
      id = Number.isFinite(value) ? value : null;
      continue;
    }
    if (line.startsWith("data:")) {
      dataLines.push(line.slice(5).trimStart());
    }
  }

  if (dataLines.length === 0) {
    return null;
  }

  const parsed = JSON.parse(dataLines.join("\n")) as TrainingProgressPayload;
  return { event: eventName, payload: parsed, id };
}

/**
 * Open the training progress stream (GET /api/train/progress) and dispatch
 * every decoded event to `options.onEvent` until the stream ends.
 *
 * @param options.signal       aborts the request / stream.
 * @param options.lastEventId  when a number, sent as the `Last-Event-ID` header.
 * @param options.onOpen       called once after the response is accepted and
 *                             before any data is read.
 * @param options.onEvent      called for each event that carries data.
 *
 * Blocks are split on a blank line in any mix of "\n" and "\r\n" endings.
 * Blocks that start with `retry:` are skipped. A partial block left in the
 * buffer when the stream ends is discarded.
 *
 * @throws Error with the backend's message on a non-OK response, or
 *   "Progress stream unavailable" when the response has no body. Errors from
 *   JSON parsing or from `onEvent` are re-thrown unless they are abort errors.
 */
export async function streamTrainingProgress(options: {
  signal: AbortSignal;
  lastEventId?: number | null;
  onOpen?: () => void;
  onEvent: (event: ParsedSseEvent) => void;
}): Promise<void> {
  const headers = new Headers();
  if (typeof options.lastEventId === "number") {
    headers.set("Last-Event-ID", String(options.lastEventId));
  }

  const response = await authFetch("/api/train/progress", {
    method: "GET",
    headers,
    signal: options.signal,
  });

  if (!response.ok) {
    throw new Error(await readError(response));
  }

  if (!response.body) {
    throw new Error("Progress stream unavailable");
  }

  options.onOpen?.();

  // Non-global regex: exec() is stateless and always searches from index 0.
  const separator = /\r?\n\r?\n/;
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) {
        break;
      }

      buffer += decoder.decode(value, { stream: true });

      for (
        let match = separator.exec(buffer);
        match !== null;
        match = separator.exec(buffer)
      ) {
        const rawEvent = buffer.slice(0, match.index);
        // Use the matched length: the separator can be 2, 3 or 4 characters
        // long ("\n\n", "\n\r\n", "\r\n\n", "\r\n\r\n"), so guessing from the
        // first character would leave or eat a character of the next event.
        buffer = buffer.slice(match.index + match[0].length);

        if (rawEvent.startsWith("retry:")) {
          continue;
        }

        try {
          const event = parseSseEvent(rawEvent);
          if (event) {
            options.onEvent(event);
          }
        } catch (error) {
          if (!isAbortError(error)) {
            throw error;
          }
        }
      }
    }
  } finally {
    // Close the underlying stream when leaving early (e.g. a handler threw);
    // on an already finished or errored stream this has nothing left to do,
    // and any rejection is irrelevant here.
    void reader.cancel().catch(() => undefined);
  }
}

export { isAbortError };


================================================
FILE: studio/frontend/src/features/training/components/hf-dataset-subset-split-selectors.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { Spinner } from "@/components/ui/spinner";
import {
  Tooltip,
  TooltipContent,
  TooltipTrigger,
} from "@/components/ui/tooltip";
import {
  Field,
  FieldLabel,
} from "@/components/ui/field";
import { useHfDatasetSplits } from "@/hooks";
import { InformationCircleIcon } from "@hugeicons/core-free-icons";
import { HugeiconsIcon } from "@hugeicons/react";
import { useEffect } from "react";

// Props for `HfDatasetSubsetSplitSelectors`.
type Props = {
  // Layout flavour; disabled placeholder dropdowns are only shown for "studio".
  variant: "wizard" | "studio";
  // When false, the split lookup hook is called with a null dataset name.
  enabled: boolean;
  // Dataset name passed to the split lookup hook (only while `enabled`).
  datasetName: string | null;
  // Forwarded to the split lookup hook as `accessToken`.
  accessToken?: string;
  // Currently selected subset and its setter.
  datasetSubset: string | null;
  setDatasetSubset: (v: string | null) => void;
  // Currently selected training split and its setter.
  datasetSplit: string | null;
  setDatasetSplit: (v: string | null) => void;
  // Currently selected evaluation split and its setter.
  datasetEvalSplit: string | null;
  setDatasetEvalSplit: (v: string | null) => void;
};

/**
 * Subset / train-split / evaluation-split selectors for a dataset.
 *
 * Options are loaded through `useHfDatasetSplits`. While they load a spinner
 * row is rendered, a load error is rendered as a warning box, and the live
 * dropdowns appear only once at least one subset is available. In the
 * "studio" variant, disabled placeholder dropdowns are shown while the
 * selector is not enabled and no dataset name is set.
 *
 * Selections are kept consistent with the loaded options by a single effect:
 * a missing or unknown subset / train split is replaced by a default pick,
 * and an evaluation split that is not among the loaded splits is cleared.
 */
export function HfDatasetSubsetSplitSelectors({
  variant,
  enabled,
  datasetName,
  accessToken,
  datasetSubset,
  setDatasetSubset,
  datasetSplit,
  setDatasetSplit,
  datasetEvalSplit,
  setDatasetEvalSplit,
}: Props) {
  const {
    subsets: hfSubsets,
    splits: hfSplits,
    isLoading,
    error,
  } = useHfDatasetSplits(enabled ? datasetName : null, datasetSubset, {
    accessToken,
  });
  const showPlaceholderDropdowns =
    variant === "studio" && !enabled && !datasetName;

  // Reconcile subset, train split and eval split in one pass to avoid racing effects.
  useEffect(() => {
    if (hfSubsets.length === 0) return;

    // --- subset ---
    if (!datasetSubset || !hfSubsets.includes(datasetSubset)) {
      const pick = hfSubsets.includes("default") ? "default" : hfSubsets[0];
      setDatasetSubset(pick);
      // Stop here: the split checks below run again on the next pass,
      // once the subset selection has settled.
      return;
    }

    // --- splits (only once subset is settled) ---
    if (hfSplits.length === 0) return;
    if (!datasetSplit || !hfSplits.includes(datasetSplit)) {
      const pick = hfSplits.includes("train") ? "train" : hfSplits[0];
      setDatasetSplit(pick);
    }

    // An eval split that is not offered by the loaded splits cannot be shown
    // by the dropdown, yet would otherwise linger in state. Reset it to
    // "None" instead of keeping a hidden, stale selection.
    if (datasetEvalSplit && !hfSplits.includes(datasetEvalSplit)) {
      setDatasetEvalSplit(null);
    }
  }, [
    hfSubsets,
    hfSplits,
    datasetSubset,
    setDatasetSubset,
    datasetSplit,
    setDatasetSplit,
    datasetEvalSplit,
    setDatasetEvalSplit,
  ]);

  const showDropdowns = !isLoading && !error && hfSubsets.length > 0;

  // Subset + train split are the same in both variants; only the wrapper differs.
  const subsetAndTrainDropdowns = (
    <>
      <SelectorDropdown
        variant={variant}
        label="Subset"
        tooltip="Select which subset (config) of the dataset to use."
        value={datasetSubset}
        onChange={setDatasetSubset}
        options={hfSubsets}
        placeholder="Select a subset..."
      />
      <SelectorDropdown
        variant={variant}
        label="Train Split"
        tooltip="Select which split to use for training."
        value={datasetSplit}
        onChange={setDatasetSplit}
        options={hfSplits}
        placeholder="Select a split..."
      />
    </>
  );

  return (
    <>
      {showPlaceholderDropdowns && (
        <>
          <div className="grid gap-3 sm:grid-cols-2">
            <SelectorDropdown
              variant={variant}
              label="Subset"
              tooltip="Select which subset (config) of the dataset to use."
              value={null}
              onChange={setDatasetSubset}
              options={[]}
              placeholder="Select a subset..."
              disabled={true}
            />
            <SelectorDropdown
              variant={variant}
              label="Train Split"
              tooltip="Select which split to use for training."
              value={null}
              onChange={setDatasetSplit}
              options={[]}
              placeholder="Select a split..."
              disabled={true}
            />
          </div>
          <SelectorDropdown
            variant={variant}
            label="Evaluation Split"
            tooltip="Select which split to use for evaluation. None means no evaluation during training."
            value={null}
            onChange={setDatasetEvalSplit}
            options={[]}
            placeholder="None"
            allowNone
            disabled={true}
          />
        </>
      )}

      {isLoading && (
        <div
          className={
            variant === "wizard"
              ? "flex items-center gap-2 text-xs text-muted-foreground py-1"
              : "flex items-center gap-2 rounded-lg border bg-muted/20 px-3.5 py-3 text-xs text-muted-foreground"
          }
        >
          <Spinner className="size-3.5" />
          Loading dataset configs and splits...
        </div>
      )}

      {error && (
        <div
          className={
            variant === "wizard"
              ? "rounded-lg border border-amber-200 bg-amber-50 px-3 py-2 text-xs text-amber-700 dark:border-amber-800 dark:bg-amber-950 dark:text-amber-400"
              : "rounded-lg border border-amber-200 bg-amber-50 px-3.5 py-2.5 text-xs text-amber-700 dark:border-amber-800 dark:bg-amber-950 dark:text-amber-400"
          }
        >
          {error}
        </div>
      )}

      {showDropdowns && (
        <>
          {variant === "studio" ? (
            <div className="grid gap-3 sm:grid-cols-2">
              {subsetAndTrainDropdowns}
            </div>
          ) : (
            subsetAndTrainDropdowns
          )}
          <SelectorDropdown
            variant={variant}
            label="Evaluation Split"
            tooltip="Select which split to use for evaluation. None means no evaluation during training."
            value={datasetEvalSplit}
            onChange={setDatasetEvalSplit}
            options={hfSplits}
            placeholder="None"
            allowNone
          />
        </>
      )}
    </>
  );
}

/**
 * A labelled select with an info tooltip, rendered in either the "wizard"
 * (Field/FieldLabel) or the "studio" (plain flex column) look.
 *
 * A `null` value is represented in the select by the "_none" sentinel when
 * `allowNone` is set and the control is not disabled; choosing that sentinel
 * reports `null` back through `onChange`.
 */
function SelectorDropdown({
  variant,
  label,
  tooltip,
  value,
  onChange,
  options,
  placeholder,
  allowNone = false,
  disabled = false,
}: {
  variant: "wizard" | "studio";
  label: string;
  tooltip: string;
  value: string | null;
  onChange: (v: string | null) => void;
  options: string[];
  placeholder: string;
  allowNone?: boolean;
  disabled?: boolean;
}) {
  // Resolve what the select itself should display as its current value.
  let selectValue: string | undefined;
  if (value !== null && value !== undefined) {
    selectValue = value;
  } else if (allowNone && !disabled) {
    selectValue = "_none";
  }

  // Translate the sentinel back into `null` for the caller.
  const handleValueChange = (next: string) =>
    onChange(next === "_none" ? null : next);

  // The select control is identical in both variants.
  const selectControl = (
    <Select
      value={selectValue}
      onValueChange={handleValueChange}
      disabled={disabled}
    >
      <SelectTrigger className="w-full">
        <SelectValue placeholder={placeholder} />
      </SelectTrigger>
      <SelectContent>
        {allowNone && <SelectItem value="_none">None</SelectItem>}
        {options.map((opt) => (
          <SelectItem key={opt} value={opt}>
            {opt}
          </SelectItem>
        ))}
      </SelectContent>
    </Select>
  );

  if (variant !== "wizard") {
    return (
      <div className="flex flex-col gap-1.5">
        <span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
          {label}
          <Tooltip>
            <TooltipTrigger asChild={true}>
              <button
                type="button"
                className="text-foreground/70 hover:text-foreground"
              >
                <HugeiconsIcon
                  icon={InformationCircleIcon}
                  className="size-3"
                />
              </button>
            </TooltipTrigger>
            <TooltipContent>{tooltip}</TooltipContent>
          </Tooltip>
        </span>
        {selectControl}
      </div>
    );
  }

  return (
    <Field>
      <FieldLabel className="flex items-center gap-1.5">
        {label}
        <Tooltip>
          <TooltipTrigger asChild={true}>
            <button
              type="button"
              className="text-muted-foreground/50 hover:text-muted-foreground"
            >
              <HugeiconsIcon
                icon={InformationCircleIcon}
                className="size-3.5"
              />
            </button>
          </TooltipTrigger>
          <TooltipContent className="max-w-xs">{tooltip}</TooltipContent>
        </Tooltip>
      </FieldLabel>
      {selectControl}
    </Field>
  );
}


================================================
FILE: studio/frontend/src/features/training/hooks/use-max-steps-epochs-toggle.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useState } from "react";

// localStorage keys under which the last step-based values are remembered.
const PREV_MAX_STEPS_KEY = "unsloth_prev_max_steps";
const PREV_SAVE_STEPS_KEY = "unsloth_prev_save_steps";
// Fallback used when no valid (finite, positive) max-steps value is available.
const DEFAULT_MAX_STEPS = 60;
// Default for the hook's `defaultEpochs` option.
const DEFAULT_EPOCHS = 3;

/**
 * Read a finite number from localStorage.
 *
 * Returns `fallback` when there is no `window`, the key is absent, the
 * stored text does not convert to a finite number, or storage access throws.
 */
function readStoredNumber(key: string, fallback: number): number {
  if (typeof window === "undefined") {
    return fallback;
  }

  let raw: string | null;
  try {
    raw = window.localStorage.getItem(key);
  } catch {
    return fallback;
  }

  if (raw === null) {
    return fallback;
  }
  const numeric = Number(raw);
  if (!Number.isFinite(numeric)) {
    return fallback;
  }
  return numeric;
}

/**
 * Persist a number to localStorage as its string form.
 *
 * Does nothing when there is no `window`; storage errors are swallowed.
 */
function writeStoredNumber(key: string, value: number): void {
  if (typeof window !== "undefined") {
    try {
      const serialized = String(value);
      window.localStorage.setItem(key, serialized);
    } catch {
      // Best effort only; ignore storage errors in restricted environments.
    }
  }
}

/** Keep a finite, strictly positive step count; anything else becomes DEFAULT_MAX_STEPS. */
function normalizePrevMaxSteps(value: number): number {
  if (!Number.isFinite(value) || value <= 0) {
    return DEFAULT_MAX_STEPS;
  }
  return value;
}

/** Keep a finite, non-negative save-steps value; anything else becomes 0. */
function normalizePrevSaveSteps(value: number): number {
  if (!Number.isFinite(value) || value < 0) {
    return 0;
  }
  return value;
}

/** Inputs of `useMaxStepsEpochsToggle`: current values plus their setters. */
type UseMaxStepsEpochsToggleParams = {
  /** Current max steps; the value 0 is what puts the hook in "epochs" mode. */
  maxSteps: number;
  epochs: number;
  saveSteps: number;
  setMaxSteps: (value: number) => void;
  setEpochs: (value: number) => void;
  setSaveSteps: (value: number) => void;
  /** Epoch count applied when switching to epochs mode while `epochs` is falsy. */
  defaultEpochs?: number;
};

/** What `useMaxStepsEpochsToggle` returns. */
type UseMaxStepsEpochsToggleResult = {
  /** True when `maxSteps === 0`. */
  useEpochs: boolean;
  /** Switch between step-based and epoch-based mode. */
  toggleUseEpochs: () => void;
};

/**
 * Toggle between "train for N steps" and "train for N epochs".
 *
 * Epoch mode is encoded as `maxSteps === 0`. While in step mode the hook
 * remembers the latest max-steps and save-steps values (in React state and in
 * localStorage) so that toggling back out of epoch mode can restore them.
 */
export function useMaxStepsEpochsToggle({
  maxSteps,
  epochs,
  saveSteps,
  setMaxSteps,
  setEpochs,
  setSaveSteps,
  defaultEpochs = DEFAULT_EPOCHS,
}: UseMaxStepsEpochsToggleParams): UseMaxStepsEpochsToggleResult {
  const useEpochs = maxSteps === 0;

  // Last known step-mode max steps, seeded from storage.
  const [prevMaxSteps, setPrevMaxSteps] = useState(() => {
    const stored = readStoredNumber(PREV_MAX_STEPS_KEY, DEFAULT_MAX_STEPS);
    return normalizePrevMaxSteps(stored);
  });

  // Last known save steps: prefer the live value when the hook mounts in
  // epoch mode with a positive saveSteps, otherwise fall back to storage.
  const [prevSaveSteps, setPrevSaveSteps] = useState(() => {
    const seed =
      maxSteps === 0 && saveSteps > 0
        ? saveSteps
        : readStoredNumber(PREV_SAVE_STEPS_KEY, 0);
    return normalizePrevSaveSteps(seed);
  });

  // Track every positive maxSteps value.
  useEffect(() => {
    if (!(maxSteps > 0)) return;
    const remembered = normalizePrevMaxSteps(maxSteps);
    setPrevMaxSteps(remembered);
    writeStoredNumber(PREV_MAX_STEPS_KEY, remembered);
  }, [maxSteps]);

  // Track saveSteps only while in step mode.
  useEffect(() => {
    if (useEpochs) return;
    const remembered = normalizePrevSaveSteps(saveSteps);
    setPrevSaveSteps(remembered);
    writeStoredNumber(PREV_SAVE_STEPS_KEY, remembered);
  }, [saveSteps, useEpochs]);

  const toggleUseEpochs = useCallback(() => {
    if (!useEpochs) {
      // Step mode -> epoch mode.
      setMaxSteps(0);
      setEpochs(epochs || defaultEpochs);
      return;
    }

    // Epoch mode -> step mode: bring back the remembered values.
    setMaxSteps(normalizePrevMaxSteps(prevMaxSteps));
    setSaveSteps(normalizePrevSaveSteps(prevSaveSteps));
  }, [
    defaultEpochs,
    epochs,
    prevMaxSteps,
    prevSaveSteps,
    setEpochs,
    setMaxSteps,
    setSaveSteps,
    useEpochs,
  ]);

  return { useEpochs, toggleUseEpochs };
}


================================================
FILE: studio/frontend/src/features/training/hooks/use-training-actions.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback } from "react";
import { checkDatasetFormat } from "../api/datasets-api";
import { buildTrainingStartPayload } from "../api/mappers";
import { startTraining, stopTraining, resetTraining } from "../api/train-api";
import { syncTrainingRuntimeFromBackend } from "../lib/sync-runtime";
import { validateTrainingConfig } from "../lib/validation";
import { useDatasetPreviewDialogStore } from "../stores/dataset-preview-dialog-store";
import { useTrainingConfigStore } from "../stores/training-config-store";
import { useTrainingRuntimeStore } from "../stores/training-runtime-store";
import type { TrainingConfigState } from "../types/config";
import { toast } from "sonner";

/**
 * Chatml → format-specific role remap (only for formats that differ from chatml).
 *
 * Outer key: dataset format name. Inner map: chatml role → the role name used
 * by that format. Formats without an entry keep the role unchanged.
 */
const ROLE_REMAP: Record<string, Record<string, string>> = {
  alpaca: { user: "instruction", system: "input", assistant: "output" },
  sharegpt: { user: "human", assistant: "gpt", system: "system" },
};

/**
 * Map a raw start-training error to a user-facing message.
 *
 * When the message (case-insensitively) contains both legacy dataset-script
 * markers, a fixed friendlier sentence is returned; every other message is
 * passed through untouched.
 */
function normalizeTrainingStartError(message: string): string {
  const haystack = message.toLowerCase();
  const legacyScriptMarkers = [
    "failed to check dataset format",
    "dataset scripts are no longer supported",
  ];

  const matchesAllMarkers = legacyScriptMarkers.every((marker) =>
    haystack.includes(marker),
  );

  return matchesAllMarkers
    ? "This Hub dataset relies on a legacy custom script and isn’t supported in this training flow."
    : message;
}

/**
 * Hook exposing the start / stop / dismiss actions for a training run,
 * together with the `isStarting` and `startError` flags of the runtime store.
 *
 * The callbacks read the stores through `getState()` when they are invoked
 * rather than closing over render-time values, which is why their dependency
 * arrays are empty.
 */
export function useTrainingActions() {
  const isStarting = useTrainingRuntimeStore((state) => state.isStarting);
  const startError = useTrainingRuntimeStore((state) => state.startError);

  /**
   * Validate the config, check the dataset format, then request a training
   * start. Resolves to `true` only when the backend accepted the run; returns
   * `false` on validation failure, when the column-mapping dialog was opened
   * instead, when a stop was requested meanwhile, or on any error.
   */
  const startTrainingRun = useCallback(async (): Promise<boolean> => {
    // Snapshots taken at call time; `config` is NOT refreshed by later store writes.
    const config = useTrainingConfigStore.getState();
    const runtimeStore = useTrainingRuntimeStore.getState();
    const dialogStore = useDatasetPreviewDialogStore.getState();

    runtimeStore.setStartError(null);
    const validation = validateTrainingConfig(config);
    if (!validation.ok) {
      runtimeStore.setStartError(validation.message);
      return false;
    }

    runtimeStore.setStarting(true);

    try {
      const datasetName = getDatasetName(config);
      let isVlm = config.isVisionModel && config.isDatasetImage === true;

      if (datasetName) {
        const check = await checkDatasetFormat({
          datasetName,
          hfToken: config.hfToken.trim() || null,
          subset: config.datasetSubset,
          split: config.datasetSplit,
          isVlm,
        });

        // Backend auto-detects image/audio from dataset content.
        // Sync these flags into the store so buildTrainingStartPayload picks them up.
        const isAudio = !!check.is_audio;
        const isImage = !!check.is_image;

        if (isImage && config.isVisionModel) {
          isVlm = true;
        }
        // Only write to the store when a flag actually changed.
        if (isImage !== config.isDatasetImage || isAudio !== config.isDatasetAudio) {
          useTrainingConfigStore.setState({
            isDatasetImage: isImage,
            isDatasetAudio: isAudio,
          });
        }

        // The dataset needs a user-confirmed column mapping unless one that
        // covers the required roles is already present.
        const needsReview = check.requires_manual_mapping || check.detected_format === "custom_heuristic";
        if (needsReview && !hasManualMapping(config, isVlm, isAudio)) {
          // Pre-fill from suggested_mapping or VLM detected columns
          const hint: Record<string, string> = {};
          if (check.suggested_mapping) {
            // Translate suggested roles into the selected format's role names, if it has a remap table.
            const table = ROLE_REMAP[config.datasetFormat];
            for (const [col, role] of Object.entries(check.suggested_mapping)) {
              hint[col] = table ? (table[role] ?? role) : role;
            }
          } else if (isAudio) {
            if (check.detected_audio_column) hint[check.detected_audio_column] = "audio";
            if (check.detected_text_column) hint[check.detected_text_column] = "text";
            if (check.detected_speaker_column) hint[check.detected_speaker_column] = "speaker_id";
          } else if (isVlm) {
            if (check.detected_image_column) hint[check.detected_image_column] = "image";
            if (check.detected_text_column) hint[check.detected_text_column] = "text";
          }

          if (Object.keys(hint).length > 0) {
            useTrainingConfigStore.getState().setDatasetManualMapping(hint);
          }

          // Hand over to the mapping dialog instead of starting the run.
          runtimeStore.setStarting(false);
          dialogStore.openMapping(check);
          return false;
        }
      }

      // Abort if cancel was requested during dataset check
      if (useTrainingRuntimeStore.getState().stopRequested) {
        runtimeStore.setStarting(false);
        return false;
      }

      // Re-read config after potential store updates from dataset check
      const payload = buildTrainingStartPayload(useTrainingConfigStore.getState());
      const response = await startTraining(payload);

      if (response.status === "error") {
        const rawMessage = response.error || response.message;
        const safeMessage = normalizeTrainingStartError(rawMessage);
        runtimeStore.setStartError(safeMessage);
        runtimeStore.setStarting(false);
        return false;
      }

      runtimeStore.setStartQueued(response.job_id, response.message);
      await syncTrainingRuntimeFromBackend();
      return true;
    } catch (error) {
      // Any thrown error (dataset check, start request, sync) surfaces as a start error.
      const rawMessage =
        error instanceof Error ? error.message : "Failed to start training";
      const safeMessage = normalizeTrainingStartError(rawMessage);
      runtimeStore.setStartError(safeMessage);
      runtimeStore.setStarting(false);
      return false;
    }
  }, []);

  /**
   * Ask the backend to stop training, passing `save` through to `stopTraining`,
   * then re-sync runtime state. Resolves to `false` (and records a runtime
   * error) when the request or the sync throws.
   */
  const stopTrainingRun = useCallback(async (save = true): Promise<boolean> => {
    const runtimeStore = useTrainingRuntimeStore.getState();
    runtimeStore.setStartError(null);

    try {
      await stopTraining(save);
      await syncTrainingRuntimeFromBackend();
      return true;
    } catch (error) {
      const message =
        error instanceof Error ? error.message : "Failed to stop training";
      runtimeStore.setRuntimeError(message);
      return false;
    }
  }, []);

  /**
   * Reset training on the backend and clear the local runtime store. If the
   * reset throws, an error toast is shown and the runtime state is re-synced
   * from the backend instead.
   */
  const dismissTrainingRun = useCallback(async (): Promise<void> => {
    try {
      await resetTraining();
      useTrainingRuntimeStore.getState().resetRuntime();
    } catch (error) {
      const message =
        error instanceof Error
          ? error.message
          : "Stop training first, then return to configuration.";
      toast.error("Training still active", {
        description: message,
      });
      await syncTrainingRuntimeFromBackend();
    }
  }, []);

  return {
    isStarting,
    startError,
    startTrainingRun,
    stopTrainingRun,
    dismissTrainingRun,
  };
}

/**
 * Pick the dataset identifier matching the configured source: the `dataset`
 * field for the "huggingface" source, the uploaded file for any other source.
 */
function getDatasetName(config: TrainingConfigState): string | null {
  if (config.datasetSource === "huggingface") {
    return config.dataset;
  }
  return config.uploadedFile;
}

/**
 * Report whether the manual column mapping already assigns the roles that the
 * current modality / dataset format requires.
 *
 * Audio takes precedence over VLM; otherwise the required pair depends on
 * `config.datasetFormat`, with user/assistant as the fallback pair.
 */
function hasManualMapping(config: TrainingConfigState, isVlm = false, isAudio = false): boolean {
  const assignedRoles = new Set(Object.values(config.datasetManualMapping));

  if (isAudio) {
    return assignedRoles.has("audio") && assignedRoles.has("text");
  }
  if (isVlm) {
    return assignedRoles.has("image") && assignedRoles.has("text");
  }

  switch (config.datasetFormat) {
    case "alpaca":
      return assignedRoles.has("instruction") && assignedRoles.has("output");
    case "sharegpt":
      return assignedRoles.has("human") && assignedRoles.has("gpt");
    default:
      return assignedRoles.has("user") && assignedRoles.has("assistant");
  }
}


================================================
FILE: studio/frontend/src/features/training/hooks/use-training-runtime-lifecycle.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { hasAuthToken } from "@/features/auth";
import { useEffect } from "react";
import {
  getTrainingMetrics,
  getTrainingStatus,
  isAbortError,
  streamTrainingProgress,
} from "../api/train-api";
import { useTrainingRuntimeStore } from "../stores/training-runtime-store";
import type { TrainingRuntimeStore } from "../types/runtime";

// Interval between training-status polls.
const STATUS_POLL_INTERVAL_MS = 3000;
// Interval between metrics polls (only issued while there is something to show).
const METRICS_POLL_INTERVAL_MS = 5000;
// Delay before the progress stream is re-opened after it ended unexpectedly.
const STREAM_RECONNECT_DELAY_MS = 1500;

/** True while a run is flagged as running or the phase is "training". */
function shouldUseLiveSync(state: TrainingRuntimeStore): boolean {
  const { isTrainingRunning, phase } = state;
  return isTrainingRunning || phase === "training";
}

/**
 * Keeps the training runtime store in sync with the backend for as long as
 * the calling component is mounted.
 *
 * On mount it hydrates once (status + metrics in parallel), then polls status
 * on a fixed interval and metrics on a second interval. Whenever the store
 * says live sync is wanted, a progress stream is opened; it is re-opened
 * after a delay if it ends without having been aborted. Everything is torn
 * down in the effect cleanup.
 */
export function useTrainingRuntimeLifecycle(): void {
  useEffect(() => {
    // Set by the cleanup; async continuations check it before touching the store.
    let disposed = false;
    // True from just before the stream call until its promise settles.
    let openingStream = false;
    // Controller of the currently active stream, if any.
    let streamController: AbortController | null = null;
    let reconnectTimer: ReturnType<typeof setTimeout> | null = null;

    const runtimeStore = useTrainingRuntimeStore;

    // Cancel a pending reconnect attempt, if one is scheduled.
    const clearReconnect = () => {
      if (reconnectTimer) {
        clearTimeout(reconnectTimer);
        reconnectTimer = null;
      }
    };

    // Abort the active stream (if any), cancel reconnects, and mark SSE as disconnected.
    const stopStream = () => {
      clearReconnect();
      if (streamController) {
        streamController.abort();
        streamController = null;
      }
      runtimeStore.getState().setSseConnected(false);
    };

    // Fetch metrics once and apply them, unless the result is no longer relevant.
    const pollMetrics = async () => {
      if (!hasAuthToken()) return;
      // Remember the generation so a response that arrives after it changed is dropped.
      const gen = runtimeStore.getState().resetGeneration;
      try {
        const metrics = await getTrainingMetrics();
        if (disposed || runtimeStore.getState().resetGeneration !== gen) {
          return;
        }
        runtimeStore.getState().applyMetrics(metrics);
      } catch (error) {
        if (!isAbortError(error) && !disposed && hasAuthToken()) {
          runtimeStore.getState().setSseConnected(false);
        }
      }
    };

    // Fetch status once, apply it, then open or close the stream to match the new state.
    const pollStatus = async () => {
      if (!hasAuthToken()) return;
      // Same stale-response guard as in pollMetrics.
      const gen = runtimeStore.getState().resetGeneration;
      try {
        const status = await getTrainingStatus();
        if (disposed || runtimeStore.getState().resetGeneration !== gen) {
          return;
        }

        runtimeStore.getState().applyStatus(status);

        const nextState = runtimeStore.getState();
        if (shouldUseLiveSync(nextState)) {
          void ensureStream();
        } else {
          stopStream();
        }
      } catch (error) {
        if (!isAbortError(error) && !disposed && hasAuthToken()) {
          runtimeStore.getState().setSseConnected(false);
        }
      }
    };

    // Open the progress stream unless one is already open/opening or live sync is not wanted.
    const ensureStream = async () => {
      const state = runtimeStore.getState();
      if (
        disposed ||
        openingStream ||
        streamController ||
        !shouldUseLiveSync(state)
      ) {
        return;
      }

      clearReconnect();
      openingStream = true;
      // Keep a local reference: `streamController` may be replaced or cleared
      // by the time this stream's promise settles.
      const controller = new AbortController();
      streamController = controller;

      try {
        await streamTrainingProgress({
          signal: controller.signal,
          lastEventId: state.lastEventId,
          onOpen: () => {
            runtimeStore.getState().setSseConnected(true);
          },
          onEvent: (event) => {
            const liveStore = runtimeStore.getState();
            if (typeof event.id === "number") {
              liveStore.setLastEventId(event.id);
            }

            liveStore.applyProgress(event.payload, event.id ?? undefined);

            // Terminal event: refresh status/metrics once and close the stream.
            if (event.event === "complete") {
              void pollStatus();
              void pollMetrics();
              stopStream();
            }

            if (event.event === "error") {
              liveStore.setRuntimeError("Training stream error");
              stopStream();
            }
          },
        });
      } catch (error) {
        if (!disposed && !controller.signal.aborted && !isAbortError(error)) {
          runtimeStore.getState().setSseConnected(false);
        }
      } finally {
        openingStream = false;
        // Only clear the shared slot if it still belongs to this stream.
        if (streamController === controller) {
          streamController = null;
        }
        runtimeStore.getState().setSseConnected(false);

        // The stream ended without being aborted by us: retry after a delay
        // if live sync is still wanted.
        if (!disposed && !controller.signal.aborted) {
          const liveState = runtimeStore.getState();
          if (shouldUseLiveSync(liveState)) {
            reconnectTimer = setTimeout(() => {
              void ensureStream();
            }, STREAM_RECONNECT_DELAY_MS);
          }
        }
      }
    };

    // Initial load: status and metrics in parallel, then flag hydration as done.
    const hydrate = async () => {
      runtimeStore.getState().setHydrating(true);
      try {
        await Promise.all([pollStatus(), pollMetrics()]);
      } finally {
        if (!disposed) {
          runtimeStore.getState().setHydrating(false);
          runtimeStore.getState().setHasHydrated(true);
        }
      }
    };

    void hydrate();

    const statusTimer = setInterval(() => {
      void pollStatus();
    }, STATUS_POLL_INTERVAL_MS);

    // Metrics are polled only while live sync is wanted or a step has been recorded.
    const metricsTimer = setInterval(() => {
      const state = runtimeStore.getState();
      if (shouldUseLiveSync(state) || state.currentStep > 0) {
        void pollMetrics();
      }
    }, METRICS_POLL_INTERVAL_MS);

    // Cleanup: stop timers, abort the stream, and silence in-flight continuations.
    return () => {
      disposed = true;
      clearInterval(statusTimer);
      clearInterval(metricsTimer);
      stopStream();
    };
  }, []);
}


================================================
FILE: studio/frontend/src/features/training/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

export { useTrainingConfigStore } from "./stores/training-config-store";
export {
  shouldShowTrainingView,
  useTrainingRuntimeStore,
} from "./stores/training-runtime-store";
export { useTrainingActions } from "./hooks/use-training-actions";
export { useTrainingRuntimeLifecycle } from "./hooks/use-training-runtime-lifecycle";
export { useMaxStepsEpochsToggle } from "./hooks/use-max-steps-epochs-toggle";
export { HfDatasetSubsetSplitSelectors } from "./components/hf-dataset-subset-split-selectors";
export { useDatasetPreviewDialogStore } from "./stores/dataset-preview-dialog-store";
export { uploadTrainingDataset } from "./api/datasets-api";
export { listLocalModels } from "./api/models-api";
export type { LocalModelInfo } from "./api/models-api";
export type { TrainingPhase } from "./types/runtime";
export { parseYamlConfig, serializeConfigToYaml } from "./lib/yaml-config";
export { validateTrainingConfig } from "./lib/validation";


================================================
FILE: studio/frontend/src/features/training/stores/dataset-preview-dialog-store.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { create } from "zustand";
import type { CheckFormatResponse } from "../types/datasets";

/** Which flavour of the dataset dialog is shown. */
export type DatasetPreviewDialogMode = "preview" | "mapping";

/** Data held by the dataset preview dialog store. */
type DatasetPreviewDialogState = {
  open: boolean;
  mode: DatasetPreviewDialogMode;
  /** Format-check result handed to `openMapping`; `null` in preview mode and when closed. */
  initialData: CheckFormatResponse | null;
};

/** Actions exposed by the dataset preview dialog store. */
type DatasetPreviewDialogActions = {
  /** Open the dialog in "preview" mode with no initial data. */
  openPreview: () => void;
  /** Open the dialog in "mapping" mode, seeded with `data`. */
  openMapping: (data: CheckFormatResponse) => void;
  /** Close the dialog and restore the initial mode/data. */
  close: () => void;
};

// Closed dialog, preview mode, nothing pre-loaded.
const initialState: DatasetPreviewDialogState = {
  open: false,
  mode: "preview",
  initialData: null,
};

/** Zustand store controlling the dataset preview / column-mapping dialog. */
export const useDatasetPreviewDialogStore = create<
  DatasetPreviewDialogState & DatasetPreviewDialogActions
>()((set) => {
  const actions: DatasetPreviewDialogActions = {
    openPreview() {
      set({ open: true, mode: "preview", initialData: null });
    },
    openMapping(data) {
      set({ open: true, mode: "mapping", initialData: data });
    },
    close() {
      // Closing is the same as going back to the initial values.
      set({ ...initialState });
    },
  };

  return { ...initialState, ...actions };
});


================================================
FILE: studio/frontend/src/features/training/stores/training-config-store.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { DEFAULT_HYPERPARAMS, STEPS } from "@/config/training";
import { authFetch } from "@/features/auth";
import type { ModelType, StepNumber, TrainingMethod } from "@/types/training";
import { create } from "zustand";
import { persist } from "zustand/middleware";
import { checkDatasetFormat } from "../api/datasets-api";
import { checkVisionModel, getModelConfig } from "../api/models-api";
import { mapBackendModelConfigToTrainingPatch } from "../lib/model-defaults";
import type { BackendModelConfig } from "../api/models-api";
import type { TrainingConfigState, TrainingConfigStore } from "../types/config";

// First step number; also the initial `currentStep`.
const MIN_STEP: StepNumber = 1;
// Last step number, derived from the number of entries in STEPS.
const MAX_STEP: StepNumber = STEPS.length as StepNumber;

/**
 * Auto-select LoRA (16-bit) vs QLoRA (4-bit) based on model size and GPU memory.
 *
 * Rule: if model_size_gb * 1.5 * context_scale fits in free VRAM, use "lora" (16-bit).
 * Otherwise use "qlora" (4-bit).
 *
 * Context scale: <=8192 = 1.0, >8192 = 1.7, >=16384 = 2.0, >=32768 = 4.0
 *
 * @param modelSizeBytes Model size in bytes; must be a finite, positive number.
 * @param contextLength  Context length used to pick the context scale.
 * @returns "lora" or "qlora", or `null` when no recommendation can be made:
 *          the model size is unusable, the hardware request fails or is not
 *          OK, or the response carries no numeric free-VRAM figure.
 */
async function autoSelectTrainingMethod(
  modelSizeBytes: number,
  contextLength: number,
): Promise<TrainingMethod | null> {
  // Without a usable size the estimate is meaningless (0 would always pick
  // "lora", NaN would always pick "qlora"), so make no recommendation and
  // skip the hardware request altogether.
  if (!Number.isFinite(modelSizeBytes) || modelSizeBytes <= 0) return null;

  try {
    const res = await authFetch("/api/system/hardware");
    if (!res.ok) return null;
    const data = await res.json();

    // The payload is untyped JSON: accept the figure only if it is a real number.
    const freeGb: unknown = data?.gpu?.vram_free_gb;
    if (typeof freeGb !== "number" || !Number.isFinite(freeGb)) return null;

    const modelSizeGb = modelSizeBytes / (1024 ** 3);

    let contextScale = 1.0;
    if (contextLength >= 32768) contextScale = 4.0;
    else if (contextLength >= 16384) contextScale = 2.0;
    else if (contextLength > 8192) contextScale = 1.7;

    const estimatedUsage = modelSizeGb * 1.5 * contextScale;
    return estimatedUsage <= freeGb ? "lora" : "qlora";
  } catch {
    // Network / parse failures simply mean "no recommendation".
    return null;
  }
}

/** Build a fresh, empty manual column mapping (a new object on every call). */
function emptyManualMapping(): TrainingConfigState["datasetManualMapping"] {
  const mapping: TrainingConfigState["datasetManualMapping"] = {};
  return mapping;
}

// Default training configuration used to seed the store.
const initialState: TrainingConfigState = {
  currentStep: MIN_STEP,
  modelType: null,
  selectedModel: null,
  trainingMethod: "qlora",
  hfToken: "",
  datasetSource: "huggingface",
  datasetFormat: "auto",
  dataset: null,
  datasetSubset: null,
  datasetSplit: null,
  datasetEvalSplit: null,
  datasetManualMapping: emptyManualMapping(),
  datasetSystemPrompt: "",
  datasetUserTemplate: "",
  datasetAssistantTemplate: "",
  datasetLabelMapping: {},
  datasetAdvisorNotification: null,
  datasetSliceStart: null,
  datasetSliceEnd: null,
  uploadedFile: null,
  uploadedEvalFile: null,
  isCheckingVision: false,
  isVisionModel: false,
  isEmbeddingModel: false,
  isAudioModel: false,
  isLoadingModelDefaults: false,
  modelDefaultsError: null,
  modelDefaultsAppliedFor: null,
  isCheckingDataset: false,
  // Starts as null rather than false — presumably "not checked yet"; TODO confirm.
  isDatasetImage: null,
  isDatasetAudio: false,
  maxPositionEmbeddings: null,
  // Spread last, so any key it shares with the entries above takes the hyperparameter default.
  ...DEFAULT_HYPERPARAMS,
};

// The three variables below are module-level, so they are shared by every
// consumer of this module and live outside the store's state.

// AbortController for in-flight dataset multimodal checks.
let _datasetCheckController: AbortController | null = null;

// AbortController for in-flight model default loads.
// A new load aborts the previous controller before installing its own.
let _modelConfigController: AbortController | null = null;

// Track whether the user has manually toggled trainOnCompletions
// since the last auto-set (model load or dataset change).
let _trainOnCompletionsManuallySet = false;

// State keys that must never be written to persistent storage.
const NON_PERSISTED_STATE_KEYS: ReadonlySet<keyof TrainingConfigState> = new Set([
  "modelType",
  "isCheckingVision",
  "isEmbeddingModel",
  "isAudioModel",
  "isLoadingModelDefaults",
  "modelDefaultsError",
  "modelDefaultsAppliedFor",
  "isCheckingDataset",
  "isDatasetImage",
  "isDatasetAudio",
  "trainOnCompletions",
  "maxPositionEmbeddings",
]);

/**
 * Produce the slice of the store that gets persisted: every own enumerable
 * entry of `state` whose key is not listed in NON_PERSISTED_STATE_KEYS.
 */
function partializePersistedState(
  state: TrainingConfigStore,
): Partial<TrainingConfigStore> {
  const isPersisted = ([key]: [string, unknown]): boolean =>
    !NON_PERSISTED_STATE_KEYS.has(key as keyof TrainingConfigState);

  const persistedEntries = Object.entries(state).filter(isPersisted);
  return Object.fromEntries(persistedEntries) as Partial<TrainingConfigStore>;
}

/** Clamp an arbitrary number into the [MIN_STEP, MAX_STEP] range. */
function clampStep(step: number): StepNumber {
  const atLeastMin = Math.max(MIN_STEP, step);
  const withinRange = Math.min(MAX_STEP, atLeastMin);
  return withinRange as StepNumber;
}

/**
 * Reports whether the wizard may advance past `state.currentStep`.
 *
 * Step 1 needs a model type, step 2 a selected model, and step 3 a dataset
 * from whichever source is active. Steps 4 and 5 are never blocked; any
 * other step value yields false.
 */
function canProceedForStep(state: TrainingConfigState): boolean {
  const { currentStep } = state;

  if (currentStep === 1) return state.modelType !== null;
  if (currentStep === 2) return state.selectedModel !== null;
  if (currentStep === 3) {
    const chosenDataset =
      state.datasetSource === "upload" ? state.uploadedFile : state.dataset;
    return chosenDataset !== null;
  }
  return currentStep === 4 || currentStep === 5;
}

export const useTrainingConfigStore = create<TrainingConfigStore>()(
  persist(
    (set, get) => {
      /**
       * Loads the backend defaults for `modelName` and applies them to the store.
       *
       * Any previous in-flight load is aborted first. Every async continuation
       * (config response, training-method auto-selection, fallback vision
       * check) is ignored once this request has been aborted or a different
       * model has been selected. If the config request fails, the error
       * message is stored and a fallback vision check picks between the
       * "vision" and "text" model types.
       */
      const loadAndApplyModelDefaults = (modelName: string) => {
        _modelConfigController?.abort();
        const controller = new AbortController();
        _modelConfigController = controller;
        set({
          isLoadingModelDefaults: true,
          isCheckingVision: true,
          modelDefaultsError: null,
        });

        // Shared staleness guard: a superseded load (even for the same model
        // name) must not write into state owned by the newer load.
        const isStale = () =>
          controller.signal.aborted || get().selectedModel !== modelName;

        void getModelConfig(modelName, controller.signal, get().hfToken || undefined)
          .then((modelDetails) => {
            if (isStale()) return;

            _trainOnCompletionsManuallySet = false;
            const patch = mapBackendModelConfigToTrainingPatch(modelDetails.config);

            // If vision model + image dataset already known, override
            // trainOnCompletions to false regardless of backend default.
            if (modelDetails.is_vision && get().isDatasetImage === true) {
              patch.trainOnCompletions = false;
            }

            const isAudio = !!modelDetails.is_audio;
            // Pure audio model → always uncheck trainOnCompletions.
            if (isAudio && !modelDetails.is_vision) {
              patch.trainOnCompletions = false;
            }
            // Audio-capable vision model (e.g. gemma3n) + audio dataset → uncheck.
            if (isAudio && modelDetails.is_vision && get().isDatasetAudio) {
              patch.trainOnCompletions = false;
            }

            // Use backend-provided model_type when available, otherwise
            // infer from capability flags.
            const isEmbedding = !!modelDetails.is_embedding;
            const inferredModelType: ModelType = modelDetails.model_type
              ?? (isEmbedding ? "embeddings" : modelDetails.is_vision ? "vision" : modelDetails.is_audio ? "audio" : "text");

            // Auto-select training method based on model size vs GPU memory.
            // If model_size * 1.5 * context_scale fits in free VRAM, use LoRA 16-bit.
            // Otherwise use QLoRA 4-bit.
            const modelSizeBytes = modelDetails.model_size_bytes;
            if (modelSizeBytes && modelSizeBytes > 0) {
              void autoSelectTrainingMethod(modelSizeBytes, patch.contextLength ?? get().contextLength)
                .then((method) => {
                  if (isStale()) return;
                  if (method) set({ trainingMethod: method });
                })
                .catch(() => {
                  // Auto-selection is best-effort: on failure keep the current
                  // training method instead of surfacing an unhandled rejection.
                });
            }

            set({
              ...patch,
              modelType: inferredModelType,
              isVisionModel: modelDetails.is_vision,
              isEmbeddingModel: isEmbedding,
              isAudioModel: isAudio,
              isLoadingModelDefaults: false,
              isCheckingVision: false,
              modelDefaultsError: null,
              modelDefaultsAppliedFor: modelName,
              maxPositionEmbeddings: modelDetails.max_position_embeddings ?? null,
            });
          })
          .catch((error) => {
            if (isStale()) return;

            set({
              isLoadingModelDefaults: false,
              isEmbeddingModel: false,
              isAudioModel: false,
              modelDefaultsError:
                error instanceof Error
                  ? error.message
                  : "Failed to load model defaults",
            });

            // Fallback vision check if config endpoint fails.
            void checkVisionModel(modelName)
              .then((isVision) => {
                if (isStale()) return;
                set({
                  modelType: isVision ? "vision" : "text",
                  isVisionModel: isVision,
                  isEmbeddingModel: false,
                  isAudioModel: false,
                  isCheckingVision: false,
                });
              })
              .catch(() => {
                if (isStale()) return;
                set({ isCheckingVision: false, isEmbeddingModel: false, isAudioModel: false });
              });
          });
      };

      /**
       * Starts a multimodal check for `datasetName`/`split`, superseding any
       * check already in flight, and writes the detected image/audio flags
       * into the store. Unless the user toggled it manually,
       * trainOnCompletions is switched off for the model/dataset combinations
       * listed below.
       */
      const runDatasetCheck = (datasetName: string, split: string) => {
        _datasetCheckController?.abort();
        const controller = new AbortController();
        _datasetCheckController = controller;
        set({ isCheckingDataset: true });

        const { hfToken, datasetSubset, isVisionModel: isVlm } = get();
        checkDatasetFormat({
          datasetName,
          hfToken: hfToken.trim() || null,
          subset: datasetSubset,
          split,
          isVlm,
        })
          .then((result) => {
            if (controller.signal.aborted) return;

            const datasetHasImages = !!result.is_image;
            const datasetHasAudio = !!result.is_audio;
            const updates: Record<string, unknown> = {
              isDatasetImage: datasetHasImages,
              isDatasetAudio: datasetHasAudio,
              isCheckingDataset: false,
            };

            if (!_trainOnCompletionsManuallySet) {
              const { isVisionModel, isAudioModel } = get();
              // Vision model on an image dataset.
              const visionOnImages = isVisionModel && datasetHasImages;
              // Pure audio model → always uncheck regardless of dataset.
              const pureAudioModel = isAudioModel && !isVisionModel;
              // Audio-capable vision model (e.g. gemma3n) + audio dataset.
              const audioVisionOnAudio =
                isAudioModel && isVisionModel && datasetHasAudio;

              if (visionOnImages || pureAudioModel || audioVisionOnAudio) {
                updates.trainOnCompletions = false;
              }
            }

            set(updates);
          })
          .catch(() => {
            if (controller.signal.aborted) return;
            set({ isDatasetImage: null, isCheckingDataset: false });
          });
      };

      /**
       * Builds a fresh patch that clears every dataset-dependent field
       * (subset/splits, mapping, advisor output, slicing, eval upload and
       * the multimodal detection flags).
       */
      const resetDatasetState = (): Partial<TrainingConfigStore> => {
        const cleared: Partial<TrainingConfigStore> = {
          // Subset / split selection.
          datasetSubset: null,
          datasetSplit: null,
          datasetEvalSplit: null,
          // Column mapping and advisor output.
          datasetManualMapping: emptyManualMapping(),
          datasetSystemPrompt: "",
          datasetUserTemplate: "",
          datasetAssistantTemplate: "",
          datasetLabelMapping: {},
          datasetAdvisorNotification: null,
          // Slicing and eval upload.
          datasetSliceStart: null,
          datasetSliceEnd: null,
          uploadedEvalFile: null,
          // Multimodal detection results.
          isDatasetImage: null,
          isDatasetAudio: false,
          isCheckingDataset: false,
        };
        return cleared;
      };

      return {
        ...initialState,
        // Wizard navigation; next/prev stay within the clamped step range.
        setStep: (step) => set({ currentStep: step }),
        nextStep: () => {
          const { currentStep } = get();
          set({ currentStep: clampStep(currentStep + 1) });
        },
        prevStep: () => {
          const { currentStep } = get();
          set({ currentStep: clampStep(currentStep - 1) });
        },
        // Changing the model type clears the selected model, so any pending
        // defaults request is cancelled and all model-derived flags reset.
        setModelType: (modelType) => {
          _modelConfigController?.abort();
          _modelConfigController = null;

          set({
            modelType,
            selectedModel: null,
            // Defaults-loading bookkeeping.
            isLoadingModelDefaults: false,
            modelDefaultsError: null,
            modelDefaultsAppliedFor: null,
            // Capability flags.
            isCheckingVision: false,
            isVisionModel: false,
            isEmbeddingModel: false,
            isAudioModel: false,
            isDatasetAudio: false,
          });
        },
        // Selects a model. A non-empty selection (re)loads backend defaults
        // when the model changed or its defaults were never applied; an empty
        // selection cancels pending loads and clears model-derived flags.
        setSelectedModel: (selectedModel) => {
          const { selectedModel: previousModel } = get();
          set({ selectedModel, modelDefaultsError: null });

          if (selectedModel) {
            const modelChanged = selectedModel !== previousModel;
            if (modelChanged || get().modelDefaultsAppliedFor !== selectedModel) {
              void loadAndApplyModelDefaults(selectedModel);
            }
            return;
          }

          _modelConfigController?.abort();
          _modelConfigController = null;
          set({
            isLoadingModelDefaults: false,
            modelDefaultsError: null,
            modelDefaultsAppliedFor: null,
            isCheckingVision: false,
            isVisionModel: false,
            isEmbeddingModel: false,
            isAudioModel: false,
            isDatasetAudio: false,
          });
        },
        // Loads defaults for the selected model unless a load is already
        // running or the defaults were already applied for that model.
        ensureModelDefaultsLoaded: () => {
          const {
            selectedModel,
            isLoadingModelDefaults,
            modelDefaultsAppliedFor,
          } = get();
          if (!selectedModel) return;

          const alreadyHandled =
            isLoadingModelDefaults || modelDefaultsAppliedFor === selectedModel;
          if (alreadyHandled) return;

          void loadAndApplyModelDefaults(selectedModel);
        },
        setTrainingMethod: (trainingMethod) => {
          set({ trainingMethod });
        },
        // Stores the token with surrounding whitespace trimmed and any
        // leading/trailing quote characters removed.
        setHfToken: (hfToken) => {
          const trimmed = hfToken.trim();
          const unquoted = trimmed.replace(/^["']+|["']+$/g, "");
          set({ hfToken: unquoted });
        },
        setDatasetSource: (datasetSource) => {
          set({ datasetSource });
        },
        // Switches to a Hugging Face dataset: cancels any running check,
        // re-enables trainOnCompletions auto-detection, drops the uploaded
        // file and clears all dataset-dependent state.
        selectHfDataset: (dataset) => {
          _datasetCheckController?.abort();
          _datasetCheckController = null;
          _trainOnCompletionsManuallySet = false;

          const cleared = resetDatasetState();
          set({
            ...cleared,
            datasetSource: "huggingface",
            dataset,
            uploadedFile: null,
          });
        },
        // Switches to an uploaded dataset, clears dataset-dependent state and,
        // when a file is given, immediately checks its "train" split.
        selectLocalDataset: (uploadedFile) => {
          _datasetCheckController?.abort();
          _datasetCheckController = null;
          _trainOnCompletionsManuallySet = false;

          const cleared = resetDatasetState();
          set({
            ...cleared,
            datasetSource: "upload",
            dataset: null,
            uploadedFile,
          });

          if (!uploadedFile) return;
          runDatasetCheck(uploadedFile, "train");
        },
        setDatasetFormat: (datasetFormat) => {
          set({ datasetFormat });
        },
        // Changes the dataset name and invalidates everything derived from the
        // previous dataset (subset, splits, mapping, slice and detection flags).
        setDataset: (dataset) => {
          _datasetCheckController?.abort();
          _datasetCheckController = null;
          _trainOnCompletionsManuallySet = false;

          set({
            dataset,
            // Selection within the dataset.
            datasetSubset: null,
            datasetSplit: null,
            datasetEvalSplit: null,
            datasetSliceStart: null,
            datasetSliceEnd: null,
            datasetManualMapping: emptyManualMapping(),
            // Detection results.
            isCheckingDataset: false,
            isDatasetImage: null,
            isDatasetAudio: false,
          });
        },
        // Changes the subset and invalidates the splits, mapping and
        // detection flags that depended on the previous subset.
        setDatasetSubset: (datasetSubset) => {
          _datasetCheckController?.abort();
          _datasetCheckController = null;
          _trainOnCompletionsManuallySet = false;

          set({
            datasetSubset,
            datasetSplit: null,
            datasetEvalSplit: null,
            datasetManualMapping: emptyManualMapping(),
            isCheckingDataset: false,
            isDatasetImage: null,
            isDatasetAudio: false,
          });
        },
        // Changes the training split, clears the mapping and detection flags,
        // then re-checks the active dataset (falling back to "train" when no
        // split is given).
        setDatasetSplit: (datasetSplit) => {
          set({
            datasetSplit,
            datasetManualMapping: emptyManualMapping(),
            isCheckingDataset: false,
            isDatasetImage: null,
            isDatasetAudio: false,
          });

          const { datasetSource, dataset, uploadedFile } = get();
          const datasetName =
            datasetSource === "huggingface" ? dataset : uploadedFile;
          if (datasetName) {
            runDatasetCheck(datasetName, datasetSplit || "train");
          }
        },
        // Runs the dataset check only if none is in progress and no image
        // detection result is known yet for the active dataset.
        ensureDatasetChecked: () => {
          const {
            isCheckingDataset,
            isDatasetImage,
            datasetSource,
            dataset,
            uploadedFile,
            datasetSplit,
          } = get();

          const alreadyResolved = isCheckingDataset || isDatasetImage !== null;
          if (alreadyResolved) return;

          const datasetName =
            datasetSource === "huggingface" ? dataset : uploadedFile;
          if (!datasetName) return;

          runDatasetCheck(datasetName, datasetSplit || "train");
        },
        // Picking an eval split sets evalSteps to 0.1; clearing it sets 0.
        setDatasetEvalSplit: (datasetEvalSplit) => {
          const evalSteps = datasetEvalSplit ? 0.1 : 0;
          set({ datasetEvalSplit, evalSteps });
        },
        setDatasetManualMapping: (datasetManualMapping) => {
          set({ datasetManualMapping });
        },
        // Merges advisor output into the store. Omitted fields keep their
        // current value; the two template fields are always cleared.
        setDatasetAdvisorFields: (fields) => {
          const current = get();
          const notification =
            fields.notification === undefined
              ? current.datasetAdvisorNotification
              : fields.notification;

          set({
            datasetSystemPrompt: fields.systemPrompt ?? current.datasetSystemPrompt,
            datasetUserTemplate: "", // templates no longer used
            datasetAssistantTemplate: "", // templates no longer used
            datasetLabelMapping: fields.labelMapping ?? current.datasetLabelMapping,
            datasetAdvisorNotification: notification,
          });
        },
        // Resets every advisor-provided field to its empty value.
        clearDatasetAdvisorFields: () => {
          set({
            datasetAdvisorNotification: null,
            datasetLabelMapping: {},
            datasetSystemPrompt: "",
            datasetUserTemplate: "",
            datasetAssistantTemplate: "",
          });
        },
        setDatasetSliceStart: (datasetSliceStart) => {
          set({ datasetSliceStart });
        },
        setDatasetSliceEnd: (datasetSliceEnd) => {
          set({ datasetSliceEnd });
        },
        // Replaces the uploaded training file and invalidates everything
        // derived from the previous file, including the uploaded eval file.
        setUploadedFile: (uploadedFile) => {
          _datasetCheckController?.abort();
          _datasetCheckController = null;
          _trainOnCompletionsManuallySet = false;

          set({
            uploadedFile,
            uploadedEvalFile: null,
            // Selection within the dataset.
            datasetSubset: null,
            datasetSplit: null,
            datasetEvalSplit: null,
            datasetSliceStart: null,
            datasetSliceEnd: null,
            datasetManualMapping: emptyManualMapping(),
            // Detection results.
            isCheckingDataset: false,
            isDatasetImage: null,
            isDatasetAudio: false,
          });
        },
        // Providing an eval file sets evalSteps to 0.1; clearing it sets 0.
        setUploadedEvalFile: (uploadedEvalFile) => {
          const evalSteps = uploadedEvalFile ? 0.1 : 0;
          set({ uploadedEvalFile, evalSteps });
        },
        // Plain hyperparameter setters: each stores its argument under the
        // same-named state field without validation or side effects.
        setEpochs: (epochs) => set({ epochs }),
        setContextLength: (contextLength) => set({ contextLength }),
        setLearningRate: (learningRate) => set({ learningRate }),
        setOptimizerType: (optimizerType) => set({ optimizerType }),
        setLrSchedulerType: (lrSchedulerType) => set({ lrSchedulerType }),
        setLoraRank: (loraRank) => set({ loraRank }),
        setLoraAlpha: (loraAlpha) => set({ loraAlpha }),
        setLoraDropout: (loraDropout) => set({ loraDropout }),
        setLoraVariant: (loraVariant) => set({ loraVariant }),
        setBatchSize: (batchSize) => set({ batchSize }),
        setGradientAccumulation: (gradientAccumulation) =>
          set({ gradientAccumulation }),
        setWeightDecay: (weightDecay) => set({ weightDecay }),
        setWarmupSteps: (warmupSteps) => set({ warmupSteps }),
        setMaxSteps: (maxSteps) => set({ maxSteps }),
        setSaveSteps: (saveSteps) => set({ saveSteps }),
        setEvalSteps: (evalSteps) => set({ evalSteps }),
        setPacking: (packing) => set({ packing }),
        // User-driven toggle. Marks the value as manually set so the dataset
        // check does not override it until the flag is cleared again by a
        // model load or dataset change.
        setTrainOnCompletions: (trainOnCompletions) => {
          _trainOnCompletionsManuallySet = true;
          set({ trainOnCompletions });
        },
        // Plain setters for training options, logging integrations and
        // fine-tuning targets: each stores its argument under the same-named
        // state field.
        setGradientCheckpointing: (gradientCheckpointing) =>
          set({ gradientCheckpointing }),
        setRandomSeed: (randomSeed) => set({ randomSeed }),
        setEnableWandb: (enableWandb) => set({ enableWandb }),
        setWandbToken: (wandbToken) => set({ wandbToken }),
        setWandbProject: (wandbProject) => set({ wandbProject }),
        setEnableTensorboard: (enableTensorboard) => set({ enableTensorboard }),
        setTensorboardDir: (tensorboardDir) => set({ tensorboardDir }),
        setLogFrequency: (logFrequency) => set({ logFrequency }),
        setFinetuneVisionLayers: (finetuneVisionLayers) =>
          set({ finetuneVisionLayers }),
        setFinetuneLanguageLayers: (finetuneLanguageLayers) =>
          set({ finetuneLanguageLayers }),
        setFinetuneAttentionModules: (finetuneAttentionModules) =>
          set({ finetuneAttentionModules }),
        setFinetuneMLPModules: (finetuneMLPModules) =>
          set({ finetuneMLPModules }),
        setTargetModules: (targetModules) => set({ targetModules }),
        // Whether the wizard may advance from the current step.
        canProceed: () => canProceedForStep(get()),
        // Restores the initial state. In-flight requests are cancelled first:
        // the dataset check only guards on its abort signal, so without this a
        // late response would write stale detection results (and possibly
        // trainOnCompletions) into the freshly reset state.
        reset: () => {
          _datasetCheckController?.abort();
          _datasetCheckController = null;
          _modelConfigController?.abort();
          _modelConfigController = null;
          _trainOnCompletionsManuallySet = false;
          set(initialState);
        },
        // Forces a fresh load of the backend defaults for the selected model.
        resetToModelDefaults: () => {
          const currentModel = get().selectedModel;
          if (currentModel) {
            set({ modelDefaultsAppliedFor: null });
            loadAndApplyModelDefaults(currentModel);
          }
        },
        // Maps a backend model config to store fields and merges the result.
        applyConfigPatch: (config: BackendModelConfig) => {
          set(mapBackendModelConfigToTrainingPatch(config));
        },
      };
    },
    {
      name: "unsloth_training_config_v1",
      version: 8,
      // Upgrades persisted state written by an older schema version. Each
      // step only fills in fields that are still null/undefined.
      migrate: (persisted, version) => {
        const saved = persisted as Record<string, unknown>;

        // Carry the legacy `datasetConfig` value over to `datasetSubset`.
        const shouldCopyLegacySubset =
          version < 2 && saved.datasetSubset == null && saved.datasetConfig != null;
        if (shouldCopyLegacySubset) {
          saved.datasetSubset = saved.datasetConfig;
        }
        // The legacy key is dropped regardless of the stored version.
        delete saved.datasetConfig;

        if (version < 3) {
          saved.modelDefaultsAppliedFor ??= null;
        }
        if (version < 4) {
          saved.optimizerType ??= DEFAULT_HYPERPARAMS.optimizerType;
        }
        if (version < 5) {
          saved.lrSchedulerType ??= DEFAULT_HYPERPARAMS.lrSchedulerType;
        }
        if (version < 6) {
          saved.datasetEvalSplit ??= null;
        }
        if (version < 7) {
          saved.datasetSliceStart ??= null;
          saved.datasetSliceEnd ??= null;
        }
        if (version < 8) {
          saved.datasetSystemPrompt ??= "";
          saved.datasetUserTemplate ??= "";
          saved.datasetAssistantTemplate ??= "";
          saved.datasetLabelMapping ??= {};
          saved.datasetAdvisorNotification ??= null;
        }

        return saved as unknown as TrainingConfigStore;
      },
      partialize: partializePersistedState,
    },
  ),
);


================================================
FILE: studio/frontend/src/features/training/stores/training-runtime-store.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { create } from "zustand";
import type {
  TrainingMetricsResponse,
  TrainingProgressPayload,
  TrainingRuntimeState,
  TrainingRuntimeStore,
  TrainingSeriesPoint,
  TrainingStatusResponse,
} from "../types/runtime";

// Baseline runtime state: no job, idle phase, zeroed progress and empty
// metric histories. Spread into the store on creation and on resetRuntime.
const initialState: TrainingRuntimeState = {
  // Job identity and lifecycle.
  jobId: null,
  phase: "idle",
  isTrainingRunning: false,
  evalEnabled: false,
  message: "Ready to train",
  error: null,
  // Hydration / start-up bookkeeping.
  isHydrating: false,
  hasHydrated: false,
  isStarting: false,
  startError: null,
  // Event-stream bookkeeping.
  sseConnected: false,
  firstStepReceived: false,
  lastEventId: null,
  // Latest progress snapshot.
  currentStep: 0,
  totalSteps: 0,
  currentEpoch: 0,
  currentLoss: 0,
  currentLearningRate: 0,
  progressPercent: 0,
  elapsedSeconds: null,
  etaSeconds: null,
  currentGradNorm: null,
  currentNumTokens: null,
  // Metric series, kept sorted by step by the helpers below.
  lossHistory: [],
  lrHistory: [],
  gradNormHistory: [],
  evalLossHistory: [],
  // Incremented by resetRuntime and setStartQueued.
  resetGeneration: 0,
  stopRequested: false,
};

/** Returns a new array with the points ordered by ascending step; the input is not mutated. */
function sortSeries(points: TrainingSeriesPoint[]): TrainingSeriesPoint[] {
  const ordered = points.slice();
  ordered.sort((left, right) => left.step - right.step);
  return ordered;
}

/**
 * Pairs `steps[i]` with `values[i]` into series points, dropping every pair
 * where either side is not a finite number, and returns them sorted by step.
 */
function toSeries(steps: number[], values: number[]): TrainingSeriesPoint[] {
  const collected: TrainingSeriesPoint[] = [];
  steps.forEach((step, index) => {
    const value = values[index];
    if (Number.isFinite(step) && Number.isFinite(value)) {
      collected.push({ step, value });
    }
  });
  return sortSeries(collected);
}

/** Returns `value` when it is a finite number, otherwise null. */
function toFiniteNumber(value: unknown): number | null {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }
  return null;
}

/**
 * Returns a copy of `points` in which the point for `step` carries `value`:
 * an existing point is replaced in place, a new one is added and the series
 * re-sorted by step. The input array is never mutated.
 */
function upsertPoint(
  points: TrainingSeriesPoint[],
  step: number,
  value: number,
): TrainingSeriesPoint[] {
  const existingIndex = points.findIndex((point) => point.step === step);
  if (existingIndex < 0) {
    return sortSeries([...points, { step, value }]);
  }

  const updated = [...points];
  updated[existingIndex] = { step, value };
  return updated;
}

/**
 * Converts the optional `metric_history` of a status payload into series.
 *
 * Every series is null when the payload has no history or no steps. Loss and
 * learning rate are paired with the shared `steps`; grad norm and eval loss
 * use their own step arrays and are null when either array is missing.
 */
function applyMetricHistoryFromStatus(payload: TrainingStatusResponse): {
  lossHistory: TrainingSeriesPoint[] | null;
  lrHistory: TrainingSeriesPoint[] | null;
  gradNormHistory: TrainingSeriesPoint[] | null;
  evalLossHistory: TrainingSeriesPoint[] | null;
} {
  const history = payload.metric_history;
  if (!history || !history.steps?.length) {
    return {
      lossHistory: null,
      lrHistory: null,
      gradNormHistory: null,
      evalLossHistory: null,
    };
  }

  const {
    steps,
    loss,
    lr,
    grad_norm: gradNorm,
    grad_norm_steps: gradNormSteps,
    eval_loss: evalLoss,
    eval_steps: evalSteps,
  } = history;

  return {
    lossHistory: loss ? toSeries(steps, loss) : null,
    lrHistory: lr ? toSeries(steps, lr) : null,
    gradNormHistory:
      gradNorm && gradNormSteps ? toSeries(gradNormSteps, gradNorm) : null,
    evalLossHistory:
      evalLoss && evalSteps ? toSeries(evalSteps, evalLoss) : null,
  };
}

export const useTrainingRuntimeStore = create<TrainingRuntimeStore>()((set) => ({
  ...initialState,

  // Single-field setters: each writes `value` to the named state field.
  setStopRequested: (value) => set({ stopRequested: value }),
  setHydrating: (value) => set({ isHydrating: value }),
  setHasHydrated: (value) => set({ hasHydrated: value }),
  setStarting: (value) => set({ isStarting: value }),
  setStartError: (value) => set({ startError: value }),
  setSseConnected: (value) => set({ sseConnected: value }),
  setLastEventId: (value) => set({ lastEventId: value }),

  // Restores the initial runtime state with fresh history arrays and bumps
  // resetGeneration.
  resetRuntime: () =>
    set((state) => {
      const nextGeneration = state.resetGeneration + 1;
      return {
        ...initialState,
        lossHistory: [],
        lrHistory: [],
        gradNormHistory: [],
        evalLossHistory: [],
        resetGeneration: nextGeneration,
      };
    }),

  // Marks a job as queued: records its id and message, moves to the
  // "configuring" phase, and clears errors, stream bookkeeping, the progress
  // snapshot and all metric histories left over from a previous run.
  setStartQueued: (jobId, message) =>
    set((state) => {
      const clearedProgress = {
        currentStep: 0,
        totalSteps: 0,
        currentEpoch: 0,
        currentLoss: 0,
        currentLearningRate: 0,
        progressPercent: 0,
        elapsedSeconds: null,
        etaSeconds: null,
        currentGradNorm: null,
        currentNumTokens: null,
      };
      const clearedHistories = {
        lossHistory: [],
        lrHistory: [],
        gradNormHistory: [],
        evalLossHistory: [],
      };

      return {
        ...state,
        jobId,
        message,
        error: null,
        startError: null,
        phase: "configuring",
        isStarting: false,
        sseConnected: false,
        firstStepReceived: false,
        lastEventId: null,
        ...clearedProgress,
        ...clearedHistories,
        resetGeneration: state.resetGeneration + 1,
      };
    }),

  // Moves the runtime into the "error" phase with the given message and
  // clears the starting / start-error / stream-connected flags.
  setRuntimeError: (message) => {
    set({
      phase: "error",
      error: message,
      startError: null,
      isStarting: false,
      sseConnected: false,
    });
  },

  // Merges a status response into the store. Numeric details and metric
  // histories replace the stored values only when the payload provides
  // them; stopRequested is cleared once training is no longer running.
  applyStatus: (payload) =>
    set((state) => {
      const histories = applyMetricHistoryFromStatus(payload);
      const details = payload.details;

      // Picks the payload number when present, otherwise the stored value.
      const numberOr = (candidate: unknown, fallback: number): number =>
        typeof candidate === "number" ? candidate : fallback;
      // Same, but never lets a provided count go below zero.
      const nonNegativeOr = (candidate: unknown, fallback: number): number =>
        typeof candidate === "number" ? Math.max(candidate, 0) : fallback;

      return {
        ...state,
        jobId: payload.job_id || state.jobId,
        phase: payload.phase,
        isTrainingRunning: payload.is_training_running,
        stopRequested: payload.is_training_running ? state.stopRequested : false,
        evalEnabled: payload.eval_enabled ?? state.evalEnabled,
        message: payload.message,
        error: payload.error,
        currentStep: nonNegativeOr(details?.step, state.currentStep),
        totalSteps: nonNegativeOr(details?.total_steps, state.totalSteps),
        currentLoss: numberOr(details?.loss, state.currentLoss),
        currentLearningRate: numberOr(
          details?.learning_rate,
          state.currentLearningRate,
        ),
        currentEpoch: numberOr(details?.epoch, state.currentEpoch),
        lossHistory: histories.lossHistory ?? state.lossHistory,
        lrHistory: histories.lrHistory ?? state.lrHistory,
        gradNormHistory: histories.gradNormHistory ?? state.gradNormHistory,
        evalLossHistory: histories.evalLossHistory ?? state.evalLossHistory,
      };
    }),

  // Merges a metrics response into the store. Non-empty series replace the
  // stored ones, and currentStep never moves backwards.
  applyMetrics: (payload: TrainingMetricsResponse) =>
    set((state) => {
      const steps = payload.step_history;
      const losses = toSeries(steps, payload.loss_history);
      const learningRates = toSeries(steps, payload.lr_history);
      const gradNorms = toSeries(
        payload.grad_norm_step_history,
        payload.grad_norm_history,
      );

      // Prefer the explicit current step, else the last recorded one.
      const lastRecordedStep = steps.length > 0 ? steps[steps.length - 1] : null;
      const latestStep = payload.current_step ?? lastRecordedStep;

      const preferNonEmpty = (
        fresh: TrainingSeriesPoint[],
        existing: TrainingSeriesPoint[],
      ): TrainingSeriesPoint[] => (fresh.length > 0 ? fresh : existing);

      let currentStep = state.currentStep;
      if (typeof latestStep === "number") {
        currentStep = Math.max(latestStep, state.currentStep);
      }

      let currentLoss = state.currentLoss;
      if (typeof payload.current_loss === "number") {
        currentLoss = payload.current_loss;
      }

      let currentLearningRate = state.currentLearningRate;
      if (typeof payload.current_lr === "number") {
        currentLearningRate = payload.current_lr;
      }

      return {
        ...state,
        lossHistory: preferNonEmpty(losses, state.lossHistory),
        lrHistory: preferNonEmpty(learningRates, state.lrHistory),
        gradNormHistory: preferNonEmpty(gradNorms, state.gradNormHistory),
        currentStep,
        currentLoss,
        currentLearningRate,
      };
    }),

  // Applies one progress event: updates the progress snapshot and, for
  // steps above zero, upserts each finite metric into its history series.
  applyProgress: (payload: TrainingProgressPayload, eventId?: number) =>
    set((state) => {
      const step = Math.max(payload.step, 0);
      const loss = toFiniteNumber(payload.loss);
      const learningRate = toFiniteNumber(payload.learning_rate);
      const gradNorm = toFiniteNumber(payload.grad_norm);
      const evalLoss = toFiniteNumber(payload.eval_loss);

      // A point is recorded only for a real step with a finite value.
      const record = (
        series: TrainingSeriesPoint[],
        value: number | null,
      ): TrainingSeriesPoint[] =>
        step > 0 && value !== null ? upsertPoint(series, step, value) : series;

      return {
        ...state,
        jobId: payload.job_id || state.jobId,
        currentStep: step,
        totalSteps: Math.max(payload.total_steps, state.totalSteps),
        currentLoss: loss ?? state.currentLoss,
        currentLearningRate: learningRate ?? state.currentLearningRate,
        progressPercent: payload.progress_percent,
        currentEpoch: payload.epoch ?? state.currentEpoch,
        elapsedSeconds: payload.elapsed_seconds,
        etaSeconds: payload.eta_seconds,
        currentGradNorm: gradNorm,
        currentNumTokens: payload.num_tokens,
        firstStepReceived: state.firstStepReceived || step > 0,
        lastEventId: typeof eventId === "number" ? eventId : state.lastEventId,
        lossHistory: record(state.lossHistory, loss),
        lrHistory: record(state.lrHistory, learningRate),
        gradNormHistory: record(state.gradNormHistory, gradNorm),
        evalLossHistory: record(state.evalLossHistory, evalLoss),
      };
    }),
}));

/**
 * Reports whether the training view should be shown: true as soon as the
 * runtime is not idle, is running or starting, or has any recorded progress.
 */
export function shouldShowTrainingView(state: TrainingRuntimeStore): boolean {
  if (state.phase !== "idle") return true;
  if (state.isTrainingRunning || state.isStarting) return true;
  return state.lossHistory.length > 0 || state.currentStep > 0;
}


================================================
FILE: studio/frontend/src/features/training/types/api.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Request body for starting a training job. Field names use the backend's
 * snake_case spelling.
 */
export interface TrainingStartRequest {
  // Model and loading options.
  model_name: string;
  training_type: string;
  hf_token: string | null;
  load_in_4bit: boolean;
  max_seq_length: number;
  /** Allow loading models with custom code. Only enable for repos you trust. */
  trust_remote_code?: boolean;
  // Dataset selection (hub dataset and/or local files) and slicing.
  hf_dataset: string | null;
  subset: string | null;
  train_split: string | null;
  eval_split: string | null;
  dataset_slice_start: number | null;
  dataset_slice_end: number | null;
  local_datasets: string[];
  local_eval_datasets: string[];
  format_type: string;
  custom_format_mapping?: Record<string, unknown> | null;
  // Optimisation hyperparameters. Note that learning_rate is sent as a string.
  num_epochs: number;
  learning_rate: string;
  batch_size: number;
  gradient_accumulation_steps: number;
  warmup_steps: number | null;
  warmup_ratio: number | null;
  max_steps: number | null;
  save_steps: number;
  eval_steps: number;
  weight_decay: number;
  random_seed: number;
  packing: boolean;
  optim: string;
  lr_scheduler_type: string;
  // LoRA / adapter configuration.
  use_lora: boolean;
  lora_r: number;
  lora_alpha: number;
  lora_dropout: number;
  target_modules: string[];
  gradient_checkpointing: string;
  use_rslora: boolean;
  use_loftq: boolean;
  train_on_completions: boolean;
  // Which parts of the model are fine-tuned.
  finetune_vision_layers: boolean;
  finetune_language_layers: boolean;
  finetune_attention_modules: boolean;
  finetune_mlp_modules: boolean;
  // Dataset / model modality flags.
  is_dataset_image: boolean;
  is_dataset_audio: boolean;
  is_embedding: boolean;
  // Experiment tracking.
  enable_wandb: boolean;
  wandb_token: string | null;
  wandb_project: string | null;
  enable_tensorboard: boolean;
  tensorboard_dir: string | null;
}

/** Response to a training start request: the job id plus a queued/error status. */
export interface TrainingStartResponse {
  job_id: string;
  status: "queued" | "error";
  message: string;
  /** Error text, or null when there is none. */
  error: string | null;
}

/** Response to a training stop request; status is either "stopped" or "idle". */
export interface TrainingStopResponse {
  status: "stopped" | "idle";
  message: string;
}


================================================
FILE: studio/frontend/src/features/training/types/config.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type {
  DatasetFormat,
  DatasetSource,
  GradientCheckpointing,
  ModelType,
  StepNumber,
  TrainingMethod,
} from "@/types/training";
import type { BackendModelConfig } from "../api/models-api";

/** Adapter variant selectable in the training config. */
export type LoraVariant = "lora" | "rslora" | "loftq";

/**
 * Column-to-role mapping keyed by dataset column name,
 * e.g. { "problem": "user", "solution": "assistant", "context": "system" }.
 */
export type DatasetManualMapping = Record<string, string>;

/** Data fields of the training configuration store (actions are declared separately). */
export interface TrainingConfigState {
  // Wizard position and model selection.
  currentStep: StepNumber;
  modelType: ModelType | null;
  selectedModel: string | null;
  trainingMethod: TrainingMethod;
  hfToken: string;
  // Dataset selection, mapping and slicing.
  datasetSource: DatasetSource;
  datasetFormat: DatasetFormat;
  dataset: string | null;
  datasetSubset: string | null;
  datasetSplit: string | null;
  datasetEvalSplit: string | null;
  datasetManualMapping: DatasetManualMapping;
  datasetSystemPrompt: string;
  datasetUserTemplate: string;
  datasetAssistantTemplate: string;
  datasetLabelMapping: Record<string, Record<string, string>>;
  datasetAdvisorNotification: string | null;
  // Slice bounds are held as strings (or null when unset).
  datasetSliceStart: string | null;
  datasetSliceEnd: string | null;
  uploadedFile: string | null;
  uploadedEvalFile: string | null;
  // Training hyperparameters.
  epochs: number;
  contextLength: number;
  learningRate: number;
  optimizerType: string;
  lrSchedulerType: string;
  loraRank: number;
  loraAlpha: number;
  loraDropout: number;
  loraVariant: LoraVariant;
  batchSize: number;
  gradientAccumulation: number;
  weightDecay: number;
  warmupSteps: number;
  maxSteps: number;
  saveSteps: number;
  evalSteps: number;
  packing: boolean;
  trainOnCompletions: boolean;
  gradientCheckpointing: GradientCheckpointing;
  randomSeed: number;
  // Experiment tracking.
  enableWandb: boolean;
  wandbToken: string;
  wandbProject: string;
  enableTensorboard: boolean;
  tensorboardDir: string;
  logFrequency: number;
  // Model capability flags and defaults-loading status.
  isCheckingVision: boolean;
  isVisionModel: boolean;
  isEmbeddingModel: boolean;
  isAudioModel: boolean;
  isLoadingModelDefaults: boolean;
  modelDefaultsError: string | null;
  modelDefaultsAppliedFor: string | null;
  // Dataset modality detection; isDatasetImage is null while unknown.
  isCheckingDataset: boolean;
  isDatasetImage: boolean | null;
  isDatasetAudio: boolean;
  trustRemoteCode: boolean;
  // Fine-tuning targets.
  finetuneVisionLayers: boolean;
  finetuneLanguageLayers: boolean;
  finetuneAttentionModules: boolean;
  finetuneMLPModules: boolean;
  targetModules: string[];
  maxPositionEmbeddings: number | null;
}

/**
 * Action half of the training configuration store.
 *
 * Combined with `TrainingConfigState` to form `TrainingConfigStore`.
 * Every action returns `void` except `canProceed`, which returns a boolean.
 */
export interface TrainingConfigActions {
  // --- Step navigation ---
  setStep: (step: StepNumber) => void;
  nextStep: () => void;
  prevStep: () => void;
  // --- Model selection ---
  setModelType: (type: ModelType) => void;
  setSelectedModel: (model: string | null) => void;
  ensureModelDefaultsLoaded: () => void;
  ensureDatasetChecked: () => void;
  setTrainingMethod: (method: TrainingMethod) => void;
  setHfToken: (token: string) => void;
  // --- Dataset selection and mapping ---
  setDatasetSource: (source: DatasetSource) => void;
  selectHfDataset: (dataset: string | null) => void;
  selectLocalDataset: (file: string | null) => void;
  setDatasetFormat: (format: DatasetFormat) => void;
  setDataset: (dataset: string | null) => void;
  setDatasetSubset: (subset: string | null) => void;
  setDatasetSplit: (split: string | null) => void;
  setDatasetEvalSplit: (split: string | null) => void;
  setDatasetManualMapping: (mapping: DatasetManualMapping) => void;
  // All advisor fields are optional, so callers may pass any subset.
  setDatasetAdvisorFields: (fields: {
    systemPrompt?: string;
    labelMapping?: Record<string, Record<string, string>>;
    notification?: string | null;
  }) => void;
  clearDatasetAdvisorFields: () => void;
  setDatasetSliceStart: (value: string | null) => void;
  setDatasetSliceEnd: (value: string | null) => void;
  setUploadedFile: (file: string | null) => void;
  setUploadedEvalFile: (file: string | null) => void;
  // --- Hyperparameters ---
  setEpochs: (epochs: number) => void;
  setContextLength: (length: number) => void;
  setLearningRate: (rate: number) => void;
  setOptimizerType: (value: string) => void;
  setLrSchedulerType: (value: string) => void;
  setLoraRank: (rank: number) => void;
  setLoraAlpha: (alpha: number) => void;
  setLoraDropout: (dropout: number) => void;
  setLoraVariant: (variant: LoraVariant) => void;
  setBatchSize: (value: number) => void;
  setGradientAccumulation: (value: number) => void;
  setWeightDecay: (value: number) => void;
  setWarmupSteps: (value: number) => void;
  setMaxSteps: (value: number) => void;
  setSaveSteps: (value: number) => void;
  setEvalSteps: (value: number) => void;
  setPacking: (value: boolean) => void;
  setTrainOnCompletions: (value: boolean) => void;
  setGradientCheckpointing: (value: GradientCheckpointing) => void;
  setRandomSeed: (value: number) => void;
  // --- Logging integrations ---
  setEnableWandb: (value: boolean) => void;
  setWandbToken: (value: string) => void;
  setWandbProject: (value: string) => void;
  setEnableTensorboard: (value: boolean) => void;
  setTensorboardDir: (value: string) => void;
  setLogFrequency: (value: number) => void;
  // --- Which parts of the model are fine-tuned ---
  setFinetuneVisionLayers: (value: boolean) => void;
  setFinetuneLanguageLayers: (value: boolean) => void;
  setFinetuneAttentionModules: (value: boolean) => void;
  setFinetuneMLPModules: (value: boolean) => void;
  setTargetModules: (value: string[]) => void;
  // --- Whole-store operations ---
  canProceed: () => boolean;
  reset: () => void;
  resetToModelDefaults: () => void;
  applyConfigPatch: (config: BackendModelConfig) => void;
}

// Full store shape: the state fields plus the actions that operate on them.
export type TrainingConfigStore = TrainingConfigState & TrainingConfigActions;


================================================
FILE: studio/frontend/src/features/training/types/datasets.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Result of a dataset format check. Only `requires_manual_mapping`,
// `detected_format` and `columns` are always present; every other field is
// optional and most may also be null.
// NOTE(review): snake_case names suggest this mirrors a backend payload — confirm.
export type CheckFormatResponse = {
  requires_manual_mapping: boolean;
  detected_format: string;
  columns: string[];
  suggested_mapping?: Record<string, string> | null;
  detected_image_column?: string | null;
  detected_audio_column?: string | null;
  detected_text_column?: string | null;
  detected_speaker_column?: string | null;
  preview_samples?: Record<string, unknown>[] | null;
  total_rows?: number | null;
  is_image?: boolean;
  is_audio?: boolean;
  multimodal_columns?: string[] | null;
  warning?: string | null;
};

// Result of a dataset upload: a file name and the path it was stored at.
export type UploadDatasetResponse = {
  filename: string;
  stored_path: string;
};

// One entry of `LocalDatasetsResponse.datasets`. `id`, `label` and `path` are
// required; `metadata`, `rows` and `updated_at` may be absent or null.
export type LocalDatasetInfo = {
  // Optional record/batch counters and column names; each may be null.
  metadata?: {
    actual_num_records?: number | null;
    target_num_records?: number | null;
    total_num_batches?: number | null;
    num_completed_batches?: number | null;
    columns?: string[] | null;
  } | null;
  id: string;
  label: string;
  path: string;
  rows?: number | null;
  // NOTE(review): numeric timestamp; unit (seconds vs milliseconds) is not visible here.
  updated_at?: number | null;
};

// Wrapper object holding a list of local dataset entries.
export type LocalDatasetsResponse = {
  datasets: LocalDatasetInfo[];
};


================================================
FILE: studio/frontend/src/features/training/types/runtime.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Closed set of phase names used by `TrainingStatusResponse.phase` and
// `TrainingRuntimeState.phase`.
export type TrainingPhase =
  | "idle"
  | "downloading_model"
  | "downloading_dataset"
  | "loading_model"
  | "loading_dataset"
  | "configuring"
  | "training"
  | "completed"
  | "error"
  | "stopped";

/**
 * Status payload accepted by `TrainingRuntimeActions.applyStatus`.
 *
 * `details` and `metric_history` are optional and may be null; every field
 * inside them is optional as well.
 */
export interface TrainingStatusResponse {
  job_id: string;
  phase: TrainingPhase;
  is_training_running: boolean;
  eval_enabled: boolean;
  message: string;
  error: string | null;
  // Latest scalar values, when provided.
  details?: {
    epoch?: number;
    step?: number;
    total_steps?: number;
    loss?: number;
    learning_rate?: number;
  } | null;
  // Parallel arrays of metric values. Grad-norm and eval-loss carry their own
  // step arrays (`grad_norm_steps`, `eval_steps`) separate from `steps`.
  metric_history?: {
    steps?: number[];
    loss?: number[];
    lr?: number[];
    grad_norm?: number[];
    grad_norm_steps?: number[];
    eval_loss?: number[];
    eval_steps?: number[];
  } | null;
}

/**
 * Metrics payload accepted by `TrainingRuntimeActions.applyMetrics`:
 * history arrays plus the current loss / learning rate / step (each nullable).
 */
export interface TrainingMetricsResponse {
  loss_history: number[];
  lr_history: number[];
  step_history: number[];
  grad_norm_history: number[];
  // Grad-norm values have their own step array, separate from `step_history`.
  grad_norm_step_history: number[];
  current_loss: number | null;
  current_lr: number | null;
  current_step: number | null;
}

/**
 * Progress payload accepted by `TrainingRuntimeActions.applyProgress`
 * (which also takes an optional numeric event id).
 * Fields typed `number | null` may be missing a value for a given update.
 */
export interface TrainingProgressPayload {
  job_id: string;
  step: number;
  total_steps: number;
  loss: number;
  learning_rate: number;
  progress_percent: number;
  epoch: number | null;
  elapsed_seconds: number | null;
  eta_seconds: number | null;
  grad_norm: number | null;
  num_tokens: number | null;
  eval_loss: number | null;
}

// One (step, value) sample; element type of the `*History` arrays in
// `TrainingRuntimeState`.
export interface TrainingSeriesPoint {
  step: number;
  value: number;
}

/**
 * Data half of the training runtime store.
 *
 * Combined with `TrainingRuntimeActions` to form `TrainingRuntimeStore`.
 */
export interface TrainingRuntimeState {
  // --- Job identity and overall status ---
  jobId: string | null;
  phase: TrainingPhase;
  isTrainingRunning: boolean;
  evalEnabled: boolean;
  message: string;
  error: string | null;
  // --- Client-side lifecycle flags ---
  isHydrating: boolean;
  hasHydrated: boolean;
  isStarting: boolean;
  startError: string | null;
  sseConnected: boolean;
  firstStepReceived: boolean;
  lastEventId: number | null;
  // --- Latest scalar metrics ---
  currentStep: number;
  totalSteps: number;
  currentEpoch: number;
  currentLoss: number;
  currentLearningRate: number;
  progressPercent: number;
  elapsedSeconds: number | null;
  etaSeconds: number | null;
  currentGradNorm: number | null;
  currentNumTokens: number | null;
  // --- Metric series as (step, value) points ---
  lossHistory: TrainingSeriesPoint[];
  lrHistory: TrainingSeriesPoint[];
  gradNormHistory: TrainingSeriesPoint[];
  evalLossHistory: TrainingSeriesPoint[];
  // NOTE(review): presumably a counter bumped on reset — confirm in the store implementation.
  resetGeneration: number;
  stopRequested: boolean;
}

/**
 * Action half of the training runtime store.
 *
 * Combined with `TrainingRuntimeState` to form `TrainingRuntimeStore`.
 * All actions return `void`.
 */
export interface TrainingRuntimeActions {
  // --- Simple flag / field setters ---
  setStopRequested: (value: boolean) => void;
  setHydrating: (value: boolean) => void;
  setHasHydrated: (value: boolean) => void;
  setStarting: (value: boolean) => void;
  setStartError: (value: string | null) => void;
  setSseConnected: (value: boolean) => void;
  setLastEventId: (value: number | null) => void;
  resetRuntime: () => void;
  // --- Payload appliers, one per payload type declared in this file ---
  applyStatus: (payload: TrainingStatusResponse) => void;
  applyMetrics: (payload: TrainingMetricsResponse) => void;
  applyProgress: (payload: TrainingProgressPayload, eventId?: number) => void;
  setStartQueued: (jobId: string, message: string) => void;
  setRuntimeError: (message: string) => void;
}

// Full runtime store shape: the state fields plus the actions that operate on them.
export type TrainingRuntimeStore = TrainingRuntimeState & TrainingRuntimeActions;


================================================
FILE: studio/frontend/src/hooks/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Barrel file: re-exports the shared hooks from their individual modules.
export { useDebouncedValue } from "./use-debounced-value";
export { useGpuInfo } from "./use-gpu-info";
export { useGpuUtilization } from "./use-gpu-utilization";
export { useHardwareInfo } from "./use-hardware-info";
export { useHfModelSearch } from "./use-hf-model-search";
export { useRecommendedModelVram } from "./use-recommended-model-vram";
export { useHfDatasetSearch } from "./use-hf-dataset-search";
export { useHfDatasetSplits } from "./use-hf-dataset-splits";
export { useHfTokenValidation } from "./use-hf-token-validation";
export { useInfiniteScroll } from "./use-infinite-scroll";


================================================
FILE: studio/frontend/src/hooks/use-debounced-value.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useEffect, useState } from "react";

/**
 * Return a copy of `value` that is only updated once `value` has gone
 * `delayMs` milliseconds without changing.
 *
 * @param value - The rapidly changing input.
 * @param delayMs - Quiet period in milliseconds (defaults to 300).
 * @returns The most recent value that survived a full quiet period; the
 *   initial `value` until then.
 */
export function useDebouncedValue<T>(value: T, delayMs = 300): T {
  const [settled, setSettled] = useState(value);

  useEffect(() => {
    // A new value (or delay) cancels the pending timer through the cleanup
    // below and starts a fresh one.
    const timer = setTimeout(() => {
      setSettled(value);
    }, delayMs);

    return () => {
      clearTimeout(timer);
    };
  }, [value, delayMs]);

  return settled;
}


================================================
FILE: studio/frontend/src/hooks/use-gpu-info.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useEffect, useState } from "react";

// GPU summary returned by `useGpuInfo`, derived from the `/api/system` response.
export interface GpuInfo {
  available: boolean;
  // Name of the first reported device ("Unknown" when it has none).
  name: string;
  // Sum of `memory_total_gb` over all reported devices.
  memoryTotalGb: number;
  // Taken from `memory.available_gb` of the response (0 when missing).
  systemRamAvailableGb: number;
}

// Placeholder used as the hook's initial state and returned when the request
// fails or the response reports no usable GPU.
const DEFAULT_GPU: GpuInfo = {
  available: false,
  name: "Unknown",
  memoryTotalGb: 0,
  systemRamAvailableGb: 0,
};

// Module-level cache so multiple components share one fetch.
// `cachedGpu` is only set after a successful response that reports a GPU.
let cachedGpu: GpuInfo | null = null;
// In-flight (or settled) request shared by concurrent callers; reset to null
// when the request fails so that a later call can retry.
let fetchPromise: Promise<GpuInfo> | null = null;

/**
 * Load GPU info from `/api/system`, sharing one request between callers.
 *
 * Returns the cached value when present, otherwise the in-flight promise,
 * otherwise starts a new request. Never rejects: failures resolve to
 * `DEFAULT_GPU`.
 */
async function fetchGpuOnce(): Promise<GpuInfo> {
  if (cachedGpu) return cachedGpu;
  if (fetchPromise) return fetchPromise;

  const load = async (): Promise<GpuInfo> => {
    try {
      const response = await fetch("/api/system");
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }

      const payload = await response.json();
      const gpuSection = payload?.gpu;
      const hasUsableGpu = gpuSection?.available && gpuSection.devices?.length;
      // No GPU reported: hand back the placeholder without caching it.
      if (!hasUsableGpu) return DEFAULT_GPU;

      const deviceList = gpuSection.devices as Array<{ name?: string; memory_total_gb?: number }>;
      let combinedMemoryGb = 0;
      for (const device of deviceList) {
        combinedMemoryGb += device.memory_total_gb ?? 0;
      }

      const resolved: GpuInfo = {
        available: true,
        name: deviceList[0]?.name ?? "Unknown",
        memoryTotalGb: combinedMemoryGb,
        systemRamAvailableGb: payload?.memory?.available_gb ?? 0,
      };
      cachedGpu = resolved;
      return resolved;
    } catch {
      // Drop the shared promise so a later call retries (for example when the
      // backend was not ready yet).
      fetchPromise = null;
      return DEFAULT_GPU;
    }
  };

  fetchPromise = load();
  return fetchPromise;
}

/**
 * React hook exposing GPU info from the backend `/api/system` endpoint.
 *
 * The lookup goes through a module-level cache, so components mounting
 * this hook share one request instead of each issuing their own.
 *
 * @returns The cached GPU info when available, otherwise a placeholder that
 *   is replaced once the shared request resolves.
 */
export function useGpuInfo(): GpuInfo {
  const [gpu, setGpu] = useState<GpuInfo>(cachedGpu ?? DEFAULT_GPU);

  useEffect(() => {
    // Initial state already came from the cache; nothing to load.
    if (cachedGpu) return;

    let active = true;
    fetchGpuOnce().then((loaded) => {
      // Skip the state update if the component unmounted in the meantime.
      if (active) {
        setGpu(loaded);
      }
    });

    return () => {
      active = false;
    };
  }, []);

  return gpu;
}


================================================
FILE: studio/frontend/src/hooks/use-gpu-utilization.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";
import { useEffect, useRef, useState } from "react";

// Shape the `/api/train/hardware` JSON response is cast to by
// `useGpuUtilization`. Every metric is nullable.
export interface GpuUtilization {
    available: boolean;
    backend: string | null;
    gpu_utilization_pct: number | null;
    temperature_c: number | null;
    vram_used_gb: number | null;
    vram_total_gb: number | null;
    vram_utilization_pct: number | null;
    power_draw_w: number | null;
    power_limit_w: number | null;
    power_utilization_pct: number | null;
}

// Empty reading: the hook's initial state, and the value it resets to
// whenever polling is disabled.
const DEFAULT: GpuUtilization = {
    available: false,
    backend: null,
    gpu_utilization_pct: null,
    temperature_c: null,
    vram_used_gb: null,
    vram_total_gb: null,
    vram_utilization_pct: null,
    power_draw_w: null,
    power_limit_w: null,
    power_utilization_pct: null,
};

/**
 * React hook that polls `GET /api/train/hardware` for live GPU stats.
 *
 * @param enabled - Polling runs only while this is true; when it is false
 *   the returned data is reset to the empty reading.
 * @param intervalMs - Delay between polls in milliseconds (default 10 000).
 * @returns The most recent successful reading, or the empty reading.
 */
export function useGpuUtilization(
    enabled: boolean,
    intervalMs = 10_000,
): GpuUtilization {
    const [data, setData] = useState<GpuUtilization>(DEFAULT);
    const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);

    useEffect(() => {
        if (!enabled) {
            // Clear stale numbers so consumers fall back to the empty reading.
            setData(DEFAULT);
            return;
        }

        let active = true;

        const refresh = async (): Promise<void> => {
            try {
                const response = await authFetch("/api/train/hardware");
                if (!active || !response.ok) return;
                const reading = (await response.json()) as GpuUtilization;
                if (active) {
                    setData(reading);
                }
            } catch {
                // Best effort: a failed poll is ignored and the next tick retries.
            }
        };

        // One immediate read, then one per interval.
        void refresh();
        timerRef.current = setInterval(() => {
            void refresh();
        }, intervalMs);

        return () => {
            active = false;
            const timer = timerRef.current;
            if (timer) clearInterval(timer);
        };
    }, [enabled, intervalMs]);

    return data;
}


================================================
FILE: studio/frontend/src/hooks/use-hardware-info.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { authFetch } from "@/features/auth";
import { useEffect, useState } from "react";

// Hardware and library-version summary returned by `useHardwareInfo`.
// GPU fields come from the `gpu` section and version fields from the
// `versions` section of the `/api/system/hardware` response; anything
// missing is null.
export interface HardwareInfo {
    gpuName: string | null;
    vramTotalGb: number | null;
    vramFreeGb: number | null;
    torch: string | null;
    cuda: string | null;
    transformers: string | null;
    unsloth: string | null;
}

// All-null placeholder: the hook's initial state and the value returned
// when the request fails.
const DEFAULT: HardwareInfo = {
    gpuName: null,
    vramTotalGb: null,
    vramFreeGb: null,
    torch: null,
    cuda: null,
    transformers: null,
    unsloth: null,
};

// Module-level cache so multiple components share one fetch.
// `cached` is only set after a successful response.
let cached: HardwareInfo | null = null;
// In-flight (or settled) request shared by concurrent callers; reset to null
// when the request fails so that a later call can retry.
let fetchPromise: Promise<HardwareInfo> | null = null;

/**
 * Load hardware info from `/api/system/hardware`, sharing one request
 * between callers.
 *
 * Returns the cached value when present, otherwise the in-flight promise,
 * otherwise starts a new request. Never rejects: failures resolve to
 * `DEFAULT`.
 */
async function fetchOnce(): Promise<HardwareInfo> {
    if (cached) return cached;
    if (fetchPromise) return fetchPromise;

    const load = async (): Promise<HardwareInfo> => {
        try {
            const response = await authFetch("/api/system/hardware");
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }

            const payload = await response.json();
            const gpu = payload?.gpu;
            const versions = payload?.versions;

            const hardware: HardwareInfo = {
                gpuName: gpu?.gpu_name ?? null,
                vramTotalGb: gpu?.vram_total_gb ?? null,
                vramFreeGb: gpu?.vram_free_gb ?? null,
                torch: versions?.torch ?? null,
                cuda: versions?.cuda ?? null,
                transformers: versions?.transformers ?? null,
                unsloth: versions?.unsloth ?? null,
            };
            cached = hardware;
            return hardware;
        } catch {
            // Drop the shared promise so a later call retries (for example
            // when the backend was not ready yet).
            fetchPromise = null;
            return DEFAULT;
        }
    };

    fetchPromise = load();
    return fetchPromise;
}

/**
 * React hook exposing hardware info from `GET /api/system/hardware`.
 *
 * The lookup goes through a module-level cache, so components mounting
 * this hook share one request instead of each issuing their own.
 *
 * @returns The cached hardware info when available, otherwise an all-null
 *   placeholder that is replaced once the shared request resolves.
 */
export function useHardwareInfo(): HardwareInfo {
    const [info, setInfo] = useState<HardwareInfo>(cached ?? DEFAULT);

    useEffect(() => {
        // Initial state already came from the cache; nothing to load.
        if (cached) return;

        let active = true;
        fetchOnce().then((loaded) => {
            // Skip the state update if the component unmounted in the meantime.
            if (active) {
                setInfo(loaded);
            }
        });

        return () => {
            active = false;
        };
    }, []);

    return info;
}


================================================
FILE: studio/frontend/src/hooks/use-hf-dataset-search.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { listDatasets } from "@huggingface/hub";
import { useCallback, useMemo } from "react";
import type { ModelType } from "@/types/training";
import { useHfPaginatedSearch } from "./use-hf-paginated-search";

// One split entry inside a dataset card's `dataset_info.splits` list.
// Only `num_examples` is read in this file (by `extractTotalExamples`).
interface DatasetInfoSplit {
  name: string;
  // biome-ignore lint/style/useNamingConvention: external schema
  num_bytes: number;
  // biome-ignore lint/style/useNamingConvention: external schema
  num_examples: number;
}

// Subset of a dataset's card data that this file reads.
interface CardDataWithInfo {
  size_categories?: string[];
  pretty_name?: string;
  // Either a single info object or an array of them; `extractTotalExamples`
  // handles both shapes.
  dataset_info?:
    | {
        splits?: DatasetInfoSplit[];
        download_size?: number;
        dataset_size?: number;
      }
    | Array<{ splits?: DatasetInfoSplit[] }>;
}

/**
 * Add up `num_examples` over every split listed in the card's `dataset_info`.
 *
 * @param cardData - Dataset card data, possibly undefined.
 * @returns The total example count, or `undefined` when there is no
 *   `dataset_info` or no split carries a numeric `num_examples`.
 */
function extractTotalExamples(
  cardData: CardDataWithInfo | undefined,
): number | undefined {
  const datasetInfo = cardData?.dataset_info;
  if (!datasetInfo) {
    return undefined;
  }

  // `dataset_info` may be a single object or a list of them.
  const infoList = Array.isArray(datasetInfo) ? datasetInfo : [datasetInfo];

  let total = 0;
  let countedSplits = 0;
  for (const info of infoList) {
    for (const split of info.splits ?? []) {
      if (typeof split.num_examples !== "number") continue;
      total += split.num_examples;
      countedSplits += 1;
    }
  }

  // Distinguish "no usable splits" (undefined) from a genuine total of 0.
  return countedSplits > 0 ? total : undefined;
}

// Normalised dataset entry produced by `mapDataset` (and, for curated
// defaults, by `toCuratedDatasetResult`).
export interface HfDatasetResult {
  id: string;
  downloads: number;
  likes: number;
  // Sum of `num_examples` over the card's splits, when available.
  totalExamples?: number;
  // First entry of the card's `size_categories`, when available.
  sizeCategory?: string;
  // Tags that started with "task_categories:", with that prefix removed.
  taskCategories: string[];
  // Tags that contain no ":" at all.
  plainTags: string[];
}

/**
 * Convert one raw dataset listing entry into an `HfDatasetResult`.
 *
 * Tags are split into task categories (prefix "task_categories:", stripped)
 * and plain tags (no ":" anywhere); any other namespaced tag is dropped.
 *
 * @param raw - Untyped entry; treated as having `name`, `downloads`, `likes`
 *   and optional `tags` / `cardData`.
 */
function mapDataset(raw: unknown): HfDatasetResult {
  const entry = raw as {
    name: string;
    downloads: number;
    likes: number;
    tags?: string[];
    cardData?: unknown;
  };
  const cardData = entry.cardData as CardDataWithInfo | undefined;

  const taskPrefix = "task_categories:";
  const taskCategories: string[] = [];
  const plainTags: string[] = [];
  for (const tag of entry.tags ?? []) {
    if (tag.startsWith(taskPrefix)) {
      taskCategories.push(tag.slice(taskPrefix.length));
    } else if (!tag.includes(":")) {
      plainTags.push(tag);
    }
  }

  return {
    id: entry.name,
    downloads: entry.downloads,
    likes: entry.likes,
    totalExamples: extractTotalExamples(cardData),
    sizeCategory: cardData?.size_categories?.[0],
    taskCategories,
    plainTags,
  };
}

/**
 * `fetch` wrapper that defaults the listing sort order to trending.
 *
 * Adds `sort=trendingScore` and `direction=-1` to the request URL unless the
 * caller already supplied those query parameters, then delegates to the
 * global `fetch`.
 *
 * @param input - URL string, `URL`, or `Request` (same as `fetch`).
 * @param init - Optional request init, forwarded unchanged.
 * @returns The promise returned by `fetch`.
 */
function withTrendingSort(
  input: Parameters<typeof fetch>[0],
  init?: Parameters<typeof fetch>[1],
): ReturnType<typeof fetch> {
  let url: URL;
  let originalRequest: Request | undefined;
  if (typeof input === "string") {
    url = new URL(input);
  } else if (input instanceof URL) {
    url = new URL(input.toString());
  } else {
    url = new URL(input.url);
    originalRequest = input;
  }

  if (!url.searchParams.has("sort")) {
    url.searchParams.set("sort", "trendingScore");
  }
  if (!url.searchParams.has("direction")) {
    url.searchParams.set("direction", "-1");
  }

  // A Request carries its own method, headers and signal. Rebuild it on the
  // amended URL instead of forwarding only the URL, which would drop them.
  if (originalRequest) {
    return fetch(new Request(url, originalRequest), init);
  }
  return fetch(url, init);
}

// Ranking verdict returned by `rankDatasetRelevance`.
type DatasetRelevance = "incompatible" | "neutral" | "boosted";

// Per model type: task categories that make a dataset rank as "boosted"
// when it lists at least one of them.
const BOOSTED_TASK_CATEGORIES: Record<ModelType, Set<string>> = {
  text: new Set([
    "text-generation",
    "text2text-generation",
    "question-answering",
    "summarization",
    "conversational",
  ]),
  vision: new Set([
    "image-text-to-text",
    "visual-question-answering",
    "image-to-text",
    "image-captioning",
  ]),
  audio: new Set([
    "text-to-speech",
    "text-to-audio",
    "automatic-speech-recognition",
  ]),
  embeddings: new Set([
    "feature-extraction",
    "sentence-similarity",
    "text-retrieval",
  ]),
};

// Task categories counted as incompatible for every model type. A dataset is
// only ranked "incompatible" when all of its task categories are in this set
// or in the model-specific one.
const INCOMPATIBLE_TASKS_ALL_MODELS = new Set([
  "text-to-3d",
  "image-to-3d",
  "robotics",
  "reinforcement-learning",
  "tabular-classification",
  "tabular-regression",
  "time-series-forecasting",
]);

// Plain tags (compared after lowercasing) that mark a dataset as a
// pretraining corpus / an OCR corpus respectively.
const PRETRAINING_PLAIN_TAGS = new Set(["pretraining", "pre-training"]);
const OCR_PLAIN_TAGS = new Set(["ocr", "document-ocr"]);

// Size-category labels that cause `isPretrainingDataset` to treat a dataset
// as a pretraining corpus, regardless of its tags.
const PRETRAINING_SIZE_CATEGORIES = new Set([
  "5M<n<10M",
  "10M<n<100M",
  "100M<n<1B",
  "1B<n<10B",
  "10B<n<100B",
  "100B<n<1T",
  "n>1T",
]);

// Task categories that mark a dataset as OCR / vision-text; such datasets are
// hidden whenever the model type is not "vision".
const OCR_OR_VISION_TEXT_TASKS = new Set([
  "image-to-text",
  "image-captioning",
  "visual-question-answering",
  "document-question-answering",
]);

// Hand-picked dataset ids shown by `useHfDatasetSearch` when the search query
// is empty and a model type is set. Returned in the listed order as id-only
// results (see `toCuratedDatasetResult`); no Hub request is made for them.
const CURATED_EMPTY_QUERY_DATASET_IDS: Partial<Record<ModelType, string[]>> = {
  text: [
    "unsloth/alpaca-cleaned",
    "unsloth/OpenMathReasoning-mini",
    "mlabonne/FineTome-100k",
    "openai/gsm8k",
    "philschmid/guanaco-sharegpt-style",
    "open-r1/DAPO-Math-17k-Processed",
    "HuggingFaceH4/Multilingual-Thinking",
    "HuggingFaceH4/ultrafeedback_binarized",
    "reciperesearch/dolphin-sft-v0.1-preference",
    "roneneldan/TinyStories",
    "FreedomIntelligence/alpaca-gpt4-korean",
    "Goedel-LM/SFT_dataset_v2",
    "allenai/tulu-3-sft-mixture",
    "HuggingFaceH4/no_robots",
    "Magpie-Align/Magpie-Air-300K-Filtered",
    "teknium/OpenHermes-2.5",
    "databricks/databricks-dolly-15k",
    "tatsu-lab/alpaca",
    "garage-bAInd/Open-Platypus",
    "microsoft/orca-math-word-problems-200k",
    "Open-Orca/OpenOrca",
    "openbmb/UltraInteract_sft",
  ],
  vision: [
    "unsloth/LaTeX_OCR",
    "unsloth/llava-instruct-mix-vsft-mini",
    "unsloth/Radiology_mini",
    "AI4Math/MathVista",
    "AI4Math/MathVerse",
    "ChongyanChen/VQAonline",
    "lmms-lab/VQAv2",
    "hezarai/parsynth-ocr-200k",
  ],
  audio: [
    "MrDragonFox/Elise",
    "keithito/lj_speech",
    "parler-tts/mls_eng_10k",
    "parler-tts/libritts-r-filtered-speaker-descriptions",
    "openslr/librispeech_asr",
    "MikhailT/hifi-tts",
    "mozilla-foundation/common_voice_17_0",
    "facebook/voxpopuli",
    "speechcolab/gigaspeech",
    "kth-tmh/vctk",
    "Wenetspeech4TTS/WenetSpeech4TTS",
  ],
  embeddings: [
    "electroglyph/technical",
  ],
};

// Image / video task categories shared by the model-specific tables below.
const IMAGE_AND_VIDEO_TASKS: string[] = [
  "text-to-image",
  "image-to-image",
  "image-to-video",
  "text-to-video",
  "image-classification",
  "image-feature-extraction",
  "image-text-to-image",
  "zero-shot-image-classification",
  "keypoint-detection",
  "object-detection",
  "image-segmentation",
  "depth-estimation",
  "video-classification",
  "visual-document-retrieval",
];

// Audio / speech task categories shared by the model-specific tables below.
const AUDIO_AND_SPEECH_TASKS: string[] = [
  "text-to-speech",
  "text-to-audio",
  "audio-classification",
  "audio-to-audio",
  "automatic-speech-recognition",
];

// Per model type: task categories counted as incompatible. Text and
// embedding models reject both image/video and audio tasks, vision models
// reject audio tasks, and audio models reject image/video tasks.
const INCOMPATIBLE_TASKS_BY_MODEL: Record<ModelType, Set<string>> = {
  text: new Set([...IMAGE_AND_VIDEO_TASKS, ...AUDIO_AND_SPEECH_TASKS]),
  vision: new Set(AUDIO_AND_SPEECH_TASKS),
  audio: new Set(IMAGE_AND_VIDEO_TASKS),
  embeddings: new Set([...IMAGE_AND_VIDEO_TASKS, ...AUDIO_AND_SPEECH_TASKS]),
};

/**
 * Tell whether a dataset looks like a pretraining corpus.
 *
 * True when any plain tag (lowercased) is a known pretraining tag, or when
 * the dataset's size category is one of the pretraining-scale labels.
 */
function isPretrainingDataset(dataset: HfDatasetResult): boolean {
  const { plainTags, sizeCategory } = dataset;

  const taggedAsPretraining = plainTags.some((tag) =>
    PRETRAINING_PLAIN_TAGS.has(tag.toLowerCase()),
  );
  if (taggedAsPretraining) {
    return true;
  }

  // A missing or empty size category never counts as pretraining-scale.
  return !!sizeCategory && PRETRAINING_SIZE_CATEGORIES.has(sizeCategory);
}

/**
 * Classify how relevant a dataset is for the given model type.
 *
 * Order of checks:
 * 1. Pretraining corpora are always "incompatible".
 * 2. OCR / vision-text datasets are "incompatible" unless the model type is
 *    "vision".
 * 3. Datasets without task categories are "neutral".
 * 4. Any boosted task category makes the dataset "boosted".
 * 5. If every task category is incompatible (globally or for this model
 *    type) the dataset is "incompatible"; otherwise it is "neutral".
 */
function rankDatasetRelevance(
  dataset: HfDatasetResult,
  modelType: ModelType,
): DatasetRelevance {
  if (isPretrainingDataset(dataset)) return "incompatible";

  // Keep OCR / vision-text corpora out of non-vision defaults.
  if (modelType !== "vision" && isOcrOrVisionTextDataset(dataset)) {
    return "incompatible";
  }

  const tasks = dataset.taskCategories;
  if (tasks.length === 0) return "neutral";

  const boostedTasks = BOOSTED_TASK_CATEGORIES[modelType];
  const blockedForModel = INCOMPATIBLE_TASKS_BY_MODEL[modelType];

  // Single pass: a boosted task wins immediately. Otherwise track whether
  // every task seen so far is blocked.
  let everyTaskBlocked = true;
  for (const task of tasks) {
    if (boostedTasks.has(task)) return "boosted";
    const blocked =
      INCOMPATIBLE_TASKS_ALL_MODELS.has(task) || blockedForModel.has(task);
    if (!blocked) everyTaskBlocked = false;
  }

  return everyTaskBlocked ? "incompatible" : "neutral";
}

/**
 * Tell whether a dataset is an OCR or vision-text dataset: either one of its
 * plain tags (lowercased) is an OCR tag, or one of its task categories is an
 * OCR / vision-text task.
 */
function isOcrOrVisionTextDataset(dataset: HfDatasetResult): boolean {
  const hasOcrTag = dataset.plainTags.some((tag) =>
    OCR_PLAIN_TAGS.has(tag.toLowerCase()),
  );
  if (hasOcrTag) {
    return true;
  }
  return dataset.taskCategories.some((task) =>
    OCR_OR_VISION_TEXT_TASKS.has(task),
  );
}

/**
 * Wrap a curated dataset id in the shared `HfDatasetResult` shape.
 *
 * Curated defaults are id-only, so counters are zero and the tag lists are
 * empty. This avoids extra Hub requests while keeping downstream
 * combobox/ranking code on a single result type.
 */
function toCuratedDatasetResult(id: string): HfDatasetResult {
  const placeholder: HfDatasetResult = {
    id,
    downloads: 0,
    likes: 0,
    taskCategories: [],
    plainTags: [],
  };
  return placeholder;
}

/**
 * React hook that searches Hub datasets and orders them for a model type.
 *
 * - With an empty query and a model type, no Hub listing is requested and
 *   the curated id list for that model type is returned instead.
 * - Otherwise the Hub is queried (trending sort by default). OCR /
 *   vision-text datasets are hidden unless the model type is "vision".
 *   When a model type is set, incompatible datasets are dropped and boosted
 *   ones are listed before neutral ones.
 *
 * @param query - Free-text search string; whitespace-only counts as empty.
 * @param options.modelType - Model type used for curation and ranking.
 * @param options.accessToken - Optional Hub token passed as credentials.
 * @param options.enabled - When false, `results` is always empty
 *   (default true).
 * @returns The paginated-search state with `results` replaced by the
 *   filtered and ranked list.
 */
export function useHfDatasetSearch(
  query: string,
  options?: { modelType?: ModelType | null; accessToken?: string; enabled?: boolean },
) {
  const { modelType, accessToken, enabled = true } = options ?? {};
  const hasQuery = query.trim().length > 0;
  const useCuratedOnly = !hasQuery && !!modelType;

  const createIter = useCallback(
    () => {
      if (useCuratedOnly) {
        // Curated defaults need no Hub listing: hand back an empty iterator.
        return (async function* empty() {})() as AsyncGenerator<unknown>;
      }

      const credentials = accessToken ? { credentials: { accessToken } } : {};
      return listDatasets({
        search: hasQuery ? { query } : {},
        additionalFields: ["cardData", "tags"],
        fetch: withTrendingSort,
        ...credentials,
      }) as AsyncGenerator<unknown>;
    },
    [useCuratedOnly, hasQuery, query, accessToken],
  );

  const search = useHfPaginatedSearch(createIter, mapDataset, { enabled });

  const results = useMemo(() => {
    if (!enabled) return [];

    // Typed model flow with an empty query: show the curated list only.
    if (!hasQuery && modelType) {
      const curatedIds = CURATED_EMPTY_QUERY_DATASET_IDS[modelType] ?? [];
      return curatedIds.map(toCuratedDatasetResult);
    }

    const visible =
      modelType === "vision"
        ? search.results
        : search.results.filter((ds) => !isOcrOrVisionTextDataset(ds));

    if (!modelType) return visible;

    const boosted: HfDatasetResult[] = [];
    const neutral: HfDatasetResult[] = [];
    for (const ds of visible) {
      switch (rankDatasetRelevance(ds, modelType)) {
        case "boosted":
          boosted.push(ds);
          break;
        case "incompatible":
          // Dropped from the list entirely.
          break;
        default:
          neutral.push(ds);
      }
    }

    return [...boosted, ...neutral];
  }, [enabled, search.results, modelType, query]);

  return { ...search, results };
}


================================================
FILE: studio/frontend/src/hooks/use-hf-dataset-splits.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useState } from "react";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

// One (dataset, config, split) triple; element type of `HfSplitsResponse.splits`.
export interface HfSplitEntry {
  dataset: string;
  config: string;
  split: string;
}

// Shape assumed for the JSON body returned by the splits endpoint
// (`HF_SPLITS_API`). Only `splits` is read in this file.
export interface HfSplitsResponse {
  splits: HfSplitEntry[];
  pending: unknown[];
  failed: unknown[];
}

// Return shape of the `useHfDatasetSplits` hook.
export interface HfDatasetSplitsResult {
  /** All unique subset names found in the dataset */
  subsets: string[];
  /** All split names available for the currently selected subset */
  splits: string[];
  /** Raw split entries from the API */
  entries: HfSplitEntry[];
  /** Whether the dataset has more than one subset */
  hasMultipleSubsets: boolean;
  /** Whether the selected subset has more than one split */
  hasMultipleSplits: boolean;
  /** True while the request is in-flight */
  isLoading: boolean;
  /** Error message if the fetch failed */
  error: string | null;
}

// Base URL of the splits endpoint; the dataset id is appended as the
// `dataset` query parameter.
const HF_SPLITS_API = "https://datasets-server.huggingface.co/splits";

/**
 * Map a raw splits-fetch error message to a user-facing explanation.
 *
 * Matching is case-insensitive substring search, checked in this order:
 * legacy dataset script, access problems (private / gated / 401 / 403),
 * not found (404), then a generic fallback.
 *
 * @param message - Raw error text.
 * @returns One of four fixed user-facing messages.
 */
function normalizeDatasetSplitsError(message: string): string {
  const haystack = message.toLowerCase();
  const mentionsAny = (needles: string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  // datasets-server returns technical script/runtime details for legacy datasets.
  const legacyScriptHints = [
    "dataset scripts are no longer supported",
    "runs arbitrary python code",
  ];
  if (mentionsAny(legacyScriptHints)) {
    return "We can’t load subset/split options for this Hub dataset because it relies on a legacy custom script.";
  }

  const accessHints = [
    "unauthorized",
    "forbidden",
    "access token",
    "private",
    "gated",
    "401",
    "403",
  ];
  if (mentionsAny(accessHints)) {
    return "Unable to load dataset splits. This dataset may be private or gated. Add a Hugging Face token with access and try again.";
  }

  if (mentionsAny(["not found", "404"])) {
    return "Dataset not found. Check the dataset name and try again.";
  }

  return "Unable to load dataset split options for this dataset.";
}

// ---------------------------------------------------------------------------
// Hook
// ---------------------------------------------------------------------------

/**
 * Fetches the available configs (subsets) and splits for a HuggingFace dataset
 * using the datasets-server API.
 *
 * Entries and error are cleared whenever `datasetName` changes, and a request
 * that is still in flight for a previous dataset/token is aborted.
 *
 * @param datasetName - HF dataset id (e.g. "ibm/duorc"), or null to skip.
 * @param selectedSubset - Currently selected subset, used to filter splits.
 * @param options.accessToken - Optional HF access token for gated datasets.
 * @returns Derived subset and split lists, the raw entries, a loading flag and
 *   a user-facing error message (null when there is no error).
 */
export function useHfDatasetSplits(
  datasetName: string | null,
  selectedSubset: string | null,
  options?: { accessToken?: string },
): HfDatasetSplitsResult {
  const [entries, setEntries] = useState<HfSplitEntry[]>([]);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Reset during render (not in an effect) when the dataset id changes, so the
  // previous dataset's entries/error are dropped before the new fetch resolves.
  const [prevDatasetName, setPrevDatasetName] = useState(datasetName);
  if (datasetName !== prevDatasetName) {
    setPrevDatasetName(datasetName);
    setEntries([]);
    setError(null);
  }

  const accessToken = options?.accessToken;

  // Re-created only when the token changes; that in turn re-runs the effect below.
  const fetchSplits = useCallback(
    async (dataset: string, signal: AbortSignal) => {
      const url = `${HF_SPLITS_API}?dataset=${encodeURIComponent(dataset)}`;
      const headers: Record<string, string> = {};
      if (accessToken) {
        headers.Authorization = `Bearer ${accessToken}`;
      }

      const res = await fetch(url, { headers, signal });
      if (!res.ok) {
        // Prefer the server-provided `error` field; a non-JSON body falls back
        // to a message carrying the HTTP status.
        const body = await res.json().catch(() => null);
        throw new Error(
          body?.error || `Failed to fetch splits (${res.status})`,
        );
      }

      const data: HfSplitsResponse = await res.json();
      return data.splits ?? [];
    },
    [accessToken],
  );

  useEffect(() => {
    // No dataset selected: clear everything and skip the request.
    if (!datasetName) {
      setEntries([]);
      setError(null);
      setIsLoading(false);
      return;
    }

    const controller = new AbortController();
    setIsLoading(true);
    setError(null);

    // Every state write below is skipped once this run has been aborted, so a
    // superseded request can never overwrite newer state.
    fetchSplits(datasetName, controller.signal)
      .then((splits) => {
        if (!controller.signal.aborted) {
          setEntries(splits);
          setError(null);
        }
      })
      .catch((err) => {
        if (!controller.signal.aborted) {
          const rawErrorMessage =
            err instanceof Error
              ? err.message
              : typeof err === "string"
                ? err
                : "Failed to fetch dataset splits";
          // The raw message goes to the console; the UI only sees the
          // normalized, user-facing text.
          console.warn("[useHfDatasetSplits] Failed to fetch dataset splits", {
            datasetName,
            message: rawErrorMessage,
            error: err,
          });
          setError(normalizeDatasetSplitsError(rawErrorMessage));
          setEntries([]);
        }
      })
      .finally(() => {
        // An aborted run leaves isLoading alone; the effect run that replaced
        // it sets the flag itself.
        if (!controller.signal.aborted) {
          setIsLoading(false);
        }
      });

    // Cleanup aborts the request when the dataset/token changes or on unmount.
    return () => controller.abort();
  }, [datasetName, fetchSplits]);

  // Derive unique subsets
  // (recomputed on every render, so the array identity is not stable).
  const subsets = Array.from(new Set(entries.map((e) => e.config)));

  // Derive splits for the active subset.
  // If dataset has >1 subset and none is selected yet, return no splits so UI
  // doesn't auto-pick/show a split before subset is chosen.
  const activeSubset =
    selectedSubset ?? (subsets.length === 1 ? subsets[0] : null);
  const filteredEntries = activeSubset
    ? entries.filter((e) => e.config === activeSubset)
    : [];
  const splits = Array.from(new Set(filteredEntries.map((e) => e.split)));

  return {
    subsets,
    splits,
    entries,
    hasMultipleSubsets: subsets.length > 1,
    hasMultipleSplits: activeSubset ? splits.length > 1 : false,
    isLoading,
    error,
  };
}


================================================
FILE: studio/frontend/src/hooks/use-hf-model-search.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import type { PipelineType } from "@huggingface/hub";
import { listModels, modelInfo } from "@huggingface/hub";
import { useCallback, useMemo } from "react";
import { useHfPaginatedSearch } from "./use-hf-paginated-search";

/** One row of the model search results, as produced by the mapper in this file. */
export interface HfModelResult {
  /** Model id, taken from the raw entry's `name` field. */
  id: string;
  downloads: number;
  likes: number;
  /** `safetensors.total` from the raw entry, when present. */
  totalParams?: number;
  /** Size estimate derived from the per-dtype parameter counts, when present. */
  estimatedSizeBytes?: number;
}

// Tags that mark a repo as a format/runtime-specific export. A model carrying
// any of them is filtered out of search results unless it is also tagged as an
// embedding model (see EMBEDDING_TAGS).
const EXCLUDED_TAGS = new Set([
  "gptq",
  "awq",
  "exl2",
  "mlx",
  "onnx",
  "openvino",
  "coreml",
  "tflite",
  "ctranslate2",
]);

// Embedding / sentence-transformer models ship with onnx/openvino as additional
// export formats — they should not be excluded by the tag check above.
const EMBEDDING_TAGS = new Set([
  "sentence-transformers",
  "feature-extraction",
]);

/**
 * `fetch`-compatible wrapper that adds `sort=downloads` and `direction=-1` to
 * the request URL's query string, each only when the caller has not already
 * set that parameter.
 *
 * The URL is taken from a string, a `URL`, or the `url` property of any other
 * input; the request is then issued as `fetch(<rewritten URL>, init)`.
 */
function withPopularitySort(
  input: Parameters<typeof fetch>[0],
  init?: Parameters<typeof fetch>[1],
): ReturnType<typeof fetch> {
  let href: string;
  if (typeof input === "string") {
    href = input;
  } else if (input instanceof URL) {
    href = input.toString();
  } else {
    href = input.url;
  }

  const target = new URL(href);
  const defaults: ReadonlyArray<readonly [string, string]> = [
    ["sort", "downloads"],
    ["direction", "-1"],
  ];
  for (const [param, fallback] of defaults) {
    if (!target.searchParams.has(param)) {
      target.searchParams.set(param, fallback);
    }
  }

  return fetch(target, init);
}

/**
 * Bytes per parameter for each dtype label (keys are upper-case; lookups
 * upper-case the incoming label first).
 *
 * Covers the safetensors dtype names, including the 8-bit float variants,
 * booleans and unsigned integers, plus a few 4-bit quantization labels.
 */
const DTYPE_BYTES: Record<string, number> = {
  // Floating point
  F64: 8, F32: 4, F16: 2, BF16: 2,
  // 8-bit floats (FP8 checkpoints)
  F8_E4M3: 1, F8_E5M2: 1,
  // Signed integers
  I64: 8, I32: 4, I16: 2, I8: 1,
  // Unsigned integers and booleans
  U64: 8, U32: 4, U16: 2, U8: 1, BOOL: 1,
  // Quantized types (4-bit)
  NF4: 0.5, FP4: 0.5, INT4: 0.5, GPTQ: 0.5,
};

/**
 * Estimates a model's weight size in bytes from per-dtype parameter counts.
 *
 * @param params - Map of dtype label to parameter count, or undefined.
 * @returns The summed byte estimate, or undefined when `params` is missing or
 *   the total is not positive. Unknown dtype labels are counted at 2 bytes per
 *   parameter (the BF16/F16 width).
 */
function estimateSizeFromDtypes(
  params: Record<string, number> | undefined,
): number | undefined {
  if (!params) return undefined;
  let total = 0;
  for (const [dtype, count] of Object.entries(params)) {
    const bpp = DTYPE_BYTES[dtype.toUpperCase()] ?? 2; // default BF16
    total += count * bpp;
  }
  return total > 0 ? total : undefined;
}

/**
 * Builds the mapper that converts a raw model entry into an `HfModelResult`,
 * or `null` when the entry should be hidden.
 *
 * An entry is hidden when it carries an excluded format tag and no embedding
 * tag, or when `excludeGguf` is set and it is tagged "gguf".
 *
 * @param excludeGguf - Whether entries tagged "gguf" are filtered out.
 */
function makeMapModel(excludeGguf: boolean) {
  type RawModel = {
    name: string;
    downloads: number;
    likes: number;
    safetensors?: { total: number; parameters?: Record<string, number> };
    tags?: string[];
  };

  return (raw: unknown): HfModelResult | null => {
    const model = raw as RawModel;
    const tags = model.tags;

    // Embedding models bypass the format blocklist entirely.
    const isEmbedding = tags?.some((tag) => EMBEDDING_TAGS.has(tag)) === true;
    const hasExcludedFormat =
      tags?.some((tag) => EXCLUDED_TAGS.has(tag)) === true;
    const isUnwantedGguf = excludeGguf && tags?.includes("gguf") === true;

    if ((!isEmbedding && hasExcludedFormat) || isUnwantedGguf) {
      return null;
    }

    const weights = model.safetensors;
    return {
      id: model.name,
      downloads: model.downloads,
      likes: model.likes,
      totalParams: weights?.total,
      estimatedSizeBytes: estimateSizeFromDtypes(weights?.parameters),
    };
  };
}

/** Maximum number of unsloth-owned results yielded before the general results. */
const UNSLOTH_PREFETCH = 20;

/**
 * Async generator that yields unsloth-owned matches first (at most
 * `UNSLOTH_PREFETCH` of them), followed by the general matches for the same
 * query with already-yielded names skipped.
 *
 * @param query - Search text passed to both listings.
 * @param task - Optional pipeline task filter applied to both listings.
 * @param accessToken - Optional HF token forwarded as credentials.
 */
async function* mergedModelIterator(
  query: string,
  task?: PipelineType,
  accessToken?: string,
): AsyncGenerator<unknown> {
  const taskFilter = task ? { task } : {};
  const shared = {
    additionalFields: ["safetensors", "tags"] as ("safetensors" | "tags")[],
    fetch: withPopularitySort,
    ...(accessToken ? { credentials: { accessToken } } : {}),
  };

  // Both listings are created up front.
  // NOTE(review): if listModels returns lazy async generators, no request is
  // sent until each one is first iterated, so the general listing would only
  // start in phase 2 — confirm against @huggingface/hub.
  const unslothListing = listModels({
    search: { query, owner: "unsloth", ...taskFilter },
    ...shared,
  });
  const generalListing = listModels({
    search: { query, ...taskFilter },
    ...shared,
  });

  // Phase 1: unsloth models, remembering their names for de-duplication.
  const yieldedNames = new Set<string>();
  let yielded = 0;
  for await (const entry of unslothListing) {
    const { name } = entry as { name?: string };
    if (name) yieldedNames.add(name);
    yield entry;
    yielded += 1;
    if (yielded >= UNSLOTH_PREFETCH) break;
  }

  // Phase 2: everything else, minus what phase 1 already produced.
  for await (const entry of generalListing) {
    const { name } = entry as { name?: string };
    if (name && yieldedNames.has(name)) continue;
    yield entry;
  }
}

/**
 * Async generator that yields the priority models first, each looked up
 * individually through `modelInfo`, and then the unsloth-owned listing with
 * those models skipped.
 *
 * All priority lookups are awaited together before anything is yielded;
 * lookups that fail are dropped silently.
 *
 * @param priorityIds - Model ids to surface first, in this order.
 * @param task - Optional pipeline task; a priority model whose `pipeline_tag`
 *   is set and differs from it is skipped, and the listing is filtered by it.
 * @param accessToken - Optional HF token forwarded as credentials.
 */
async function* priorityThenListingIterator(
  priorityIds: readonly string[],
  task?: PipelineType,
  accessToken?: string,
): AsyncGenerator<unknown> {
  const auth = accessToken ? { credentials: { accessToken } } : {};

  // Phase 1: look up every priority id in parallel.
  const lookups = priorityIds.map((id) =>
    modelInfo({
      name: id,
      additionalFields: ["safetensors", "tags"],
      ...auth,
    }),
  );
  const outcomes = await Promise.allSettled(lookups);

  const emitted = new Set<string>();
  for (const outcome of outcomes) {
    if (outcome.status !== "fulfilled") continue;
    const info = outcome.value as { name?: string; pipeline_tag?: string };
    // Models without a pipeline_tag are kept even when a task filter is set.
    const wrongTask = Boolean(
      task && info.pipeline_tag && info.pipeline_tag !== task,
    );
    if (wrongTask) continue;
    if (info.name) emitted.add(info.name);
    yield outcome.value;
  }

  // Phase 2: the unsloth listing, minus models already yielded above.
  const listing = listModels({
    search: { owner: "unsloth", ...(task ? { task } : {}) },
    additionalFields: ["safetensors", "tags"] as ("safetensors" | "tags")[],
    fetch: withPopularitySort,
    ...auth,
  });
  for await (const entry of listing) {
    const { name } = entry as { name?: string };
    if (name && emitted.has(name)) continue;
    yield entry;
  }
}

/**
 * Paginated Hugging Face model search.
 *
 * With an empty (whitespace-only) query it lists unsloth-owned models,
 * preceded by `priorityIds` when given. With a typed query it merges unsloth
 * matches with general matches and ignores the task filter.
 *
 * @param query - Search text.
 * @param options.task - Pipeline task filter (empty-query listings only).
 * @param options.accessToken - Optional HF token.
 * @param options.excludeGguf - Hide models tagged "gguf" (default false).
 * @param options.priorityIds - Ids to show first for an empty query. The
 *   iterator is rebuilt whenever this reference changes.
 * @returns The paginated search state with `results` reordered so ids starting
 *   with "unsloth/" come first.
 */
export function useHfModelSearch(
  query: string,
  options?: {
    task?: PipelineType;
    accessToken?: string;
    excludeGguf?: boolean;
    priorityIds?: readonly string[];
  },
) {
  const {
    task,
    accessToken,
    excludeGguf = false,
    priorityIds,
  } = options ?? {};

  const createIter = useCallback((): AsyncGenerator<unknown> => {
    const trimmed = query.trim();

    if (trimmed) {
      // Typed query: disable task filter so explicitly searched models still appear even if HF task metadata is wrong/missing.
      return mergedModelIterator(trimmed, undefined, accessToken);
    }

    // No query → show priority models first (with full metadata), then general unsloth listing
    if (priorityIds && priorityIds.length > 0) {
      return priorityThenListingIterator(priorityIds, task, accessToken);
    }

    return listModels({
      search: { owner: "unsloth", ...(task ? { task } : {}) },
      additionalFields: ["safetensors", "tags"],
      fetch: withPopularitySort,
      ...(accessToken ? { credentials: { accessToken } } : {}),
    }) as AsyncGenerator<unknown>;
  }, [query, task, accessToken, priorityIds]);

  const mapModel = useMemo(() => makeMapModel(excludeGguf), [excludeGguf]);
  const search = useHfPaginatedSearch(createIter, mapModel);

  // Secondary sort guarantee: unsloth models always float to the top
  const results = useMemo(() => {
    const rank = (id: string): number => (id.startsWith("unsloth/") ? 0 : 1);
    return search.results.slice().sort((x, y) => rank(x.id) - rank(y.id));
  }, [search.results]);

  return { ...search, results };
}


================================================
FILE: studio/frontend/src/hooks/use-hf-paginated-search.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useCallback, useEffect, useRef, useState } from "react";

/** State exposed by the paginated search hook. */
interface HfPaginatedState<T> {
  /** Items accumulated so far across all loaded batches. */
  results: T[];
  /** True while the first batch of a (re)started search is loading. */
  isLoading: boolean;
  /** True while a follow-up batch is loading. */
  isLoadingMore: boolean;
  /** False once the underlying iterator reported it is exhausted. */
  hasMore: boolean;
  /** Message from a failed first batch, otherwise null. */
  error: string | null;
}

// Idle/empty state; typed as `never` items so it can be cast to any item type.
const INITIAL: HfPaginatedState<never> = {
  results: [],
  isLoading: false,
  isLoadingMore: false,
  hasMore: false,
  error: null,
};
// Number of mapped (non-null) items requested per batch.
const BATCH = 20;

/**
 * Pulls raw values from `iter` until `size` mapped items have been collected
 * or the iterator is exhausted.
 *
 * Values that `mapItem` maps to `null` are discarded and do not count toward
 * `size`, so more than `size` raw values may be consumed.
 *
 * @returns The collected items and `done: true` only when the iterator ended
 *   before the batch was full.
 */
async function pullBatch<T>(
  iter: AsyncGenerator<unknown>,
  mapItem: (raw: unknown) => T | null,
  size: number,
) {
  const items: T[] = [];
  for (;;) {
    if (items.length >= size) {
      return { items, done: false };
    }
    const step = await iter.next();
    if (step.done) {
      return { items, done: true };
    }
    const mapped = mapItem(step.value);
    if (mapped !== null) {
      items.push(mapped);
    }
  }
}

/**
 * Generic paginated search over an async generator.
 *
 * Restarts (new iterator, fresh first batch) whenever `createIter`, `mapItem`
 * or `enabled` changes identity/value, so callers should memoize the first
 * two. Further batches are loaded on demand through `fetchMore`.
 *
 * @param createIter - Factory for the async generator of raw items.
 * @param mapItem - Maps a raw item to a result, or null to drop it.
 * @param options.enabled - When false the hook stays idle (default true).
 * @returns The current state plus `fetchMore`.
 */
export function useHfPaginatedSearch<T>(
  createIter: () => AsyncGenerator<unknown>,
  mapItem: (raw: unknown) => T | null,
  options?: { enabled?: boolean },
): HfPaginatedState<T> & { fetchMore: () => void } {
  const enabled = options?.enabled ?? true;
  const [state, setState] = useState<HfPaginatedState<T>>(
    INITIAL as HfPaginatedState<T>,
  );
  // Mirror of the latest committed state, so fetchMore can read it without
  // having `state` in its dependency list.
  const stateRef = useRef(state);
  useEffect(() => {
    stateRef.current = state;
  }, [state]);

  // Iterator of the current search, and a counter identifying that search.
  const iterRef = useRef<AsyncGenerator<unknown> | null>(null);
  const versionRef = useRef(0);

  useEffect(() => {
    // Bumping the version makes every batch still pending for an earlier
    // search a no-op when it settles. The earlier iterator is only dropped by
    // reference here; it is not explicitly closed.
    const v = ++versionRef.current;
    iterRef.current = null;

    if (!enabled) {
      setState(INITIAL as HfPaginatedState<T>);
      return;
    }

    setState({
      ...(INITIAL as HfPaginatedState<T>),
      isLoading: true,
    });

    const iter = createIter();
    iterRef.current = iter;

    pullBatch(iter, mapItem, BATCH)
      .then(({ items, done }) => {
        if (versionRef.current !== v) {
          return;
        }
        setState({
          results: items,
          isLoading: false,
          isLoadingMore: false,
          hasMore: !done,
          error: null,
        });
      })
      .catch((err) => {
        if (versionRef.current !== v) {
          return;
        }
        setState({
          results: [],
          isLoading: false,
          isLoadingMore: false,
          hasMore: false,
          error: err instanceof Error ? err.message : "Search failed",
        });
      });
  }, [createIter, mapItem, enabled]);

  // Loads the next batch; a no-op while a load is running or nothing is left.
  const fetchMore = useCallback(() => {
    const iter = iterRef.current;
    const { isLoading, isLoadingMore, hasMore } = stateRef.current;
    // NOTE(review): stateRef is synced in an effect, so two calls made before
    // the next commit would both pass this guard — confirm callers cannot
    // double-fire.
    if (!iter || isLoading || isLoadingMore || !hasMore) {
      return;
    }

    const v = versionRef.current;
    setState((prev) => ({ ...prev, isLoadingMore: true }));

    pullBatch(iter, mapItem, BATCH)
      .then(({ items, done }) => {
        if (versionRef.current !== v) {
          return;
        }
        setState((prev) => ({
          ...prev,
          results: [...prev.results, ...items],
          isLoadingMore: false,
          hasMore: !done,
        }));
      })
      .catch(() => {
        if (versionRef.current !== v) {
          return;
        }
        // A failed follow-up batch keeps the results loaded so far and stops
        // pagination; `error` is left unchanged.
        setState((prev) => ({ ...prev, isLoadingMore: false, hasMore: false }));
      });
  }, [mapItem]);

  return { ...state, fetchMore };
}


================================================
FILE: studio/frontend/src/hooks/use-hf-token-validation.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { whoAmI } from "@huggingface/hub";
import { useCallback, useEffect, useRef, useState } from "react";
import { useDebouncedValue } from "./use-debounced-value";

/** Result of validating a Hugging Face token. */
export interface HfTokenValidationState {
  /** true/false once a check finished; null when nothing has been checked. */
  isValid: boolean | null;
  /** Message describing a failed check, otherwise null. */
  error: string | null;
  /** True while a check request is pending. */
  isChecking: boolean;
}

// State used when there is no token to check.
const INITIAL: HfTokenValidationState = {
  isValid: null,
  error: null,
  isChecking: false,
};

/**
 * Validates the Hugging Face token by calling `whoAmI`.
 * Debounces the token (500 ms) to avoid excessive requests while typing; the
 * token is trimmed and stripped of leading/trailing quote characters first.
 *
 * Only the most recent check may update the state. Clearing the token also
 * invalidates a check that is still in flight, so a late response for the old
 * token cannot overwrite the reset state.
 *
 * @param token - Raw token text as entered by the user.
 * @returns Validation state: isValid (null = not checked), error message, and
 *   isChecking. Any failure of the check is reported as
 *   "invalid or expired token".
 */
export function useHfTokenValidation(token: string): HfTokenValidationState {
  const debouncedToken = useDebouncedValue(
    token.trim().replace(/^["']+|["']+$/g, ""),
    500,
  );
  const [state, setState] = useState<HfTokenValidationState>(INITIAL);
  const versionRef = useRef(0);

  const runCheck = useCallback(async (t: string) => {
    // Claim a new version before anything else, so an empty token supersedes
    // pending checks just like a new non-empty token does.
    const v = ++versionRef.current;

    if (!t) {
      setState({ isValid: null, error: null, isChecking: false });
      return;
    }

    setState((prev) => ({ ...prev, isChecking: true, error: null }));

    try {
      await whoAmI({ accessToken: t });
      if (versionRef.current !== v) return;
      setState({ isValid: true, error: null, isChecking: false });
    } catch {
      if (versionRef.current !== v) return;
      setState({
        isValid: false,
        error: "invalid or expired token",
        isChecking: false,
      });
    }
  }, []);

  useEffect(() => {
    if (!debouncedToken) {
      // Invalidate any check still pending for the previous token; without
      // this its late result would replace the reset state below.
      versionRef.current += 1;
      setState(INITIAL);
      return;
    }
    runCheck(debouncedToken);
  }, [debouncedToken, runCheck]);

  return state;
}


================================================
FILE: studio/frontend/src/hooks/use-infinite-scroll.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useEffect, useRef } from "react";

/**
 * Calls `fetchMore` whenever a sentinel element scrolls into view.
 *
 * Attach `scrollRef` to the scroll container (used as the observer root) and
 * `sentinelRef` to an element rendered after the last item.
 *
 * @param fetchMore - Invoked each time the sentinel is reported intersecting.
 * @param _itemCount - Current number of rendered items. The observer is
 *   re-created when it changes, which makes it report the sentinel's current
 *   visibility again.
 * @returns The two refs to attach.
 */
export function useInfiniteScroll(fetchMore: () => void, _itemCount: number) {
  const scrollRef = useRef<HTMLDivElement>(null);
  const sentinelRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const el = sentinelRef.current;
    if (!el) {
      return;
    }
    const obs = new IntersectionObserver(
      ([e]) => {
        if (e.isIntersecting) {
          fetchMore();
        }
      },
      { threshold: 0, root: scrollRef.current },
    );
    obs.observe(el);
    return () => obs.disconnect();
    // `_itemCount` is a dependency on purpose: an observer only reports when
    // intersection changes, so a sentinel that stays visible after a short
    // batch would never trigger another load. Re-observing after the list
    // grows re-reports its visibility, and also picks up a sentinel that was
    // not mounted on the first run.
  }, [fetchMore, _itemCount]);

  return { scrollRef, sentinelRef };
}


================================================
FILE: studio/frontend/src/hooks/use-mobile.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useEffect, useState } from "react";

// Viewport widths below this many pixels count as mobile.
const MOBILE_BREAKPOINT = 768;

/**
 * Reports whether the viewport is currently narrower than MOBILE_BREAKPOINT.
 *
 * Returns false until the first effect has run, then tracks changes of the
 * matching `max-width` media query.
 */
export function useIsMobile() {
  const [isMobile, setIsMobile] = useState<boolean | undefined>(undefined);

  useEffect(() => {
    const query = window.matchMedia(
      `(max-width: ${MOBILE_BREAKPOINT - 1}px)`,
    );
    const sync = () => {
      setIsMobile(window.innerWidth < MOBILE_BREAKPOINT);
    };

    query.addEventListener("change", sync);
    // Take an initial reading right after subscribing.
    sync();

    return () => {
      query.removeEventListener("change", sync);
    };
  }, []);

  return Boolean(isMobile);
}


================================================
FILE: studio/frontend/src/hooks/use-recommended-model-vram.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { modelInfo } from "@huggingface/hub";
import { useEffect, useState } from "react";

/**
 * Looks up the safetensors total parameter count on Hugging Face for each of
 * the given model ids. Used to show VRAM fit (FIT / TIGHT / OOM) for
 * recommended/default models in the chat model dropdown.
 *
 * Ids are de-emptied, sorted and joined into a key, so the lookup only re-runs
 * when the set of ids changes rather than on every new array instance.
 *
 * @param ids - Model ids to look up; falsy entries are ignored.
 * @returns `paramCountById` (only ids with a positive numeric total) and
 *   `isLoading`.
 */
export function useRecommendedModelVram(ids: string[]) {
  const [paramCountById, setParamCountById] = useState<Map<string, number>>(
    new Map(),
  );
  const [isLoading, setIsLoading] = useState(false);

  const stableKey = [...ids].filter(Boolean).sort().join(",");

  useEffect(() => {
    // An empty key means there is nothing to look up.
    if (!stableKey) {
      setParamCountById(new Map());
      setIsLoading(false);
      return;
    }

    const wantedIds = stableKey.split(",");
    let canceled = false;
    const next = new Map<string, number>();

    const recordTotal = async (id: string): Promise<void> => {
      if (canceled) return;
      try {
        const info = await modelInfo({
          name: id,
          additionalFields: ["safetensors"],
        });
        const total = (info as { safetensors?: { total?: number } })
          .safetensors?.total;
        if (typeof total === "number" && total > 0) {
          next.set(id, total);
        }
      } catch {
        // Model not on HF or no safetensors; skip
      }
    };

    const loadAll = async (): Promise<void> => {
      setIsLoading(true);
      await Promise.all(wantedIds.map(recordTotal));
      // Results for a superseded id set (or after unmount) are dropped.
      if (canceled) return;
      setParamCountById(next);
      setIsLoading(false);
    };
    void loadAll();

    return () => {
      canceled = true;
    };
  }, [stableKey]);

  return { paramCountById, isLoading };
}


================================================
FILE: studio/frontend/src/index.css
================================================
/* SPDX-License-Identifier: AGPL-3.0-only */
/* Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0 */

@import "tailwindcss";
@import "tw-animate-css";
@import "shadcn/tailwind.css";
@import "streamdown/styles.css";
@import "@fontsource-variable/figtree";
@import "@fontsource-variable/space-grotesk";
@import "@fontsource-variable/inter";
@import "tw-shimmer";
@plugin "@toolwind/corner-shape";
@source "../node_modules/streamdown/dist/*.js";

@font-face {
  font-family: "Hellix";
  src: url("/fonts/Hellix-SemiBold.woff2") format("woff2"),
    url("/fonts/Hellix-SemiBold.woff") format("woff");
  font-weight: 600;
  font-style: normal;
  font-display: swap;
}

@custom-variant dark (&:is(.dark *));

:root {
  /* Animation timing */
  --duration-micro: 100ms;
  --duration-fast: 150ms;
  --duration-normal: 200ms;

  /* Easing curves (Emil Kowalski) */
  --ease-out-quart: cubic-bezier(0.165, 0.84, 0.44, 1);
  --ease-out-cubic: cubic-bezier(0.215, 0.61, 0.355, 1);

  --background: oklch(1 0 0);
  --foreground: oklch(0.2686 0 0);
  --card: oklch(1 0 0);
  --card-foreground: oklch(0.1281 0.0179 169.2764);
  --popover: oklch(1 0 0);
  --popover-foreground: oklch(0.1281 0.0179 169.2764);
  --primary: oklch(0.6929 0.1396 166.5513);
  --primary-foreground: oklch(1 0 0);
  --secondary: oklch(0.9596 0.0275 167.8295);
  --secondary-foreground: oklch(0.2868 0.0649 159.9823);
  --muted: oklch(0.9702 0 0);
  --muted-foreground: oklch(0.5486 0 0);
  --accent: oklch(0.9596 0.0275 167.8295);
  --accent-foreground: oklch(0.2868 0.0649 159.9823);
  --destructive: oklch(0.6368 0.2078 25.3313);
  --border: oklch(0.9208 0.0101 164.8536);
  --input: oklch(0.9208 0.0101 164.8536);
  --ring: oklch(0.6929 0.1396 166.5513);
  --chart-1: oklch(0.6929 0.1396 166.5513);
  --chart-2: oklch(0.694 0.1395 136.6059);
  --chart-3: oklch(0.7014 0.1193 197.5897);
  --chart-4: oklch(0.6926 0.1112 346.5775);
  --chart-5: oklch(0.7497 0.1003 85.0057);
  --radius: 1.2rem;
  --sidebar: oklch(0.975 0 0);
  --sidebar-foreground: oklch(0.1281 0.0179 169.2764);
  --sidebar-primary: oklch(0.6929 0.1396 166.5513);
  --sidebar-primary-foreground: oklch(1 0 0);
  --sidebar-accent: oklch(0.96 0.0279 166.55);
  --sidebar-accent-foreground: oklch(0.2868 0.0649 159.9823);
  --sidebar-border: oklch(0.9208 0.0101 164.8536);
  --sidebar-ring: oklch(0.6929 0.1396 166.5513);
  --destructive-foreground: oklch(1 0 0);
  --font-sans: "Inter Variable", ui-sans-serif, sans-serif, system-ui;
  --font-heading: "Hellix", "Space Grotesk Variable", ui-sans-serif, sans-serif;
  --font-serif: Source Serif 4, serif;
  --font-mono: JetBrains Mono, monospace;
  --shadow-color: hsl(0 0% 0%);
  --shadow-opacity: 0;
  --shadow-blur: 0px;
  --shadow-spread: 0px;
  --shadow-offset-x: 0px;
  --shadow-offset-y: 0px;
  --letter-spacing: 0em;
  --spacing: 0.25rem;
  /*--shadow-2xs: 0px 0px 0px 0px hsl(0 0% 0% / 0);*/
  /*--shadow-xs: 0px 0px 0px 0px hsl(0 0% 0% / 0);*/
  /*--shadow-sm:*/
  /*    0px 0px 0px 0px hsl(0 0% 0% / 0),*/
  /*    0px 1px 2px 0px hsl(0 0% 0% / 0);*/
  /*--shadow:*/
  /*    0px 0px 0px 0px hsl(0 0% 0% / 0),*/
  /*    0px 1px 2px 0px hsl(0 0% 0% / 0);*/
  /*--shadow-md:*/
  /*    0px 0px 0px 0px hsl(0 0% 0% / 0),*/
  /*    0px 2px 4px 0px hsl(0 0% 0% / 0);*/
  /*--shadow-lg:*/
  /*    0px 0px 0px 0px hsl(0 0% 0% / 0),*/
  /*    0px 4px 6px 0px hsl(0 0% 0% / 0);*/
  /*--shadow-xl:*/
  /*    0px 0px 0px 0px hsl(0 0% 0% / 0),*/
  /*    0px 8px 10px 0px hsl(0 0% 0% / 0);*/
  /*--shadow-2xl: 0px 0px 0px 0px hsl(0 0% 0% / 0);*/
  --tracking-normal: 0em;
}
.dark {
  --background: oklch(0.24 0 0);
  --foreground: oklch(0.98 0 0);
  --card: oklch(0.28 0 0);
  --card-foreground: oklch(0.98 0 0);
  --popover: oklch(0.28 0 0);
  --popover-foreground: oklch(0.98 0 0);
  --primary: oklch(0.6929 0.1396 166.5513);
  --primary-foreground: oklch(1 0 0);
  --secondary: oklch(0.33 0 0);
  --secondary-foreground: oklch(0.98 0 0);
  --muted: oklch(0.33 0 0);
  --muted-foreground: oklch(0.70 0 0);
  --accent: oklch(0.33 0 0);
  --accent-foreground: oklch(0.98 0 0);
  --destructive: oklch(0.6368 0.2078 25.3313);
  --border: oklch(0.38 0 0);
  --input: oklch(0.38 0 0);
  --ring: oklch(0.6929 0.1396 166.5513);
  --chart-1: oklch(0.7511 0.1407 166.2284);
  --chart-2: oklch(0.75 0.14 136.5572);
  --chart-3: oklch(0.7554 0.1285 197.339);
  --chart-4: oklch(0.7503 0.1199 346.7805);
  --chart-5: oklch(0.799 0.1196 84.6633);
  --sidebar: oklch(0.24 0 0);
  --sidebar-foreground: oklch(0.98 0 0);
  --sidebar-primary: oklch(0.6929 0.1396 166.5513);
  --sidebar-primary-foreground: oklch(1 0 0);
  --sidebar-accent: oklch(0.33 0 0);
  --sidebar-accent-foreground: oklch(0.98 0 0);
  --sidebar-border: oklch(0.38 0 0);
  --sidebar-ring: oklch(0.6929 0.1396 166.5513);
  --destructive-foreground: oklch(1 0 0);
  --radius: 1.2rem;
  --font-sans: Geist, ui-sans-serif, sans-serif, system-ui;
  --font-serif: Source Serif 4, serif;
  --font-mono: JetBrains Mono, monospace;
  --shadow-color: hsl(0 0% 0%);
  --shadow-opacity: 0;
  --shadow-blur: 0px;
  --shadow-spread: 0px;
  --shadow-offset-x: 0px;
  --shadow-offset-y: 0px;
  --letter-spacing: 0em;
  --spacing: 0.25rem;
  --shadow-2xs: 0px 0px 0px 0px hsl(0 0% 0% / 0);
  --shadow-xs: 0px 0px 0px 0px hsl(0 0% 0% / 0);
  --shadow-sm: 0px 0px 0px 0px hsl(0 0% 0% / 0), 0px 1px 2px 0px
  hsl(0 0% 0% / 0);
  --shadow: 0px 0px 0px 0px hsl(0 0% 0% / 0), 0px 1px 2px 0px hsl(0 0% 0% / 0);
  --shadow-md: 0px 0px 0px 0px hsl(0 0% 0% / 0), 0px 2px 4px 0px
  hsl(0 0% 0% / 0);
  --shadow-lg: 0px 0px 0px 0px hsl(0 0% 0% / 0), 0px 4px 6px 0px
  hsl(0 0% 0% / 0);
  --shadow-xl: 0px 0px 0px 0px hsl(0 0% 0% / 0), 0px 8px 10px 0px
  hsl(0 0% 0% / 0);
  --shadow-2xl: 0px 0px 0px 0px hsl(0 0% 0% / 0);
}

@theme inline {
  --font-sans: "Inter Variable", ui-sans-serif, sans-serif, system-ui;
  --font-heading: "Hellix", "Space Grotesk Variable", ui-sans-serif, sans-serif;
  --color-sidebar-ring: var(--sidebar-ring);
  --color-sidebar-border: var(--sidebar-border);
  --color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
  --color-sidebar-accent: var(--sidebar-accent);
  --color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
  --color-sidebar-primary: var(--sidebar-primary);
  --color-sidebar-foreground: var(--sidebar-foreground);
  --color-sidebar: var(--sidebar);
  --color-chart-5: var(--chart-5);
  --color-chart-4: var(--chart-4);
  --color-chart-3: var(--chart-3);
  --color-chart-2: var(--chart-2);
  --color-chart-1: var(--chart-1);
  --color-ring: var(--ring);
  --color-input: var(--input);
  --color-border: var(--border);
  --color-destructive: var(--destructive);
  --color-accent-foreground: var(--accent-foreground);
  --color-accent: var(--accent);
  --color-muted-foreground: var(--muted-foreground);
  --color-muted: var(--muted);
  --color-secondary-foreground: var(--secondary-foreground);
  --color-secondary: var(--secondary);
  --color-primary-foreground: var(--primary-foreground);
  --color-primary: var(--primary);
  --color-popover-foreground: var(--popover-foreground);
  --color-popover: var(--popover);
  --color-card-foreground: var(--card-foreground);
  --color-card: var(--card);
  --color-foreground: var(--foreground);
  --color-background: var(--background);
  --radius-sm: calc(var(--radius) - 4px);
  --radius-md: calc(var(--radius) - 2px);
  --radius-lg: var(--radius);
  --radius-xl: calc(var(--radius) + 4px);
  --radius-2xl: calc(var(--radius) + 8px);
  --radius-3xl: calc(var(--radius) + 12px);
  --radius-4xl: calc(var(--radius) + 16px);
  --font-mono: JetBrains Mono, monospace;
  --font-serif: Source Serif 4, serif;
  --radius: 1.2rem;
  --tracking-tighter: calc(var(--tracking-normal) - 0.05em);
  --tracking-tight: calc(var(--tracking-normal) - 0.025em);
  --tracking-wide: calc(var(--tracking-normal) + 0.025em);
  --tracking-wider: calc(var(--tracking-normal) + 0.05em);
  --tracking-widest: calc(var(--tracking-normal) + 0.1em);
  --tracking-normal: var(--tracking-normal);
  /*--shadow-2xl: var(--shadow-2xl);*/
  /*--shadow-xl: var(--shadow-xl);*/
  /*--shadow-lg: var(--shadow-lg);*/
  /*--shadow-md: var(--shadow-md);*/
  /*--shadow: var(--shadow);*/
  /*--shadow-sm: var(--shadow-sm);*/
  /*--shadow-xs: var(--shadow-xs);*/
  /*--shadow-2xs: var(--shadow-2xs);*/
  /*--spacing: var(--spacing);*/
  /*--letter-spacing: var(--letter-spacing);*/
  /*--shadow-offset-y: var(--shadow-offset-y);*/
  /*--shadow-offset-x: var(--shadow-offset-x);*/
  /*--shadow-spread: var(--shadow-spread);*/
  /*--shadow-blur: var(--shadow-blur);*/
  /*--shadow-opacity: var(--shadow-opacity);*/
  /*--color-shadow-color: var(--shadow-color);*/
  --color-destructive-foreground: var(--destructive-foreground);
  --animate-pulse: pulse var(--duration) ease-out infinite;
  @keyframes pulse {
    0%,
    100% {
      box-shadow: 0 0 0 0 var(--pulse-color);
    }
    50% {
      box-shadow: 0 0 0 8px var(--pulse-color);
    }
  }
  --animate-shiny-text: shiny-text 8s infinite;
  @keyframes shiny-text {
    0%,
    90%,
    100% {
      background-position: calc(-100% - var(--shiny-width)) 0;
    }
    30%,
    60% {
      background-position: calc(100% + var(--shiny-width)) 0;
    }
  }
  --animate-shine: shine var(--duration) infinite linear
;
  @keyframes shine {
  0% {
    background-position: 0% 0%;
    }
  50% {
    background-position: 100% 100%;
    }
  to {
    background-position: 0% 0%;
    }
  }}

/* Global element defaults. */
@layer base {
  /* Every element gets the theme border colour and a 50%-opacity ring outline colour. */
  * {
    @apply border-border outline-ring/50;
  }
  body {
    @apply font-sans bg-background text-foreground;
    letter-spacing: var(--tracking-normal);
  }
  html {
    @apply font-sans;
    /* Keep the scrollbar gutter reserved so content width does not change
       when a scrollbar appears or disappears. */
    scrollbar-gutter: stable;
  }
  /* NOTE(review): presumably cancels the right margin a scroll-lock helper
     adds to <body> while an overlay is open — confirm which library sets
     data-scroll-locked. */
  body[data-scroll-locked] {
    margin-right: 0 !important;
  }
  /* Headings use the heading font stack. */
  h1,
  h2,
  h3,
  h4,
  h5,
  h6 {
    font-family: var(--font-heading);
  }
  /* Any element using these weight utilities also switches to the heading font. */
  .font-medium,
  .font-semibold,
  .font-bold {
    font-family: var(--font-heading);
  }
}

@layer utilities {
  /* Heading font utility */
  .font-heading {
    font-family: var(--font-heading);
  }

  /* Elevated surface shadow (use ring-* for borders) */
  .shadow-border {
    --tw-shadow: 0 4px 16px rgba(0, 0, 0, 0.1);
    --tw-shadow-colored: 0 4px 16px var(--tw-shadow-color);
    box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000),
      var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
  }
  .dark .shadow-border {
    --tw-shadow: 0 4px 16px rgba(0, 0, 0, 0.3);
  }

  [data-streamdown="unordered-list"] {
    list-style-type: disc;
    list-style-position: outside;
    padding-left: 1.25rem;
    margin-block: 0.5rem;
  }

  [data-streamdown="ordered-list"] {
    list-style-type: decimal;
    list-style-position: outside;
    padding-left: 1.25rem;
    margin-block: 0.5rem;
  }

  [data-streamdown="list-item"] {
    display: list-item;
  }

  /* Flatten code blocks: single border, language label, then code directly */
  [data-streamdown="code-block-body"] {
    border: none !important;
    border-radius: 0 !important;
    background: transparent !important;
    padding: 0 !important;
  }
  [data-streamdown="code-block"] {
    gap: 0;
    padding: 0.5rem;
  }
  [data-streamdown="code-block-header"] {
    padding-left: 0.75rem;
  }
}

/* Minimal scrollbar — thumb only, no track */
* {
  scrollbar-width: thin;
  scrollbar-color: transparent transparent;
}
*:hover {
  scrollbar-color: oklch(0.6 0 0 / 0.3) transparent;
}
.dark *:hover {
  scrollbar-color: oklch(0.5 0 0 / 0.35) transparent;
}

/* Webkit (Chrome, Safari, Edge) */
::-webkit-scrollbar {
  width: 6px;
  height: 6px;
}
::-webkit-scrollbar-track {
  background: transparent;
}
::-webkit-scrollbar-thumb {
  background: transparent;
  border-radius: 9999px;
}
*:hover::-webkit-scrollbar-thumb {
  background: oklch(0.6 0 0 / 0.3);
}
.dark *:hover::-webkit-scrollbar-thumb {
  background: oklch(0.5 0 0 / 0.35);
}

/*---break---*/

/* Global base styles driven by the theme tokens. */
@layer base {
  /* Every element: themed border colour and a half-opacity ring-coloured outline. */
  * {
    @apply border-border outline-ring/50;
  }
  /* Page background and default text colour. */
  body {
    @apply bg-background text-foreground;
  }
}

/* Disable the default root view-transition animation and blending for both
   the outgoing and incoming snapshots. */
::view-transition-old(root), ::view-transition-new(root) {
    animation: none;
    mix-blend-mode: normal;
}

================================================
FILE: studio/frontend/src/main.tsx
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { StrictMode } from "react";
import { createRoot } from "react-dom/client";

import "./index.css";
import { fetchDeviceType } from "./config/env";
import { App } from "./app/app";

// `crypto` may be missing entirely, hence the `| undefined` widening.
const globalCrypto = globalThis.crypto as Crypto | undefined;

if (globalCrypto && typeof globalCrypto.randomUUID !== "function") {
  // Some envs ship `crypto` but no `randomUUID()` (or a non-function stub).
  // Provide a best-effort v4 UUID using `getRandomValues` when available.
  const cryptoRef = globalCrypto;

  /** Return one random byte; falls back to `Math.random()` (not
   *  cryptographically strong) when `getRandomValues` is unavailable. */
  function getRandomByte(): number {
    if (typeof cryptoRef.getRandomValues === "function") {
      return cryptoRef.getRandomValues(new Uint8Array(1))[0];
    }
    return Math.floor(Math.random() * 256);
  }

  // Template-based UUID: every `0`, `1` and `8` in the template is replaced by
  // a hex digit. The mask `15 >> (c / 4)` is 15 for `0`/`1` (full random
  // nibble) and 3 for `8` (yielding 8..b); the literal `4` is left untouched.
  cryptoRef.randomUUID = (() =>
    "10000000-1000-4000-8000-100000000000".replace(/[018]/g, (c) =>
      (+c ^ (getRandomByte() & (15 >> (+c / 4)))).toString(16),
    )) as Crypto["randomUUID"];
}

// Mount point declared in index.html; without it nothing can render.
const rootElement = document.getElementById("root");
if (!rootElement) {
  throw new Error("Root element not found");
}

// Wait for the device-type probe before the first render, but never let a
// failed probe block startup: a rejection is logged and the app still mounts
// (previously a rejection left the page blank with an unhandled rejection).
fetchDeviceType()
  .catch((error: unknown) => {
    console.error("Device type detection failed; continuing startup.", error);
  })
  .then(() => {
    createRoot(rootElement).render(
      <StrictMode>
        <App />
      </StrictMode>,
    );
  });


================================================
FILE: studio/frontend/src/shared/toast.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { toast } from "sonner";

/** Show a success toast with the given message (delegates to sonner's `toast.success`). */
export function toastSuccess(message: string): void {
  toast.success(message);
}

/**
 * Show an error toast (delegates to sonner's `toast.error`).
 *
 * @param message - Main toast text.
 * @param description - Optional secondary text passed as the `description` option.
 */
export function toastError(message: string, description?: string): void {
  toast.error(message, {
    description,
  });
}


================================================
FILE: studio/frontend/src/speech-recognition.d.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Minimal Web Speech API (Speech Recognition) types for browsers that support it.
 * Full types: @types/dom-speech-recognition
 */
/** Array-like, read-only collection of recognition results (index or `item()` access). */
interface SpeechRecognitionResultList {
  readonly length: number;
  item(index: number): SpeechRecognitionResult;
  [index: number]: SpeechRecognitionResult;
}

/** Array-like collection of alternatives for one result, plus an `isFinal` flag. */
interface SpeechRecognitionResult {
  readonly length: number;
  readonly isFinal: boolean;
  item(index: number): SpeechRecognitionAlternative;
  [index: number]: SpeechRecognitionAlternative;
}

/** One candidate transcription: the recognized text and a numeric confidence. */
interface SpeechRecognitionAlternative {
  readonly transcript: string;
  readonly confidence: number;
}

/** Event payload delivered to `onresult`: the result list and a `resultIndex` into it. */
interface SpeechRecognitionEvent extends Event {
  readonly resultIndex: number;
  readonly results: SpeechRecognitionResultList;
}

/**
 * Recognizer instance: configuration flags, nullable event-handler slots,
 * and the start/stop/abort controls. Only the members declared here are typed.
 */
interface SpeechRecognition extends EventTarget {
  continuous: boolean;
  interimResults: boolean;
  lang: string;
  onresult: ((event: SpeechRecognitionEvent) => void) | null;
  onerror: ((event: Event) => void) | null;
  onend: (() => void) | null;
  start(): void;
  stop(): void;
  abort(): void;
}

/** Zero-argument constructor type for `SpeechRecognition`. */
interface SpeechRecognitionConstructor {
  new (): SpeechRecognition;
}

/** Augments `Window` with the optional standard and `webkit`-prefixed constructors. */
interface Window {
  SpeechRecognition?: SpeechRecognitionConstructor;
  webkitSpeechRecognition?: SpeechRecognitionConstructor;
}

/** Global binding for the constructor; typed as possibly `undefined` so callers must check before use. */
declare var SpeechRecognition: SpeechRecognitionConstructor | undefined;


================================================
FILE: studio/frontend/src/stores/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Global stores
export {};


================================================
FILE: studio/frontend/src/stores/training.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import { useTrainingConfigStore } from "@/features/training";

// `useWizardStore` is an alias of `useTrainingConfigStore`; both names are
// exported from this module and refer to the same store hook.
export const useWizardStore = useTrainingConfigStore;
export { useTrainingConfigStore };


================================================
FILE: studio/frontend/src/types/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Shared types
export type {};


================================================
FILE: studio/frontend/src/types/training.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/** Model categories selectable in the wizard. */
export type ModelType = "vision" | "audio" | "embeddings" | "text";
/** Supported training methods; see `isAdapterMethod` for the LoRA/QLoRA subset. */
export type TrainingMethod = "qlora" | "lora" | "full";

/** Return `true` for the `"lora"` and `"qlora"` methods, `false` for anything else. */
export function isAdapterMethod(method: TrainingMethod): boolean {
  switch (method) {
    case "lora":
    case "qlora":
      return true;
    default:
      return false;
  }
}
/** Wizard step index; the wizard has exactly five steps. */
export type StepNumber = 1 | 2 | 3 | 4 | 5;
/** Where the training dataset comes from. */
export type DatasetSource = "huggingface" | "upload";
/** Dataset format selector; `"auto"` is listed alongside the explicit formats. */
export type DatasetFormat = "auto" | "alpaca" | "chatml" | "sharegpt";
/** Gradient checkpointing mode, expressed as string literals (note `"true"` is a string). */
export type GradientCheckpointing = "none" | "true" | "unsloth";

/**
 * Data half of the training wizard store: the current step plus every
 * user-editable setting. Each field has a matching setter in `WizardActions`.
 */
export interface WizardState {
  // Navigation
  currentStep: StepNumber;
  // Model selection
  modelType: ModelType | null;
  selectedModel: string | null;
  trainingMethod: TrainingMethod;
  hfToken: string;
  // Dataset selection
  datasetSource: DatasetSource;
  datasetFormat: DatasetFormat;
  dataset: string | null;
  datasetSubset: string | null;
  datasetSplit: string | null;
  uploadedFile: string | null;
  // Core hyperparameters
  epochs: number;
  contextLength: number;
  learningRate: number;
  // LoRA settings
  loraRank: number;
  loraAlpha: number;
  loraDropout: number;
  loraVariant: "lora" | "rslora" | "loftq";
  // Optimization / schedule settings
  batchSize: number;
  gradientAccumulation: number;
  weightDecay: number;
  warmupSteps: number;
  maxSteps: number;
  saveSteps: number;
  packing: boolean;
  trainOnCompletions: boolean;
  gradientCheckpointing: GradientCheckpointing;
  randomSeed: number;
  // Logging integrations
  enableWandb: boolean;
  wandbToken: string;
  wandbProject: string;
  enableTensorboard: boolean;
  tensorboardDir: string;
  logFrequency: number;
  // Which parts of the model to fine-tune
  finetuneVisionLayers: boolean;
  finetuneLanguageLayers: boolean;
  finetuneAttentionModules: boolean;
  finetuneMLPModules: boolean;
  targetModules: string[];
}

/**
 * Action half of the training wizard store: step navigation, one setter per
 * `WizardState` field, a `canProceed` predicate and a `reset`.
 */
export interface WizardActions {
  // Navigation
  setStep: (step: StepNumber) => void;
  nextStep: () => void;
  prevStep: () => void;
  // Model selection
  setModelType: (type: ModelType) => void;
  setSelectedModel: (model: string | null) => void;
  setTrainingMethod: (method: TrainingMethod) => void;
  setHfToken: (token: string) => void;
  // Dataset selection
  setDatasetSource: (source: DatasetSource) => void;
  setDatasetFormat: (format: DatasetFormat) => void;
  setDataset: (dataset: string | null) => void;
  setDatasetSubset: (subset: string | null) => void;
  setDatasetSplit: (split: string | null) => void;
  setUploadedFile: (file: string | null) => void;
  // Core hyperparameters
  setEpochs: (epochs: number) => void;
  setContextLength: (length: number) => void;
  setLearningRate: (rate: number) => void;
  // LoRA settings
  setLoraRank: (rank: number) => void;
  setLoraAlpha: (alpha: number) => void;
  setLoraDropout: (dropout: number) => void;
  setLoraVariant: (v: "lora" | "rslora" | "loftq") => void;
  // Optimization / schedule settings
  setBatchSize: (v: number) => void;
  setGradientAccumulation: (v: number) => void;
  setWeightDecay: (v: number) => void;
  setWarmupSteps: (v: number) => void;
  setMaxSteps: (v: number) => void;
  setSaveSteps: (v: number) => void;
  setPacking: (v: boolean) => void;
  setTrainOnCompletions: (v: boolean) => void;
  setGradientCheckpointing: (v: GradientCheckpointing) => void;
  setRandomSeed: (v: number) => void;
  // Logging integrations
  setEnableWandb: (v: boolean) => void;
  setWandbToken: (v: string) => void;
  setWandbProject: (v: string) => void;
  setEnableTensorboard: (v: boolean) => void;
  setTensorboardDir: (v: string) => void;
  setLogFrequency: (v: number) => void;
  // Which parts of the model to fine-tune
  setFinetuneVisionLayers: (v: boolean) => void;
  setFinetuneLanguageLayers: (v: boolean) => void;
  setFinetuneAttentionModules: (v: boolean) => void;
  setFinetuneMLPModules: (v: boolean) => void;
  setTargetModules: (v: string[]) => void;
  // Step gating and full reset
  canProceed: () => boolean;
  reset: () => void;
}

/** Display metadata for one wizard step: its number and three text fields. */
export interface StepConfig {
  number: StepNumber;
  title: string;
  subtitle: string;
  description: string;
}


================================================
FILE: studio/frontend/src/utils/index.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

// Utility functions
export { normalizeNonEmptyName } from "./strings";


================================================
FILE: studio/frontend/src/utils/strings.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

/**
 * Trim surrounding whitespace from `value`; if nothing is left, return
 * `fallback` (default `"Unnamed"`) instead.
 */
export function normalizeNonEmptyName(
  value: string,
  fallback = "Unnamed",
): string {
  const cleaned = value.trim();
  if (cleaned === "") {
    return fallback;
  }
  return cleaned;
}


================================================
FILE: studio/frontend/tsconfig.app.json
================================================
{
  "compilerOptions": {
    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
    "target": "ES2022",
    "useDefineForClassFields": true,
    "lib": ["ES2022", "DOM", "DOM.Iterable"],
    "module": "ESNext",
    "types": ["vite/client", "react", "react-dom"],
    "skipLibCheck": true,

    /* Bundler mode */
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "verbatimModuleSyntax": true,
    "moduleDetection": "force",
    "noEmit": true,
    "jsx": "react-jsx",

    /* Linting */
    "strict": true,
    "noUnusedLocals": false,
    "noUnusedParameters": true,
    "erasableSyntaxOnly": true,
    "noFallthroughCasesInSwitch": true,
    "noUncheckedSideEffectImports": true,
    "baseUrl": ".",
    "paths": {
      "@/*": ["./src/*"]
    }
  },
  "include": ["src"]
}


================================================
FILE: studio/frontend/tsconfig.json
================================================
{
  "files": [],
  "references": [
    { "path": "./tsconfig.app.json" },
    { "path": "./tsconfig.node.json" }
  ],
  "compilerOptions": {
    "baseUrl": ".",
    "paths": {
      "@/*": ["./src/*"]
    }
  }
}


================================================
FILE: studio/frontend/tsconfig.node.json
================================================
{
  "compilerOptions": {
    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
    "target": "ES2023",
    "lib": ["ES2023"],
    "module": "ESNext",
    "types": ["node"],
    "skipLibCheck": true,

    /* Bundler mode */
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "verbatimModuleSyntax": true,
    "moduleDetection": "force",
    "noEmit": true,

    /* Linting */
    "strict": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "erasableSyntaxOnly": true,
    "noFallthroughCasesInSwitch": true,
    "noUncheckedSideEffectImports": true
  },
  "include": ["vite.config.ts"]
}


================================================
FILE: studio/frontend/vite.config.ts
================================================
// SPDX-License-Identifier: AGPL-3.0-only
// Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import path from "node:path";
import tailwindcss from "@tailwindcss/vite";
import react from "@vitejs/plugin-react";
import { defineConfig } from "vite";

// https://vite.dev/config/
// Vite configuration for the Studio frontend: React + Tailwind plugins, dev
// server proxies to two local backends, the `@` source alias, and CommonJS
// handling for the dagre packages.
export default defineConfig({
  plugins: [react(), tailwindcss()],
  // Pre-bundle the dagre packages during dependency optimization.
  optimizeDeps: {
    include: ["@dagrejs/dagre", "@dagrejs/graphlib"],
  },
  server: {
    // NOTE(review): listens on all interfaces and accepts any Host header —
    // confirm this is only intended for trusted/dev environments.
    host: "0.0.0.0",
    allowedHosts: true,
    proxy: {
      // `/api` and `/v1` are forwarded to the service on port 8888.
      "/api": {
        target: "http://127.0.0.1:8888",
        changeOrigin: true,
      },
      "/v1": {
        target: "http://127.0.0.1:8888",
        changeOrigin: true,
      },
      // The remaining routes are forwarded to the service on port 8004.
      "/seed/inspect": {
        target: "http://127.0.0.1:8004",
        changeOrigin: true,
      },
      "/seed/preview": {
        target: "http://127.0.0.1:8004",
        changeOrigin: true,
      },
      "/preview": {
        target: "http://127.0.0.1:8004",
        changeOrigin: true,
      },
      "/validate": {
        target: "http://127.0.0.1:8004",
        changeOrigin: true,
      },
      "/tools": {
        target: "http://127.0.0.1:8004",
        changeOrigin: true,
      },
    },
  },
  resolve: {
    alias: {
      // `@/…` imports resolve to `./src/…` (mirrors the tsconfig `paths`).
      "@": path.resolve(__dirname, "./src"),
      // Pin dagre to its CommonJS build file.
      "@dagrejs/dagre": path.resolve(
        __dirname,
        "./node_modules/@dagrejs/dagre/dist/dagre.cjs.js",
      ),
    },
  },
  build: {
    // Run the CommonJS transform over node_modules and the dagre packages.
    commonjsOptions: {
      include: [/node_modules/, /@dagrejs\/dagre/, /@dagrejs\/graphlib/],
    },
  },
});


================================================
FILE: studio/install_python_stack.py
================================================
#!/usr/bin/env python3

# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Cross-platform Python dependency installer for Unsloth Studio.

Called by both setup.sh (Linux / WSL) and setup.ps1 (Windows) after the
virtual environment is already activated.  Expects `pip` and `python` on
PATH to point at the venv.
"""

from __future__ import annotations

import os
import shutil
import subprocess
import sys
import tempfile
import urllib.request
from pathlib import Path

IS_WINDOWS = sys.platform == "win32"

# ── Verbosity control ──────────────────────────────────────────────────────────
# By default the installer shows a minimal progress bar (one line, in-place).
# Set UNSLOTH_VERBOSE=1 in the environment to restore full per-step output:
#   Linux/Mac:  UNSLOTH_VERBOSE=1 ./studio/setup.sh
#   Windows:    $env:UNSLOTH_VERBOSE="1" ; .\studio\setup.ps1
VERBOSE: bool = os.environ.get("UNSLOTH_VERBOSE", "0") == "1"

# Progress bar state — updated by _progress() as each install step runs.
# _TOTAL counts: pip-upgrade + 7 shared steps + triton (non-Windows) + local-plugin + finalize
# Update _TOTAL here if you add or remove install steps in install_python_stack().
_STEP: int = 0
_TOTAL: int = 0  # set at runtime in install_python_stack() based on platform

# ── Paths ──────────────────────────────────────────────────────────────
SCRIPT_DIR = Path(__file__).resolve().parent
REQ_ROOT = SCRIPT_DIR / "backend" / "requirements"
SINGLE_ENV = REQ_ROOT / "single-env"
CONSTRAINTS = SINGLE_ENV / "constraints.txt"
LOCAL_DD_UNSTRUCTURED_PLUGIN = (
    SCRIPT_DIR / "backend" / "plugins" / "data-designer-unstructured-seed"
)

# ── Color support ──────────────────────────────────────────────────────


def _enable_colors() -> bool:
    """Try to enable ANSI color support. Returns True if available."""
    if not hasattr(sys.stdout, "fileno"):
        return False
    try:
        if not os.isatty(sys.stdout.fileno()):
            return False
    except Exception:
        return False
    if IS_WINDOWS:
        try:
            import ctypes

            kernel32 = ctypes.windll.kernel32
            # Enable ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x0004) on stdout
            handle = kernel32.GetStdHandle(-11)  # STD_OUTPUT_HANDLE
            mode = ctypes.c_ulong()
            kernel32.GetConsoleMode(handle, ctypes.byref(mode))
            kernel32.SetConsoleMode(handle, mode.value | 0x0004)
            return True
        except Exception:
            return False
    return True  # Unix terminals support ANSI by default


# Colors disabled — Colab and most CI runners render ANSI fine, but plain output
# is cleaner in the notebook cell. Re-enable by setting _HAS_COLOR = _enable_colors()
_HAS_COLOR = False


def _green(msg: str) -> str:
    return f"\033[92m{msg}\033[0m" if _HAS_COLOR else msg


def _cyan(msg: str) -> str:
    return f"\033[96m{msg}\033[0m" if _HAS_COLOR else msg


def _red(msg: str) -> str:
    return f"\033[91m{msg}\033[0m" if _HAS_COLOR else msg


def _progress(label: str) -> None:
    """Print an in-place progress bar for the current install step.

    Uses only stdlib (sys.stdout) — no extra packages required.
    In VERBOSE mode this is a no-op; per-step labels are printed by run() instead.
    """
    global _STEP
    _STEP += 1
    if VERBOSE:
        return  # verbose mode: run() already printed the label
    width = 20
    filled = int(width * _STEP / _TOTAL)
    bar = "=" * filled + "-" * (width - filled)
    end = "\n" if _STEP >= _TOTAL else ""  # newline only on the final step
    sys.stdout.write(f"\r[{bar}] {_STEP:2}/{_TOTAL}  {label:<40}{end}")
    sys.stdout.flush()


def run(
    label: str, cmd: list[str], *, quiet: bool = True
) -> subprocess.CompletedProcess[bytes]:
    """Execute *cmd* and return its result; terminate the process on failure.

    Args:
        label: Human-readable step name (printed in verbose mode and on error).
        cmd: Argument vector handed to ``subprocess.run``.
        quiet: When True, stdout and stderr are captured together and only
            shown if the command fails; when False, output is inherited.

    Raises:
        SystemExit: With the command's exit code when it is non-zero.
    """
    if VERBOSE:
        print(f"   {label}...")

    # Quiet mode merges stderr into the captured stdout stream.
    out_target = subprocess.PIPE if quiet else None
    err_target = subprocess.STDOUT if quiet else None
    completed = subprocess.run(cmd, stdout = out_target, stderr = err_target)

    if completed.returncode == 0:
        return completed

    print(_red(f"❌ {label} failed (exit code {completed.returncode}):"))
    if completed.stdout:
        print(completed.stdout.decode(errors = "replace"))
    sys.exit(completed.returncode)


# Packages to skip on Windows (require special build steps)
WINDOWS_SKIP_PACKAGES = {"open_spiel", "triton_kernels"}

# ── uv bootstrap ──────────────────────────────────────────────────────

USE_UV = False  # Set by _bootstrap_uv() at the start of install_python_stack()
UV_NEEDS_SYSTEM = False  # Set by _bootstrap_uv() via probe


def _bootstrap_uv() -> bool:
    """Check if uv is available and probe whether --system is needed."""
    global UV_NEEDS_SYSTEM
    if not shutil.which("uv"):
        return False
    # Probe: try a dry-run install targeting the current Python explicitly.
    # Without --python, uv can ignore the activated venv on some platforms.
    probe = subprocess.run(
        ["uv", "pip", "install", "--dry-run", "--python", sys.executable, "pip"],
        stdout = subprocess.PIPE,
        stderr = subprocess.STDOUT,
    )
    if probe.returncode != 0:
        # Retry with --system (some envs need it when uv can't find a venv)
        probe_sys = subprocess.run(
            ["uv", "pip", "install", "--dry-run", "--system", "pip"],
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
        )
        if probe_sys.returncode != 0:
            return False  # uv is broken, fall back to pip
        UV_NEEDS_SYSTEM = True
    return True


def _filter_requirements(req: Path, skip: set[str]) -> Path:
    """Return a temp copy of a requirements file with certain packages removed."""
    lines = req.read_text(encoding = "utf-8").splitlines(keepends = True)
    filtered = [
        line
        for line in lines
        if not any(line.strip().lower().startswith(pkg) for pkg in skip)
    ]
    tmp = tempfile.NamedTemporaryFile(
        mode = "w",
        suffix = ".txt",
        delete = False,
        encoding = "utf-8",
    )
    tmp.writelines(filtered)
    tmp.close()
    return Path(tmp.name)


def _translate_pip_args_for_uv(args: tuple[str, ...]) -> list[str]:
    """Translate pip flags to their uv equivalents."""
    translated: list[str] = []
    for arg in args:
        if arg == "--no-cache-dir":
            continue  # uv cache is fast; drop this flag
        elif arg == "--force-reinstall":
            translated.append("--reinstall")
        else:
            translated.append(arg)
    return translated


def _build_pip_cmd(args: tuple[str, ...]) -> list[str]:
    """Build a standard pip install command."""
    cmd = [sys.executable, "-m", "pip", "install"]
    cmd.extend(args)
    return cmd


def _build_uv_cmd(args: tuple[str, ...]) -> list[str]:
    """Return a ``uv pip install`` command line equivalent to pip *args*.

    The command carries ``--system`` when the bootstrap probe required it,
    always pins ``--python`` to the running interpreter, translates pip-only
    flags, and ends with ``--torch-backend=auto``.
    """
    system_flag = ["--system"] if UV_NEEDS_SYSTEM else []
    return [
        "uv",
        "pip",
        "install",
        *system_flag,
        # Always pass --python so uv targets the correct environment.
        # Without this, uv can ignore an activated venv and install into
        # the system Python (observed on Colab and similar environments).
        "--python",
        sys.executable,
        *_translate_pip_args_for_uv(args),
        "--torch-backend=auto",
    ]


def pip_install(
    label: str,
    *args: str,
    req: Path | None = None,
    constrain: bool = True,
) -> None:
    """Install packages with uv when enabled, otherwise (or on uv failure) with pip.

    Args:
        label: Step name used for verbose output and error messages.
        *args: Extra pip-style arguments (flags and/or package specs).
        req: Optional requirements file passed via ``-r``.  On Windows a
            filtered temporary copy without ``WINDOWS_SKIP_PACKAGES`` is used
            and removed afterwards.
        constrain: Add ``-c <constraints file>`` when that file exists.
    """
    use_constraints = constrain and CONSTRAINTS.is_file()
    constraint_args: list[str] = (
        ["-c", str(CONSTRAINTS)] if use_constraints else []
    )

    needs_filter = req is not None and IS_WINDOWS and WINDOWS_SKIP_PACKAGES
    actual_req = (
        _filter_requirements(req, WINDOWS_SKIP_PACKAGES) if needs_filter else req
    )
    req_args: list[str] = [] if actual_req is None else ["-r", str(actual_req)]

    try:
        if USE_UV:
            uv_cmd = _build_uv_cmd(args) + constraint_args + req_args
            if VERBOSE:
                print(f"   {label}...")
            uv_result = subprocess.run(
                uv_cmd,
                stdout = subprocess.PIPE,
                stderr = subprocess.STDOUT,
            )
            if uv_result.returncode == 0:
                return
            # uv failed: show its output, then retry the same install with pip.
            print(_red("   uv failed, falling back to pip..."))
            if uv_result.stdout:
                print(uv_result.stdout.decode(errors = "replace"))

        pip_label = f"{label} (pip)" if USE_UV else label
        run(pip_label, _build_pip_cmd(args) + constraint_args + req_args)
    finally:
        # Only the filtered temp copy is deleted, never the caller's file.
        if actual_req is not None and actual_req != req:
            actual_req.unlink(missing_ok = True)


def download_file(url: str, dest: Path) -> None:
    """Download a file using urllib (no curl dependency).

    Args:
        url: Source URL, passed unchanged to ``urllib.request.urlretrieve``.
        dest: Local path the response body is written to.
    """
    urllib.request.urlretrieve(url, dest)


def patch_package_file(package_name: str, relative_path: str, url: str) -> None:
    """Replace one file inside an installed package with a download from *url*.

    The package's install directory is taken from the ``Location:`` line of
    ``pip show``.  If the package or its location cannot be determined, a
    warning is printed and nothing is changed.

    Args:
        package_name: Distribution name to look up with ``pip show``.
        relative_path: File path relative to the package's install location.
        url: URL of the replacement file.
    """
    show = subprocess.run(
        [sys.executable, "-m", "pip", "show", package_name],
        capture_output = True,
        text = True,
    )
    if show.returncode != 0:
        print(_red(f"   ⚠️  Could not find package {package_name}, skipping patch"))
        return

    # First "Location: <dir>" line of the pip output, if any.
    location = next(
        (
            line.partition(":")[2].strip()
            for line in show.stdout.splitlines()
            if line.lower().startswith("location:")
        ),
        None,
    )
    if not location:
        print(_red(f"   ⚠️  Could not determine location of {package_name}"))
        return

    target = Path(location) / relative_path
    print(_cyan(f"   Patching {target.name} in {package_name}..."))
    download_file(url, target)


# ── Main install sequence ─────────────────────────────────────────────


def install_python_stack() -> int:
    """Run the full Studio dependency install sequence.

    Resets the progress counters, upgrades pip, probes for uv, then installs
    each requirements group in a fixed order, installs the local data-designer
    plugin, runs the single-env metadata patch script and finally a silent
    ``pip check``.

    Returns:
        0 on success, 1 when the local plugin directory is missing.  Failing
        install commands do not return: ``run()`` exits the process with the
        command's exit code.
    """
    global USE_UV, _STEP, _TOTAL
    _STEP = 0
    # One fewer progress step on Windows because the triton step is skipped.
    _TOTAL = 10 if IS_WINDOWS else 11

    # 1. Upgrade pip (needed even with uv as fallback and for bootstrapping)
    _progress("pip upgrade")
    run("Upgrading pip", [sys.executable, "-m", "pip", "install", "--upgrade", "pip"])

    # Try to use uv for faster installs
    USE_UV = _bootstrap_uv()

    # 2. Core packages: unsloth-zoo + unsloth
    _progress("base packages")
    pip_install(
        "Installing base packages",
        "--no-cache-dir",
        req = REQ_ROOT / "base.txt",
    )

    # 3. Extra dependencies
    _progress("unsloth extras")
    pip_install(
        "Installing additional unsloth dependencies",
        "--no-cache-dir",
        req = REQ_ROOT / "extras.txt",
    )

    # 3b. Extra dependencies (no-deps) — audio model support etc.
    _progress("extra codecs")
    pip_install(
        "Installing extras (no-deps)",
        "--no-deps",
        "--no-cache-dir",
        req = REQ_ROOT / "extras-no-deps.txt",
    )

    # 4. Overrides (torchao, transformers) — force-reinstall
    _progress("dependency overrides")
    pip_install(
        "Installing dependency overrides",
        "--force-reinstall",
        "--no-cache-dir",
        req = REQ_ROOT / "overrides.txt",
    )

    # 5. Triton kernels (no-deps, from source)
    if not IS_WINDOWS:
        _progress("triton kernels")
        pip_install(
            "Installing triton kernels",
            "--no-deps",
            "--no-cache-dir",
            req = REQ_ROOT / "triton-kernels.txt",
            constrain = False,
        )

    # # 6. Patch: override llama_cpp.py with fix from unsloth-zoo  feature/llama-cpp-windows-support branch
    # patch_package_file(
    #     "unsloth-zoo",
    #     os.path.join("unsloth_zoo", "llama_cpp.py"),
    #     "https://raw.githubusercontent.com/unslothai/unsloth-zoo/refs/heads/main/unsloth_zoo/llama_cpp.py",
    # )

    # # 7a. Patch: override vision.py with fix from unsloth PR #4091
    # patch_package_file(
    #     "unsloth",
    #     os.path.join("unsloth", "models", "vision.py"),
    #     "https://raw.githubusercontent.com/unslothai/unsloth/80e0108a684c882965a02a8ed851e3473c1145ab/unsloth/models/vision.py",
    # )

    # # 7b. Patch : override save.py with fix from feature/llama-cpp-windows-support
    # patch_package_file(
    #     "unsloth",
    #     os.path.join("unsloth", "save.py"),
    #     "https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/unsloth/save.py",
    # )

    # 8. Studio dependencies
    _progress("studio deps")
    pip_install(
        "Installing studio dependencies",
        "--no-cache-dir",
        req = REQ_ROOT / "studio.txt",
    )

    # 9. Data-designer dependencies
    _progress("data designer deps")
    pip_install(
        "Installing data-designer base dependencies",
        "--no-cache-dir",
        req = SINGLE_ENV / "data-designer-deps.txt",
    )

    # 10. Data-designer packages (no-deps to avoid conflicts)
    _progress("data designer")
    pip_install(
        "Installing data-designer",
        "--no-cache-dir",
        "--no-deps",
        req = SINGLE_ENV / "data-designer.txt",
    )

    # 11. Local Data Designer seed plugin
    # Checked only at this point, i.e. after the earlier steps have already run.
    if not LOCAL_DD_UNSTRUCTURED_PLUGIN.is_dir():
        print(
            _red(
                f"❌ Missing local plugin directory: {LOCAL_DD_UNSTRUCTURED_PLUGIN}",
            ),
        )
        return 1
    _progress("local plugin")
    pip_install(
        "Installing local data-designer unstructured plugin",
        "--no-cache-dir",
        "--no-deps",
        str(LOCAL_DD_UNSTRUCTURED_PLUGIN),
        constrain = False,
    )

    # 12. Patch metadata for single-env compatibility
    _progress("finalizing")
    run(
        "Patching single-env metadata",
        [sys.executable, str(SINGLE_ENV / "patch_metadata.py")],
    )

    # 13. Final check (silent; third-party conflicts are expected)
    # The exit status is deliberately ignored.
    subprocess.run(
        [sys.executable, "-m", "pip", "check"],
        stdout = subprocess.DEVNULL,
        stderr = subprocess.DEVNULL,
    )

    print(_green("✅ Python dependencies installed"))
    return 0


if __name__ == "__main__":
    sys.exit(install_python_stack())


================================================
FILE: studio/setup.bat
================================================
@echo off
REM SPDX-License-Identifier: AGPL-3.0-only
REM Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

REM Thin launcher: runs setup.ps1 from this script's own directory (%~dp0)
REM with the execution policy bypassed, forwarding all arguments (%*).
powershell -ExecutionPolicy Bypass -File "%~dp0setup.ps1" %*


================================================
FILE: studio/setup.ps1
================================================
#Requires -Version 5.1
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
<#
.SYNOPSIS
    Full environment setup for Unsloth Studio on Windows (bundled version).
.DESCRIPTION
    Always installs Node.js if needed. When running from pip install:
    skips frontend build (already bundled). When running from git repo:
    full setup including frontend build.
    Supports NVIDIA GPU (full training + inference) and CPU-only (GGUF chat mode).
.NOTES
    Usage: powershell -ExecutionPolicy Bypass -File setup.ps1
#>

# Treat every error as terminating so failures stop the script immediately.
$ErrorActionPreference = "Stop"
# Directory containing this script, and its parent directory.
$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
$PackageDir = Split-Path -Parent $ScriptDir

# Detect if running from pip install (no frontend/ dir in studio)
$FrontendDir = Join-Path $ScriptDir "frontend"
$OxcValidatorDir = Join-Path $ScriptDir "backend\core\data_recipe\oxc-validator"
$IsPipInstall = -not (Test-Path $FrontendDir)

# ─────────────────────────────────────────────
# Helper functions
# ─────────────────────────────────────────────

# Reload ALL environment variables from registry.
# Picks up changes made by installers (winget, msi, etc.) including
# Path, CUDA_PATH, CUDA_PATH_V*, and any other vars they set.
function Refresh-Environment {
    # Copy every Machine- then User-level variable into the current process.
    # User values are applied second, so they win when both levels define a name.
    foreach ($level in @('Machine', 'User')) {
        $vars = [System.Environment]::GetEnvironmentVariables($level)
        foreach ($key in $vars.Keys) {
            # Path is rebuilt separately below instead of being overwritten per level.
            if ($key -eq 'Path') { continue }
            Set-Item -Path "Env:$key" -Value $vars[$key] -ErrorAction SilentlyContinue
        }
    }
    # Path becomes "<Machine Path>;<User Path>". Entries that existed only in
    # the current process's Path are not carried over.
    $machinePath = [System.Environment]::GetEnvironmentVariable('Path', 'Machine')
    $userPath = [System.Environment]::GetEnvironmentVariable('Path', 'User')
    $env:Path = "$machinePath;$userPath"
}

# Find nvcc on PATH, CUDA_PATH, or standard toolkit dirs.
# Returns the path to nvcc.exe, or $null if not found.
function Find-Nvcc {
    # Locate nvcc.exe.
    #   -MaxVersion "X.Y": only consider side-by-side toolkits under the
    #       standard install directory whose version is <= X.Y; returns the
    #       newest such nvcc.exe, or $null when none qualifies.
    #   no -MaxVersion: try PATH, then CUDA_PATH, then the newest toolkit in
    #       the standard install directory.
    # Returns the full path to nvcc.exe, or $null if nothing was found.
    param([string]$MaxVersion = "")

    # If MaxVersion is set, we need to find a toolkit <= that version.
    # CUDA toolkits install side-by-side under C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y\

    $toolkitBase = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA'

    if ($MaxVersion -and (Test-Path $toolkitBase)) {
        $drMajor = [int]$MaxVersion.Split('.')[0]
        $drMinor = [int]$MaxVersion.Split('.')[1]

        # Get all installed CUDA dirs, sorted descending (highest first)
        $cudaDirs = Get-ChildItem -Directory $toolkitBase | Where-Object {
            $_.Name -match '^v(\d+)\.(\d+)'
        } | Sort-Object { [version]($_.Name -replace '^v','') } -Descending

        foreach ($dir in $cudaDirs) {
            if ($dir.Name -match '^v(\d+)\.(\d+)') {
                $tkMajor = [int]$Matches[1]; $tkMinor = [int]$Matches[2]
                $compatible = ($tkMajor -lt $drMajor) -or ($tkMajor -eq $drMajor -and $tkMinor -le $drMinor)
                if ($compatible) {
                    $nvcc = Join-Path $dir.FullName 'bin\nvcc.exe'
                    if (Test-Path $nvcc) {
                        return $nvcc
                    }
                }
            }
        }

        # No compatible side-by-side version found
        return $null
    }

    # Fallback: no version constraint — pick latest or whatever is available

    # 1. Check nvcc on PATH
    $cmd = Get-Command nvcc -ErrorAction SilentlyContinue
    if ($cmd) { return $cmd.Source }

    # 2. Check CUDA_PATH env var
    $cudaRoot = [Environment]::GetEnvironmentVariable('CUDA_PATH', 'Process')
    if (-not $cudaRoot) { $cudaRoot = [Environment]::GetEnvironmentVariable('CUDA_PATH', 'Machine') }
    if (-not $cudaRoot) { $cudaRoot = [Environment]::GetEnvironmentVariable('CUDA_PATH', 'User') }
    if ($cudaRoot -and (Test-Path (Join-Path $cudaRoot 'bin\nvcc.exe'))) {
        return (Join-Path $cudaRoot 'bin\nvcc.exe')
    }

    # 3. Scan standard toolkit directory, newest version first.
    # Sorting by Name is lexicographic ("v9.2" sorts after "v12.4"), so compare
    # as [version] like the MaxVersion branch does, and keep looking if the
    # newest directory has no nvcc.exe.
    if (Test-Path $toolkitBase) {
        $candidates = Get-ChildItem -Directory $toolkitBase | Where-Object {
            $_.Name -match '^v\d+(\.\d+){1,3}$'
        } | Sort-Object { [version]($_.Name -replace '^v','') } -Descending

        foreach ($dir in $candidates) {
            $nvcc = Join-Path $dir.FullName 'bin\nvcc.exe'
            if (Test-Path $nvcc) {
                return $nvcc
            }
        }
    }

    return $null
}

# Query the first GPU's CUDA compute capability through nvidia-smi.
# The dotted value is returned with the dot removed: "8.0" -> "80",
# "8.9" -> "89".  Yields $null when nvidia-smi cannot be located, exits
# non-zero, prints nothing, or prints something other than "<major>.<minor>".
function Get-CudaComputeCapability {
    # Prefer the absolute path cached in $script:NvidiaSmiExe (it stays valid
    # after Refresh-Environment); otherwise fall back to a PATH lookup.
    $smiPath = $script:NvidiaSmiExe
    if (-not $smiPath) {
        $resolved = Get-Command nvidia-smi -ErrorAction SilentlyContinue
        if ($resolved) { $smiPath = $resolved.Source }
    }
    if (-not $smiPath) { return $null }

    try {
        $reported = & $smiPath --query-gpu=compute_cap --format=csv,noheader 2>$null
        if ($LASTEXITCODE -ne 0 -or -not $reported) { return $null }

        # One line is printed per GPU; only the first GPU is considered.
        $firstGpu = ($reported -split "`n")[0].Trim()
        if ($firstGpu -match '^(\d+)\.(\d+)$') {
            return ($Matches[1] + $Matches[2])
        }
    } catch { }

    return $null
}

# Check whether an nvcc binary can generate code for a given sm_ architecture.
# Runs `nvcc --list-gpu-code`, which prints one sm_* token per line
# (--list-gpu-arch prints compute_* tokens instead).  Available since CUDA 11.6.
#
# Parameters:
#   NvccExe - path (or command name) of the nvcc binary to query.
#   Arch    - architecture digits without the "sm_" prefix, e.g. "89" or "120".
#
# Returns $true only when a line of the output is exactly "sm_<Arch>".
# Returns $false when Arch is empty, when the flag isn't supported (old
# toolkit), or when nvcc cannot be run -- safer to reject and fall back to
# scanning/PTX than to assume support and fail later.
function Test-NvccArchSupport {
    param([string]$NvccExe, [string]$Arch)

    # An empty arch would otherwise reduce the pattern to a bare "sm_".
    if (-not $Arch) { return $false }

    try {
        $listCode = & $NvccExe --list-gpu-code 2>&1 | Out-String
        if ($LASTEXITCODE -ne 0) { return $false }

        # Match a whole line so that e.g. Arch "10" does not match "sm_100".
        # \s* before $ absorbs the carriage return left by CRLF line endings.
        $pattern = '(?m)^\s*sm_' + [regex]::Escape($Arch) + '\s*$'
        return ($listCode -match $pattern)
    } catch {
        return $false
    }
}

# Report the highest sm_ architecture number an nvcc binary lists in
# `nvcc --list-gpu-code`, as a string (e.g. "90" for CUDA 12.4).
# Returns $null when nvcc exits non-zero, cannot be run, or lists no sm_* line.
function Get-NvccMaxArch {
    param([string]$NvccExe)
    try {
        $codeListing = & $NvccExe --list-gpu-code 2>&1 | Out-String
        if ($LASTEXITCODE -ne 0) { return $null }

        # Collect the leading digits of every line that starts with "sm_"
        # (a suffixed entry such as "sm_90a" contributes 90).
        $smNumbers = @(
            ($codeListing -split "`n") | ForEach-Object {
                if ($_.Trim() -match '^sm_(\d+)') { [int]$Matches[1] }
            }
        )

        if ($smNumbers.Count -gt 0) {
            $highest = $smNumbers | Sort-Object -Descending | Select-Object -First 1
            return $highest.ToString()
        }
    } catch { }
    return $null
}

# Map the driver's maximum CUDA version (the "CUDA Version: X.Y" field that
# nvidia-smi prints) to the highest PyTorch CUDA wheel index tag that does not
# exceed it, e.g. "cu128".
# PyTorch on Windows ships CPU-only by default from PyPI; CUDA wheels live at
# https://download.pytorch.org/whl/<tag>.  Example: a driver reporting
# "CUDA Version: 12.9" maps to cu128 (not cu130).
# Falls back to "cu124" when nvidia-smi is unavailable, its output cannot be
# parsed, or the reported version is below 12.6.
function Get-PytorchCudaTag {
    # Cached absolute path first, PATH lookup second.
    $smiPath = $script:NvidiaSmiExe
    if (-not $smiPath) {
        $resolved = Get-Command nvidia-smi -ErrorAction SilentlyContinue
        if ($resolved) { $smiPath = $resolved.Source }
    }
    if (-not $smiPath) { return "cu124" }

    try {
        # 2>&1 | Out-String folds stderr into stdout and yields one string.
        # A plain 2>$null doesn't fully suppress stderr in PS 5.1 --
        # ErrorRecord objects leak into the result and break the -match.
        $smiReport = & $smiPath 2>&1 | Out-String
        if ($smiReport -match 'CUDA Version:\s+(\d+)\.(\d+)') {
            $cudaMajor = [int]$Matches[1]
            $cudaMinor = [int]$Matches[2]

            # PyTorch 2.10 offers: cu124, cu126, cu128, cu130
            if ($cudaMajor -ge 13) { return "cu130" }
            if ($cudaMajor -eq 12) {
                if ($cudaMinor -ge 8) { return "cu128" }
                if ($cudaMinor -ge 6) { return "cu126" }
            }
            return "cu124"
        }
    } catch { }

    return "cu124"
}

# Find a Visual Studio installation with the C++ toolchain, for cmake's -G flag.
# Strategy: (1) ask vswhere, (2) scan the filesystem (handles broken vswhere
# registration).
#
# Returns a hashtable, or $null when nothing usable is found:
#   Generator   - cmake generator name, e.g. "Visual Studio 17 2022"
#   InstallPath - root of the selected VS installation
#   Source      - 'vswhere' or "filesystem (<edition>)"
#   ClExe       - full path to cl.exe (filesystem results only)
function Find-VsBuildTools {
    # Product line year -> cmake generator major version.
    $map = @{ '2022' = '17'; '2019' = '16'; '2017' = '15' }
    $vcComponent = 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64'

    # --- Try vswhere first (works when VS is properly registered) ---
    $vsw = "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe"
    if (Test-Path $vsw) {
        # "-products *" is required: without it vswhere only searches the
        # Community/Professional/Enterprise products and never reports a
        # standalone Build Tools install.
        $info = & $vsw -latest -products '*' -requires $vcComponent -property catalog_productLineVersion 2>$null |
            Select-Object -First 1
        $path = & $vsw -latest -products '*' -requires $vcComponent -property installationPath 2>$null |
            Select-Object -First 1
        if ($info -and $path) {
            $y = ([string]$info).Trim()
            $n = $map[$y]
            # Product lines missing from $map fall through to the filesystem scan.
            if ($n) {
                return @{ Generator = "Visual Studio $n $y"; InstallPath = ([string]$path).Trim(); Source = 'vswhere' }
            }
        }
    }

    # --- Scan filesystem (handles broken vswhere registration after winget cycles) ---
    # Drop roots that are not set so Join-Path never receives an empty path.
    $roots = @($env:ProgramFiles, ${env:ProgramFiles(x86)}) | Where-Object { $_ }
    $editions = @('BuildTools', 'Community', 'Professional', 'Enterprise')
    $years = @('2022', '2019', '2017')

    # Newest year first; within a year, roots and editions in the order above.
    foreach ($y in $years) {
        foreach ($r in $roots) {
            foreach ($ed in $editions) {
                $candidate = Join-Path $r "Microsoft Visual Studio\$y\$ed"
                if (Test-Path $candidate) {
                    $vcDir = Join-Path $candidate "VC\Tools\MSVC"
                    if (Test-Path $vcDir) {
                        # A cl.exe anywhere under VC\Tools\MSVC proves the compiler is really installed.
                        $cl = Get-ChildItem -Path $vcDir -Filter "cl.exe" -Recurse -ErrorAction SilentlyContinue | Select-Object -First 1
                        if ($cl) {
                            $n = $map[$y]
                            if ($n) {
                                return @{ Generator = "Visual Studio $n $y"; InstallPath = $candidate; Source = "filesystem ($ed)"; ClExe = $cl.FullName }
                            }
                        }
                    }
                }
            }
        }
    }

    return $null
}

# ─────────────────────────────────────────────
# Banner
# ─────────────────────────────────────────────
# Print the three-line setup banner in green.
foreach ($bannerLine in @(
    "+==============================================+",
    "|       Unsloth Studio Setup (Windows)         |",
    "+==============================================+"
)) {
    Write-Host $bannerLine -ForegroundColor Green
}

# ==========================================================================
#  PHASE 1: System-level prerequisites (winget installs, env vars)
#  All heavy system tool installs happen here BEFORE touching Python.
# ==========================================================================

# ============================================
# 1a. GPU detection
# ============================================
# $HasNvidiaSmi becomes $true once some nvidia-smi binary runs with exit code 0;
# $NvidiaSmiExe then holds that binary's absolute path.
$HasNvidiaSmi = $false
$NvidiaSmiExe = $null  # Absolute path -- survives Refresh-Environment

# First attempt: whatever nvidia-smi resolves to on PATH.
try {
    $nvSmiCmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
    if ($nvSmiCmd) {
        & $nvSmiCmd.Source 2>&1 | Out-Null
        $HasNvidiaSmi = ($LASTEXITCODE -eq 0)
        if ($HasNvidiaSmi) { $NvidiaSmiExe = $nvSmiCmd.Source }
    }
} catch {}

# Second attempt: nvidia-smi may not be on PATH even though a GPU + driver
# exist, so probe the well-known install locations and keep the first one
# that runs successfully.
if (-not $HasNvidiaSmi) {
    $nvSmiDefaults = @(
        "$env:ProgramFiles\NVIDIA Corporation\NVSMI\nvidia-smi.exe",
        "$env:SystemRoot\System32\nvidia-smi.exe"
    )
    foreach ($p in $nvSmiDefaults) {
        if (-not (Test-Path $p)) { continue }
        try {
            & $p 2>&1 | Out-Null
            if ($LASTEXITCODE -ne 0) { continue }
            $HasNvidiaSmi = $true
            $NvidiaSmiExe = $p
            Write-Host "   Found nvidia-smi at $(Split-Path $p -Parent)" -ForegroundColor Gray
            break
        } catch {}
    }
}

# Report the outcome; a missing GPU is a warning, not a fatal error.
if ($HasNvidiaSmi) {
    Write-Host "[OK] NVIDIA GPU detected" -ForegroundColor Green
} else {
    Write-Host ""
    Write-Host "[WARN] No NVIDIA GPU detected. Studio will run in chat-only (GGUF) mode." -ForegroundColor Yellow
    Write-Host "       Training and GPU inference require an NVIDIA GPU with drivers installed." -ForegroundColor Yellow
    Write-Host "       https://www.nvidia.com/Download/index.aspx" -ForegroundColor Yellow
    Write-Host ""
}

# ============================================
# 1a.5. Windows Long Paths (required for deep node_modules / Python paths)
# ============================================
# Read HKLM ...\FileSystem\LongPathsEnabled; anything other than 1 (or a
# failed read) counts as "not enabled".
$LongPathsEnabled = $false
try {
    $regVal = Get-ItemProperty -Path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -ErrorAction SilentlyContinue
    $LongPathsEnabled = [bool]($regVal -and $regVal.LongPathsEnabled -eq 1)
} catch {}

if (-not $LongPathsEnabled) {
    Write-Host "Windows Long Paths not enabled (required for Triton compilation and deep dependency paths)." -ForegroundColor Yellow
    Write-Host "   Requesting admin access to fix..." -ForegroundColor Yellow
    try {
        # Run reg.exe elevated (this raises a UAC prompt) and wait for its exit code.
        $elevatedReg = @{
            FilePath     = "reg.exe"
            ArgumentList = 'add "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v LongPathsEnabled /t REG_DWORD /d 1 /f'
            Verb         = 'RunAs'
            Wait         = $true
            PassThru     = $true
            ErrorAction  = 'Stop'
        }
        $proc = Start-Process @elevatedReg
        if ($proc.ExitCode -ne 0) {
            Write-Host "[WARN] Failed to enable Long Paths (exit code: $($proc.ExitCode))" -ForegroundColor Yellow
        } else {
            $LongPathsEnabled = $true
            Write-Host "[OK] Windows Long Paths enabled (via UAC)" -ForegroundColor Green
        }
    } catch {
        # Start-Process threw: the prompt was declined or elevation is unavailable.
        # Setup continues; the user is shown the manual command instead.
        Write-Host "[WARN] Could not enable Long Paths (UAC was declined or not available)" -ForegroundColor Yellow
        Write-Host "       Run this manually in an Admin terminal:" -ForegroundColor Yellow
        Write-Host '       reg add "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v LongPathsEnabled /t REG_DWORD /d 1 /f' -ForegroundColor Cyan
    }
} else {
    Write-Host "[OK] Windows Long Paths enabled" -ForegroundColor Green
}

# ============================================
# 1b. Git (required by pip for git+https:// deps and by npm)
# ============================================
# Use git if it is already on PATH; otherwise try a winget install and
# abort the setup when git still cannot be resolved afterwards.
$HasGit = $null -ne (Get-Command git -ErrorAction SilentlyContinue)
if ($HasGit) {
    Write-Host "[OK] Git found: $(git --version)" -ForegroundColor Green
} else {
    Write-Host "Git not found -- installing via winget..." -ForegroundColor Yellow
    $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue)
    if ($HasWinget) {
        try {
            winget install Git.Git --source winget --accept-package-agreements --accept-source-agreements 2>&1 | Out-Null
            # Pick up the PATH changes made by the installer, then look again.
            Refresh-Environment
            $HasGit = $null -ne (Get-Command git -ErrorAction SilentlyContinue)
        } catch { }
    }

    if (-not $HasGit) {
        Write-Host "[ERROR] Git is required but could not be installed automatically." -ForegroundColor Red
        Write-Host "        Install Git from https://git-scm.com/download/win and re-run." -ForegroundColor Red
        exit 1
    }

    Write-Host "[OK] Git installed: $(git --version)" -ForegroundColor Green
}

# ============================================
# 1c. CMake (required for llama.cpp build)
# ============================================
# Use cmake if it is already on PATH.  Otherwise: install it via winget, then
# (if it still does not resolve) probe the default install directories and add
# the first working one to PATH.  Aborts the setup when cmake stays unavailable.
$HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
if (-not $HasCmake) {
    Write-Host "CMake not found -- installing via winget..." -ForegroundColor Yellow
    $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue)
    if ($HasWinget) {
        try {
            winget install Kitware.CMake --source winget --accept-package-agreements --accept-source-agreements 2>&1 | Out-Null
            Refresh-Environment
            $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
        } catch { }
    }
    # winget may succeed but cmake isn't on PATH yet (MSI PATH changes need a
    # new shell). Try the default install location as a fallback.
    if (-not $HasCmake) {
        $cmakeDefaults = @(
            "$env:ProgramFiles\CMake\bin",
            "${env:ProgramFiles(x86)}\CMake\bin",
            "$env:LOCALAPPDATA\CMake\bin"
        )
        foreach ($d in $cmakeDefaults) {
            if (-not (Test-Path (Join-Path $d "cmake.exe"))) { continue }

            $env:Path = "$d;$env:Path"
            $HasCmake = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)
            # Only persist a directory once cmake actually resolves from it.
            if (-not $HasCmake) { continue }

            # Persist to user PATH so Refresh-Environment does not drop it later.
            # Compare whole PATH entries (not substrings) and avoid writing a
            # trailing ';' when the user PATH is currently empty.
            $userPath = [Environment]::GetEnvironmentVariable('Path', 'User')
            $userPathEntries = if ($userPath) { $userPath.Split(';') } else { @() }
            if (-not ($userPathEntries | Where-Object { $_.TrimEnd('\') -eq $d })) {
                $cmakeUserPath = if ($userPath) { "$d;$userPath" } else { $d }
                [Environment]::SetEnvironmentVariable('Path', $cmakeUserPath, 'User')
            }

            Write-Host "   Found cmake at $d (added to PATH)" -ForegroundColor Gray
            break
        }
    }
    if ($HasCmake) {
        Write-Host "[OK] CMake installed" -ForegroundColor Green
    } else {
        Write-Host "[ERROR] CMake is required but could not be installed." -ForegroundColor Red
        Write-Host "        Install CMake from https://cmake.org/download/ and re-run." -ForegroundColor Red
        exit 1
    }
} else {
    Write-Host "[OK] CMake found: $(cmake --version | Select-Object -First 1)" -ForegroundColor Green
}

# ============================================
# 1d. Visual Studio Build Tools (C++ compiler for llama.cpp)
# ============================================
# Locate a Visual Studio C++ toolchain; install the 2022 Build Tools through
# winget when none is found.  On success $CmakeGenerator and $VsInstallPath
# describe the selected installation; on failure the setup aborts.
$CmakeGenerator = $null
$VsInstallPath = $null
$vsResult = Find-VsBuildTools

if (-not $vsResult) {
    Write-Host "Visual Studio Build Tools not found -- installing via winget..." -ForegroundColor Yellow
    Write-Host "   (This is a one-time install, may take several minutes)" -ForegroundColor Gray
    $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue)
    if ($HasWinget) {
        # Relax the error preference while winget runs, then restore it.
        $prevEAPTemp = $ErrorActionPreference
        $ErrorActionPreference = "Continue"
        winget install Microsoft.VisualStudio.2022.BuildTools --source winget --accept-package-agreements --accept-source-agreements --override "--add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --passive --wait"
        $ErrorActionPreference = $prevEAPTemp
        # Re-scan after install (don't trust vswhere catalog)
        $vsResult = Find-VsBuildTools
    }
}

# Still nothing usable: explain the manual route and stop.
if (-not $vsResult) {
    Write-Host "[ERROR] Visual Studio Build Tools could not be found or installed." -ForegroundColor Red
    Write-Host "        Manual install:" -ForegroundColor Red
    Write-Host '        1. winget install Microsoft.VisualStudio.2022.BuildTools --source winget' -ForegroundColor Yellow
    Write-Host '        2. Open Visual Studio Installer -> Modify -> check "Desktop development with C++"' -ForegroundColor Yellow
    exit 1
}

$CmakeGenerator = $vsResult.Generator
$VsInstallPath = $vsResult.InstallPath
Write-Host "[OK] $CmakeGenerator detected via $($vsResult.Source)" -ForegroundColor Green
# ClExe is only present on results that came from the filesystem scan.
if ($vsResult.ClExe) { Write-Host "   cl.exe: $($vsResult.ClExe)" -ForegroundColor Gray }

# ============================================
# 1e. CUDA Toolkit (nvcc for llama.cpp build + env vars)
# ============================================
if ($HasNvidiaSmi) {
# IMPORTANT: The CUDA Toolkit version must be <= the max CUDA version the
# NVIDIA driver supports.  nvidia-smi reports this as "CUDA Version: X.Y".
# If we install a toolkit newer than the driver supports, llama-server will
# fail at runtime with "ggml_cuda_init: failed to initialize CUDA: (null)".

# -- Read the driver's maximum supported CUDA version from nvidia-smi --
# $DriverMaxCuda stays $null when the "CUDA Version: X.Y" field cannot be
# found or nvidia-smi fails to run.
$DriverMaxCuda = $null
try {
    $smiOut = & $NvidiaSmiExe 2>&1 | Out-String
    if ($smiOut -match "CUDA Version:\s+([\d]+)\.([\d]+)") {
        $DriverMaxCuda = $Matches[1] + '.' + $Matches[2]
        Write-Host "   Driver supports up to CUDA $DriverMaxCuda" -ForegroundColor Gray
    }
} catch {}

# Compute capability is needed before toolkit selection so that toolkit
# support for this GPU can be validated.
$CudaArch = Get-CudaComputeCapability
if ($CudaArch) {
    # Re-insert the dot before the last digit for display: "89" -> "8.9".
    $dottedCapability = $CudaArch.Insert($CudaArch.Length-1, '.')
    Write-Host "   GPU Compute Capability = $dottedCapability (sm_$CudaArch)" -ForegroundColor Gray
}

# -- Find a toolkit that's compatible with the driver AND the GPU --
# Strategy: prefer the toolkit at CUDA_PATH (user's existing setup) if it's
# compatible with the driver AND supports the GPU architecture.  Only fall back
# to scanning side-by-side installs if CUDA_PATH is missing, points to an
# incompatible version, or can't compile for the GPU.  This avoids
# header/binary mismatches when multiple toolkits are installed.
#
# Outputs of this section:
#   $NvccPath            - nvcc.exe of the selected toolkit, or $null
#   $IncompatibleToolkit - "X.Y" of an installed toolkit that is newer than
#                          the driver supports (only set when nothing
#                          compatible was found), else $null
$IncompatibleToolkit = $null
$NvccPath = $null

if ($DriverMaxCuda) {
    $drMajorCuda = [int]$DriverMaxCuda.Split('.')[0]
    $drMinorCuda = [int]$DriverMaxCuda.Split('.')[1]

    # --- Step 1: Check existing CUDA_PATH first ---
    # Machine scope wins over User scope; the Process value is not consulted here.
    $existingCudaPath = [Environment]::GetEnvironmentVariable('CUDA_PATH', 'Machine')
    if (-not $existingCudaPath) {
        $existingCudaPath = [Environment]::GetEnvironmentVariable('CUDA_PATH', 'User')
    }
    if ($existingCudaPath -and (Test-Path (Join-Path $existingCudaPath 'bin\nvcc.exe'))) {
        $candidateNvcc = Join-Path $existingCudaPath 'bin\nvcc.exe'
        $verOut = & $candidateNvcc --version 2>&1 | Out-String
        if ($verOut -match 'release\s+(\d+)\.(\d+)') {
            $tkMaj = [int]$Matches[1]; $tkMin = [int]$Matches[2]
            # Compatible means toolkit version <= driver max version.
            $isCompat = ($tkMaj -lt $drMajorCuda) -or ($tkMaj -eq $drMajorCuda -and $tkMin -le $drMinorCuda)
            if ($isCompat) {
                # Also verify the toolkit supports our GPU architecture.
                # Diagnostic detail goes to the verbose stream so normal runs
                # are not cluttered with debug output.
                Write-Verbose "Checking CUDA compatibility: toolkit=$tkMaj.$tkMin arch=sm_$CudaArch"
                # With no detected arch there is nothing to validate, so accept.
                $archOk = $true
                if ($CudaArch) {
                    $archOk = Test-NvccArchSupport -NvccExe $candidateNvcc -Arch $CudaArch
                    if (-not $archOk) {
                        Write-Host "   [INFO] CUDA_PATH toolkit (CUDA $tkMaj.$tkMin) does not support GPU arch sm_$CudaArch" -ForegroundColor Yellow
                        Write-Host "          Looking for a newer toolkit..." -ForegroundColor Yellow
                    }
                }
                if ($archOk) {
                    $NvccPath = $candidateNvcc
                    Write-Host "   [OK] Using existing CUDA Toolkit at CUDA_PATH (nvcc: $NvccPath)" -ForegroundColor Green
                }
            } else {
                Write-Host "   [INFO] CUDA_PATH ($existingCudaPath) has CUDA $tkMaj.$tkMin which exceeds driver max $DriverMaxCuda" -ForegroundColor Yellow
            }
        }
    }

    # --- Step 2: Fall back to scanning side-by-side installs ---
    # NOTE(review): this scan is constrained only by the driver version; the GPU
    # arch is not passed to Find-Nvcc here -- confirm whether it can re-select a
    # toolkit that step 1 rejected for lacking sm_$CudaArch support.
    if (-not $NvccPath) {
        $NvccPath = Find-Nvcc -MaxVersion $DriverMaxCuda
        if ($NvccPath) {
            Write-Host "   [OK] Found compatible CUDA Toolkit (nvcc: $NvccPath)" -ForegroundColor Green
            if ($existingCudaPath) {
                # nvcc is at <root>\bin\nvcc.exe, so the toolkit root is two levels up.
                $selectedRoot = Split-Path (Split-Path $NvccPath -Parent) -Parent
                if ($existingCudaPath.TrimEnd('\') -ne $selectedRoot.TrimEnd('\')) {
                    Write-Host "   [INFO] Overriding CUDA_PATH from $existingCudaPath to $selectedRoot" -ForegroundColor Yellow
                }
            }
        } else {
            # Check if there's an incompatible (too new) toolkit installed
            $AnyNvcc = Find-Nvcc
            if ($AnyNvcc) {
                $NvccOut = & $AnyNvcc --version 2>&1 | Out-String
                if ($NvccOut -match "release\s+([\d]+\.[\d]+)") {
                    $IncompatibleToolkit = $Matches[1]
                }
            }
        }
    }
} else {
    # Driver version unknown: accept whatever toolkit Find-Nvcc locates.
    $NvccPath = Find-Nvcc
}

# -- A too-new toolkit is installed and nothing compatible was found --
# Print the uninstall instructions (one text/colour pair per output line)
# and abort the setup.
if ($IncompatibleToolkit -and -not $NvccPath) {
    $incompatReport = @(
        @("", 'Red'),
        @("========================================================================", 'Red'),
        @("[ERROR] CUDA Toolkit $IncompatibleToolkit is installed but INCOMPATIBLE", 'Red'),
        @("        with your NVIDIA driver (which supports up to CUDA $DriverMaxCuda).", 'Red'),
        @("", 'Red'),
        @("  This will cause 'failed to initialize CUDA' errors at runtime.", 'Red'),
        @("", 'Red'),
        @("  To fix:", 'Yellow'),
        @("    1. Open Control Panel -> Programs -> Uninstall a program", 'Yellow'),
        @("    2. Uninstall 'NVIDIA CUDA Toolkit $IncompatibleToolkit'", 'Yellow'),
        @("    3. Re-run setup.bat (it will install CUDA $DriverMaxCuda automatically)", 'Yellow'),
        @("", 'Yellow'),
        @("  Alternatively, update your NVIDIA driver to one that supports CUDA $IncompatibleToolkit.", 'Gray'),
        @("========================================================================", 'Red')
    )
    foreach ($reportLine in $incompatReport) {
        Write-Host $reportLine[0] -ForegroundColor $reportLine[1]
    }
    exit 1
}

# -- No toolkit at all: install via winget --
# With a known driver limit, install the highest winget-listed CUDA version
# that does not exceed it; without one, install winget's latest.  $NvccPath
# is set only if nvcc can be located after the install.
if (-not $NvccPath) {
    Write-Host "CUDA toolkit (nvcc) not found -- installing via winget..." -ForegroundColor Yellow
    $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue)
    if ($HasWinget) {
        if ($DriverMaxCuda) {
            # Query winget for available CUDA Toolkit versions
            $drMajor = [int]$DriverMaxCuda.Split('.')[0]
            $drMinor = [int]$DriverMaxCuda.Split('.')[1]
            $AvailableVersions = @()
            try {
                $rawOutput = winget show Nvidia.CUDA --versions --accept-source-agreements 2>&1 | Out-String
                # Parse version lines (e.g. "12.6", "12.5", "11.8")
                foreach ($line in $rawOutput -split "`n") {
                    $line = $line.Trim()
                    if ($line -match '^\d+\.\d+') {
                        $AvailableVersions += $line
                    }
                }
            } catch {}

            # Keep only versions <= driver max, then pick the highest one by
            # explicit sorting instead of relying on the order winget printed.
            $compatCandidates = @(
                foreach ($ver in $AvailableVersions) {
                    if ($ver -match '^(\d+)\.(\d+)(?:\.(\d+))?') {
                        $vMajor = [int]$Matches[1]
                        $vMinor = [int]$Matches[2]
                        $vPatch = if ($Matches[3]) { [int]$Matches[3] } else { 0 }
                        if ($vMajor -lt $drMajor -or ($vMajor -eq $drMajor -and $vMinor -le $drMinor)) {
                            # Text is what gets handed to winget; SortKey is only for ordering.
                            [pscustomobject]@{ Text = $ver; SortKey = [version]"$vMajor.$vMinor.$vPatch" }
                        }
                    }
                }
            )
            $BestVersion = $null
            if ($compatCandidates.Count -gt 0) {
                $BestVersion = ($compatCandidates | Sort-Object SortKey -Descending | Select-Object -First 1).Text
            }

            if ($BestVersion) {
                Write-Host "   Installing CUDA Toolkit $BestVersion via winget...  " -ForegroundColor Cyan
                # Relax the error preference while winget runs, then restore it.
                $prevEAPCuda = $ErrorActionPreference
                $ErrorActionPreference = "Continue"
                winget install --id=Nvidia.CUDA --version=$BestVersion -e --source winget --accept-package-agreements --accept-source-agreements 2>&1 | Out-Null
                $ErrorActionPreference = $prevEAPCuda
                Refresh-Environment
                $NvccPath = Find-Nvcc -MaxVersion $DriverMaxCuda
                if ($NvccPath) {
                    Write-Host "   [OK] CUDA Toolkit $BestVersion installed (nvcc: $NvccPath)" -ForegroundColor Green
                }
            } else {
                Write-Host "   [WARN] No compatible CUDA Toolkit version found in winget (need <= $DriverMaxCuda)" -ForegroundColor Yellow
            }
        } else {
            Write-Host "   Installing CUDA Toolkit (latest) via winget..." -ForegroundColor Cyan
            winget install --id=Nvidia.CUDA -e --source winget --accept-package-agreements --accept-source-agreements
            Refresh-Environment
            $NvccPath = Find-Nvcc
            if ($NvccPath) {
                Write-Host "   [OK] CUDA Toolkit installed (nvcc: $NvccPath)" -ForegroundColor Green
            }
        }
    }
}

# Neither discovery nor installation produced an nvcc: point the user at the
# matching download page (version-specific when the driver limit is known)
# and abort.
if (-not $NvccPath) {
    Write-Host "[ERROR] CUDA Toolkit (nvcc) is required but could not be found or installed." -ForegroundColor Red
    if (-not $DriverMaxCuda) {
        Write-Host "        Install CUDA Toolkit from https://developer.nvidia.com/cuda-downloads" -ForegroundColor Yellow
    } else {
        Write-Host "        Install CUDA Toolkit $DriverMaxCuda from https://developer.nvidia.com/cuda-toolkit-archive" -ForegroundColor Yellow
    }
    exit 1
}

# -- Set CUDA env vars so cmake AND MSBuild can find the toolkit --
# Everything below is derived from $NvccPath (the nvcc.exe selected above).
# nvcc sits at <root>\bin\nvcc.exe, so the toolkit root is two levels up.
$CudaToolkitRoot = Split-Path (Split-Path $NvccPath -Parent) -Parent
# CUDA_PATH: used by cmake's find_package(CUDAToolkit)
[Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'Process')
# CudaToolkitDir: the MSBuild property that CUDA .targets checks directly
# Trailing backslash required -- the .targets file appends subpaths to it
[Environment]::SetEnvironmentVariable('CudaToolkitDir', "$CudaToolkitRoot\", 'Process')
# Always persist CUDA_PATH to User registry so the compatible toolkit is used
# in future sessions (overwrites any existing value pointing to a newer, incompatible version)
[Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'User')
Write-Host "   Persisted CUDA_PATH=$CudaToolkitRoot to user environment" -ForegroundColor Gray
# Clear all versioned CUDA_PATH_V* env vars in this process to prevent
# cmake/MSBuild from discovering a conflicting CUDA installation.
# The key list is snapshotted into an array first, then each variable is
# removed (Process scope only) by setting it to $null.
$cudaPathVars = @([Environment]::GetEnvironmentVariables('Process').Keys | Where-Object { $_ -match '^CUDA_PATH_V' })
foreach ($v in $cudaPathVars) {
    [Environment]::SetEnvironmentVariable($v, $null, 'Process')
}
# Set only the versioned var matching the selected toolkit (e.g. CUDA_PATH_V13_0)
# The version is taken from the toolkit directory name ("v<major>.<minor>...");
# when the directory is not named that way no versioned variable is set.
$tkDirName = Split-Path $CudaToolkitRoot -Leaf
if ($tkDirName -match '^v(\d+)\.(\d+)') {
    $cudaPathVerVar = "CUDA_PATH_V$($Matches[1])_$($Matches[2])"
    [Environment]::SetEnvironmentVariable($cudaPathVerVar, $CudaToolkitRoot, 'Process')
    Write-Host "   Set $cudaPathVerVar (cleared other CUDA_PATH_V* vars)" -ForegroundColor Gray
}
# Ensure nvcc's bin dir is on PATH for this process
# NOTE: -notlike "*...*" is a substring test, so a longer PATH entry that
# merely contains this directory also counts as "already present".
$nvccBinDir = Split-Path $NvccPath -Parent
if ($env:PATH -notlike "*$nvccBinDir*") {
    [Environment]::SetEnvironmentVariable('PATH', "$nvccBinDir;$env:PATH", 'Process')
}
# Persist nvcc bin dir to User PATH so it works in new terminals
# (prepended to the existing value; written alone when the user PATH is empty,
# which avoids a trailing ';')
$userPath = [Environment]::GetEnvironmentVariable('Path', 'User')
if (-not $userPath -or $userPath -notlike "*$nvccBinDir*") {
    if ($userPath) {
        [Environment]::SetEnvironmentVariable('Path', "$nvccBinDir;$userPath", 'User')
    } else {
        [Environment]::SetEnvironmentVariable('Path', "$nvccBinDir", 'User')
    }
    Write-Host "   Persisted CUDA bin dir to user PATH" -ForegroundColor Gray
}

# -- Ensure CUDA <-> Visual Studio integration files exist --
# When CUDA is installed before VS Build Tools (or VS is reinstalled after CUDA),
# the MSBuild .targets/.props files that let VS compile .cu files are missing.
# cmake fails with "No CUDA toolset found". Fix: copy from CUDA extras dir.
# The whole step is best-effort: every failure path ends in a warning, never
# in an exit.
if ($VsInstallPath -and $CudaToolkitRoot) {
    # NOTE(review): "v170" is hard-coded -- presumably the VS 2022 layout. If a
    # different VS version was selected and this directory does not exist, the
    # Test-Path check below fails and the step is skipped without any message.
    $vsCustomizations = Join-Path $VsInstallPath "MSBuild\Microsoft\VC\v170\BuildCustomizations"
    $cudaExtras = Join-Path $CudaToolkitRoot "extras\visual_studio_integration\MSBuildExtensions"
    if ((Test-Path $cudaExtras) -and (Test-Path $vsCustomizations)) {
        # Any "CUDA *.targets" file already present counts as "integration installed".
        $hasTargets = Get-ChildItem $vsCustomizations -Filter "CUDA *.targets" -ErrorAction SilentlyContinue
        if (-not $hasTargets) {
            Write-Host "   [INFO] CUDA VS integration missing -- copying .targets files..." -ForegroundColor Yellow
            try {
                # First attempt: plain copy with the current privileges.
                Copy-Item "$cudaExtras\*" $vsCustomizations -Force -ErrorAction Stop
                Write-Host "   [OK] CUDA VS integration files installed" -ForegroundColor Green
            } catch {
                # Direct copy failed (needs admin). Try elevated copy via Start-Process.
                try {
                    $copyCmd = "Copy-Item '$cudaExtras\*' '$vsCustomizations' -Force"
                    Start-Process powershell -ArgumentList "-NoProfile -Command $copyCmd" -Verb RunAs -Wait -ErrorAction Stop
                    # The elevated process's result is not inspected directly;
                    # success is judged by re-checking for the .targets files.
                    $hasTargetsRetry = Get-ChildItem $vsCustomizations -Filter "CUDA *.targets" -ErrorAction SilentlyContinue
                    if ($hasTargetsRetry) {
                        Write-Host "   [OK] CUDA VS integration files installed (elevated)" -ForegroundColor Green
                    } else {
                        # Thrown only to reach the shared warning in the catch below.
                        throw "Copy did not produce .targets files"
                    }
                } catch {
                    Write-Host "   [WARN] Could not copy CUDA VS integration files" -ForegroundColor Yellow
                    Write-Host "          The llama.cpp build may fail with 'No CUDA toolset found'." -ForegroundColor Yellow
                    Write-Host "          Manual fix: copy contents of" -ForegroundColor Yellow
                    Write-Host "            $cudaExtras" -ForegroundColor Cyan
                    Write-Host "          into:" -ForegroundColor Yellow
                    Write-Host "            $vsCustomizations" -ForegroundColor Cyan
                }
            }
        }
    }
}

# Summarize the selected toolkit and the two directory variables derived from it.
Write-Host "[OK] CUDA Toolkit: $NvccPath" -ForegroundColor Green
foreach ($summaryLine in @(
    "   CUDA_PATH      = $CudaToolkitRoot",
    "   CudaToolkitDir = $CudaToolkitRoot\"
)) {
    Write-Host $summaryLine -ForegroundColor Gray
}

# $CudaArch was already detected before toolkit selection (so it could steer
# the choice); nothing is recomputed here -- only a missing value is reported.
if ([string]::IsNullOrEmpty($CudaArch)) {
    Write-Host "   [WARN] Could not detect compute capability -- cmake will use defaults" -ForegroundColor Yellow
}
} else {
    Write-Host "[SKIP] CUDA Toolkit -- no NVIDIA GPU detected" -ForegroundColor Yellow
}

# ============================================
# 1f. Node.js / npm (skip if pip-installed -- only needed for frontend build)
# ============================================
# Node.js / npm are only needed to build the frontend, so a pip install (where
# the frontend is already bundled) skips this check.  Otherwise Node >= 20 and
# npm >= 11 are required; when they are missing or too old, Node.js LTS is
# installed through winget and its presence is verified before continuing.
if ($IsPipInstall) {
    Write-Host "[OK] Running from pip install - frontend already bundled, skipping Node/npm check" -ForegroundColor Green
} else {
    # setup.sh installs Node LTS (v22) via nvm. We enforce the same range here:
    # Node >= 20, npm >= 11.
    $NeedNode = $true
    try {
        $NodeVersion = (node -v 2>$null)
        $NpmVersion = (npm -v 2>$null)
        if ($NodeVersion -and $NpmVersion) {
            # "v22.3.0" -> 22 ; "11.1.0" -> 11
            $NodeMajor = [int]($NodeVersion -replace 'v','').Split('.')[0]
            $NpmMajor = [int]$NpmVersion.Split('.')[0]

            if ($NodeMajor -ge 20 -and $NpmMajor -ge 11) {
                Write-Host "[OK] Node $NodeVersion and npm $NpmVersion already meet requirements." -ForegroundColor Green
                $NeedNode = $false
            } else {
                Write-Host "[WARN] Node $NodeVersion / npm $NpmVersion too old." -ForegroundColor Yellow
            }
        }
    } catch {
        # Reached when node or npm cannot be resolved at all.
        Write-Host "[WARN] Node/npm not found." -ForegroundColor Yellow
    }

    if ($NeedNode) {
        Write-Host "Installing Node.js LTS via winget..." -ForegroundColor Cyan
        try {
            winget install OpenJS.NodeJS.LTS --source winget --accept-package-agreements --accept-source-agreements
            Refresh-Environment
        } catch {
            Write-Host "[ERROR] Could not install Node.js automatically." -ForegroundColor Red
            Write-Host "Please install Node.js >= 20 from https://nodejs.org/" -ForegroundColor Red
            exit 1
        }

        # A failed winget run only sets a non-zero exit code (it does not
        # throw), so confirm both commands really resolve before reporting OK.
        $nodeResolved = Get-Command node -ErrorAction SilentlyContinue
        $npmResolved = Get-Command npm -ErrorAction SilentlyContinue
        if (-not $nodeResolved -or -not $npmResolved) {
            Write-Host "[ERROR] Could not install Node.js automatically." -ForegroundColor Red
            Write-Host "Please install Node.js >= 20 from https://nodejs.org/" -ForegroundColor Red
            exit 1
        }
    }

    Write-Host "[OK] Node $(node -v) | npm $(npm -v)" -ForegroundColor Green
}

# ============================================
# 1g. Python (>= 3.11 and < 3.14, matching setup.sh)
# ============================================
# Require a runnable Python >= 3.11 and < 3.14.
#   - python missing, or present but unable to report a version: install
#     Python 3.12 through winget and probe again.
#   - python reports a version outside the range: abort.
# On success $PythonOk is $true, $PyVer holds the text printed by
# `python --version`, and $PyMajor / $PyMinor hold the parsed numbers.
$HasPython = $null -ne (Get-Command python -ErrorAction SilentlyContinue)
$PythonOk = $false
$PyVer = $null

# Runs `python --version` and returns its trimmed output when that output
# contains an "X.Y" version number; returns $null otherwise (including when
# the call itself raises an error).
$probePythonVersion = {
    try {
        $text = (& python --version 2>&1 | Out-String).Trim()
        if ($text -match '(\d+)\.(\d+)') { return $text }
    } catch { }
    return $null
}

if ($HasPython) {
    $PyVer = & $probePythonVersion
    if (-not $PyVer) {
        # A 'python' that resolves but prints no version is treated like a missing one.
        Write-Host "[WARN] 'python' is on PATH but did not report a version (possibly the Microsoft Store alias stub)." -ForegroundColor Yellow
    }
}

if (-not $PyVer) {
    # No usable Python -- install 3.12
    Write-Host "Python not found -- installing Python 3.12 via winget..." -ForegroundColor Yellow
    $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue)
    if ($HasWinget) {
        winget install -e --id Python.Python.3.12 --source winget --accept-package-agreements --accept-source-agreements
        Refresh-Environment
    }
    $HasPython = $null -ne (Get-Command python -ErrorAction SilentlyContinue)
    if ($HasPython) { $PyVer = & $probePythonVersion }
    if (-not $PyVer) {
        Write-Host "[ERROR] Python could not be installed automatically." -ForegroundColor Red
        Write-Host "        Install Python 3.12 from https://python.org/downloads/" -ForegroundColor Yellow
        exit 1
    }
}

# $PyVer is guaranteed to contain "X.Y" here.  The numeric part alone is used
# in messages because `python --version` output already starts with "Python".
$null = $PyVer -match '(\d+)\.(\d+)(\.\d+)?'
$PyVerNum = $Matches[0]
$PyMajor = [int]$Matches[1]; $PyMinor = [int]$Matches[2]
if ($PyMajor -eq 3 -and $PyMinor -ge 11 -and $PyMinor -lt 14) {
    Write-Host "[OK] Python $PyVerNum" -ForegroundColor Green
    $PythonOk = $true
} else {
    Write-Host "[ERROR] Python $PyVerNum is outside supported range (need >= 3.11 and < 3.14)." -ForegroundColor Red
    Write-Host "        Install Python 3.12 from https://python.org/downloads/" -ForegroundColor Yellow
    exit 1
}

# Make sure the Python "Scripts" directory is on PATH so the 'unsloth' command
# resolves in terminals opened after setup. The per-user scripts directory is
# preferred when it exists on disk; otherwise the interpreter's default is used.
$ScriptsDir = python -c "import sysconfig; print(sysconfig.get_path('scripts', 'nt_user') if __import__('os').path.exists(sysconfig.get_path('scripts', 'nt_user')) else sysconfig.get_path('scripts'))"
if ($LASTEXITCODE -eq 0 -and $ScriptsDir -and (Test-Path $ScriptsDir)) {
    $UserPath = [Environment]::GetEnvironmentVariable('Path', 'User')
    $UserPathEntries = @()
    if ($UserPath) { $UserPathEntries = $UserPath.Split(';') }

    # Compare with trailing backslashes stripped so "X\" and "X" count as equal.
    $UserPathTrimmed = @($UserPathEntries | ForEach-Object { $_.TrimEnd('\') })
    if ($UserPathTrimmed -notcontains $ScriptsDir) {
        if ($UserPath) {
            $newUserPath = "$ScriptsDir;$UserPath"
        } else {
            $newUserPath = $ScriptsDir
        }
        [Environment]::SetEnvironmentVariable('Path', $newUserPath, 'User')

        # Mirror the change into the running process so it takes effect immediately.
        $ProcessPathEntries = $env:PATH.Split(';')
        $ProcessPathTrimmed = @($ProcessPathEntries | ForEach-Object { $_.TrimEnd('\') })
        if ($ProcessPathTrimmed -notcontains $ScriptsDir) {
            $env:PATH = "$ScriptsDir;$env:PATH"
        }
        Write-Host "   Persisted Python Scripts dir to user PATH: $ScriptsDir" -ForegroundColor Gray
    }
}

Write-Host ""
Write-Host "--- System prerequisites ready ---" -ForegroundColor Green
Write-Host ""

# ==========================================================================
#  PHASE 2: Frontend build (skip if pip-installed -- already bundled)
# ==========================================================================
# A rebuild is needed unless this is a pip install, or dist/ exists and no
# input file (src/, public/, or any top-level file in the frontend directory)
# has a newer LastWriteTime than dist/ itself.
$DistDir = Join-Path $FrontendDir "dist"
$NeedFrontendBuild = $true
if ($IsPipInstall) {
    $NeedFrontendBuild = $false
    Write-Host "[OK] Running from pip install - frontend already bundled, skipping build" -ForegroundColor Green
} elseif (Test-Path $DistDir) {
    $DistTime = (Get-Item $DistDir).LastWriteTime
    $NewerFile = $null

    # Recursive scan of src/ and public/; stop at the first newer file found.
    foreach ($subDir in "src", "public") {
        if ($NewerFile) { break }
        $subPath = Join-Path $FrontendDir $subDir
        if (-not (Test-Path $subPath)) { continue }
        $NewerFile = Get-ChildItem -Path $subPath -Recurse -File -ErrorAction SilentlyContinue |
            Where-Object { $_.LastWriteTime -gt $DistTime } |
            Select-Object -First 1
    }

    # Non-recursive scan of top-level files (package.json, bun.lock, vite.config.ts, index.html, etc.)
    if (-not $NewerFile) {
        $NewerFile = Get-ChildItem -Path $FrontendDir -File -ErrorAction SilentlyContinue |
            Where-Object { $_.LastWriteTime -gt $DistTime } |
            Select-Object -First 1
    }

    if ($NewerFile) {
        Write-Host "[INFO] Frontend source changed since last build -- rebuilding..." -ForegroundColor Yellow
    } else {
        $NeedFrontendBuild = $false
        Write-Host "[OK] Frontend already built and up to date -- skipping build" -ForegroundColor Green
    }
}
if ($NeedFrontendBuild -and -not $IsPipInstall) {
    Write-Host ""
    Write-Host "Building frontend..." -ForegroundColor Cyan

    # State restored in the finally block below, whatever happens during the build.
    $HiddenGitignores = @()
    $prevEAP_npm = $ErrorActionPreference
    $FrontendLocationPushed = $false
    $FrontendFailedStep = $null
    $FrontendExitCode = 0

    try {
        # -- Tailwind v4 .gitignore workaround --
        # Tailwind v4's oxide scanner respects .gitignore in parent directories.
        # Python venvs create a .gitignore with "*" (ignore everything), which
        # prevents Tailwind from scanning .tsx source files for class names.
        # Temporarily hide any such .gitignore during the build, then restore it.
        $WalkDir = (Get-Item $FrontendDir).Parent.FullName
        while ($WalkDir -and $WalkDir -ne [System.IO.Path]::GetPathRoot($WalkDir)) {
            $gi = Join-Path $WalkDir ".gitignore"
            if (Test-Path $gi) {
                $content = Get-Content $gi -Raw -ErrorAction SilentlyContinue
                if ($content -and ($content.Trim() -match '^\*$')) {
                    $hidden = "$gi._twbuild"
                    Rename-Item -Path $gi -NewName (Split-Path $hidden -Leaf) -Force
                    # Record only after a successful rename so restore never touches
                    # a file that was not actually hidden.
                    $HiddenGitignores += $gi
                    Write-Host "   [INFO] Temporarily hiding $gi (venv .gitignore blocks Tailwind scanner)" -ForegroundColor DarkGray
                }
            }
            $WalkDir = Split-Path $WalkDir -Parent
        }

        # npm writes warnings to stderr; lower ErrorActionPreference so PS doesn't
        # treat them as terminating errors.
        $ErrorActionPreference = "Continue"
        Push-Location $FrontendDir
        $FrontendLocationPushed = $true

        npm install 2>&1 | Out-Null
        if ($LASTEXITCODE -ne 0) {
            $FrontendFailedStep = "npm install"
            $FrontendExitCode = $LASTEXITCODE
        } else {
            npm run build 2>&1 | Out-Null
            if ($LASTEXITCODE -ne 0) {
                $FrontendFailedStep = "npm run build"
                $FrontendExitCode = $LASTEXITCODE
            }
        }
    } finally {
        # Runs on success, on a failed npm step, and on any terminating error, so
        # the hidden .gitignore files are never left renamed on disk.
        if ($FrontendLocationPushed) { Pop-Location }
        $ErrorActionPreference = $prevEAP_npm
        foreach ($gi in $HiddenGitignores) {
            Rename-Item -Path "$gi._twbuild" -NewName (Split-Path $gi -Leaf) -Force -ErrorAction SilentlyContinue
        }
    }

    if ($FrontendFailedStep) {
        Write-Host "[ERROR] $FrontendFailedStep failed (exit code $FrontendExitCode)" -ForegroundColor Red
        if ($FrontendFailedStep -eq "npm install") {
            Write-Host "   Try running 'npm install' manually in frontend/ to see errors" -ForegroundColor Yellow
        }
        exit 1
    }

    # -- Validate CSS output --
    # A very small largest-CSS file suggests Tailwind did not see the sources.
    $CssFiles = Get-ChildItem (Join-Path $DistDir "assets") -Filter "*.css" -ErrorAction SilentlyContinue
    $MaxCssSize = ($CssFiles | Measure-Object -Property Length -Maximum).Maximum
    if ($MaxCssSize -lt 100000) {
        Write-Host "[WARN] Largest CSS file is only $([math]::Round($MaxCssSize / 1024))KB -- Tailwind may not have scanned all source files." -ForegroundColor Yellow
        Write-Host "       Expected >100KB. Check for .gitignore files blocking the Tailwind oxide scanner." -ForegroundColor Yellow
    } else {
        Write-Host "[OK] Frontend built to frontend/dist (CSS: $([math]::Round($MaxCssSize / 1024))KB)" -ForegroundColor Green
    }
}

# Install the OXC validator's npm dependencies when its directory is present.
if (Test-Path $OxcValidatorDir) {
    Write-Host "Installing OXC validator runtime..." -ForegroundColor Cyan

    # npm output on stderr must not be promoted to an error while it runs.
    $prevEAP_oxc = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    Push-Location $OxcValidatorDir
    npm install 2>&1 | Out-Null
    $OxcInstallFailed = $LASTEXITCODE -ne 0
    Pop-Location
    $ErrorActionPreference = $prevEAP_oxc

    if ($OxcInstallFailed) {
        Write-Host "[ERROR] OXC validator npm install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
        exit 1
    }
    Write-Host "[OK] OXC validator runtime installed" -ForegroundColor Green
}

# ==========================================================================
#  PHASE 3: Python environment + dependencies
# ==========================================================================
Write-Host ""
Write-Host "Setting up Python environment..." -ForegroundColor Cyan

# Pick the first candidate command that reports Python 3.11, 3.12 or 3.13.
# Candidates that are missing or fail to run are skipped silently.
$PythonCmd = $null
$PythonCandidates = "python3.13", "python3.12", "python3.11", "python3", "python"
foreach ($candidate in $PythonCandidates) {
    try {
        $ver = & $candidate --version 2>&1
        if (-not ($ver -match 'Python 3\.(\d+)')) { continue }
        $minor = [int]$Matches[1]
        if ($minor -lt 11 -or $minor -gt 13) { continue }
        $PythonCmd = $candidate
        break
    } catch { }
}

if ($null -eq $PythonCmd -or $PythonCmd -eq "") {
    Write-Host "[ERROR] No Python 3.11-3.13 found." -ForegroundColor Red
    exit 1
}

Write-Host "[OK] Using $PythonCmd ($(& $PythonCmd --version 2>&1))" -ForegroundColor Green

# Always create a .venv for isolation -- even for pip installs.
# The venv lives under the user's profile (%USERPROFILE%\.unsloth\studio\.venv),
# not inside the repository.
$VenvDir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv"
$ActivateScript = Join-Path $VenvDir "Scripts\Activate.ps1"

# Key the "reuse" decision on the activation script rather than the directory:
# a directory left behind by an interrupted run has no Activate.ps1, and
# continuing without activation would install everything into the system Python.
if (Test-Path $ActivateScript) {
    Write-Host "   Reusing existing virtual environment at $VenvDir" -ForegroundColor Green
} else {
    Write-Host "   Creating virtual environment at $VenvDir..." -ForegroundColor Cyan
    & $PythonCmd -m venv $VenvDir
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path $ActivateScript)) {
        Write-Host "[ERROR] Could not create virtual environment at $VenvDir (exit code $LASTEXITCODE)" -ForegroundColor Red
        Write-Host "        Delete $VenvDir and re-run setup." -ForegroundColor Yellow
        exit 1
    }
}

# pip and python write to stderr even on success (progress bars, warnings).
# With $ErrorActionPreference = "Stop" (set at top of script), PS 5.1
# converts stderr lines into terminating ErrorRecords, breaking output.
# Lower to "Continue" for the pip/python section.
$prevEAP = $ErrorActionPreference
$ErrorActionPreference = "Continue"

. $ActivateScript

# Prefer uv (much faster than pip). If it is not on PATH yet, try the official
# installer once; any failure simply leaves $UseUv false so pip is used.
$UseUv = [bool](Get-Command uv -ErrorAction SilentlyContinue)
if (-not $UseUv) {
    Write-Host "   Installing uv package manager..." -ForegroundColor Cyan
    try {
        powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex" 2>&1 | Out-Null
        Refresh-Environment
        # Refresh-Environment rebuilds PATH from the registry and drops the
        # venv's Scripts directory, so the venv has to be activated again.
        . $ActivateScript
        $UseUv = [bool](Get-Command uv -ErrorAction SilentlyContinue)
    } catch { }
}

# Helper: install packages, trying uv first and falling back to pip.
# All arguments are forwarded unchanged to the underlying installer. When uv
# succeeds nothing is written to the pipeline; otherwise pip's combined
# stdout/stderr is emitted. Callers inspect $LASTEXITCODE for the result.
function Fast-Install {
    param([Parameter(ValueFromRemainingArguments=$true)]$Args_)
    if ($UseUv) {
        # Point uv at whichever python is first on PATH (the activated venv).
        $VenvPy = (Get-Command python).Source
        & uv pip install --python $VenvPy @Args_ 2>&1 | Out-Null
        if ($LASTEXITCODE -eq 0) { return }
    }
    & python -m pip install @Args_ 2>&1
}

Fast-Install --upgrade pip | Out-Null

# if (-not $IsPipInstall) {
#     # Running from repo: copy requirements and do editable install
#     $RepoRoot = (Resolve-Path (Join-Path $ScriptDir "..\..")).Path
#     $ReqsSrc = Join-Path $RepoRoot "backend\requirements"
#     $ReqsDst = Join-Path $PackageDir "requirements"
#     if (-not (Test-Path $ReqsDst)) { New-Item -ItemType Directory -Path $ReqsDst | Out-Null }
#     Copy-Item (Join-Path $ReqsSrc "*.txt") $ReqsDst -Force

#     Write-Host "   Installing CLI entry point..." -ForegroundColor Cyan
#     pip install -e $RepoRoot 2>&1 | Out-Null
# } else {
#     # Running from pip install: the package is in system Python but not in
#     # the fresh .venv. Install it so run_install() can find its modules
#     # and bundled requirements files.
#     Write-Host "   Installing package into venv..." -ForegroundColor Cyan
#     pip install unsloth-roland-test 2>&1 | Out-Null
# }

# Pre-install PyTorch with CUDA support.
# On Windows, the default PyPI torch wheel is CPU-only.
# We need PyTorch's CUDA index to get GPU-enabled wheels.
# PyTorch bundles its own CUDA runtime, so this works regardless
# of whether the CUDA Toolkit is installed yet.
# The CUDA tag is chosen based on the driver's max supported CUDA version.

# Triton kernel compilation produces very long auto-generated file names that
# can exceed the Windows MAX_PATH limit (260 chars). Pointing the inductor
# cache at a short directory avoids that.
$TorchCacheDir = "C:\tc"
if (-not (Test-Path $TorchCacheDir)) {
    $null = New-Item -ItemType Directory -Path $TorchCacheDir -Force
}
# Set for this process and persist for the current user.
$env:TORCHINDUCTOR_CACHE_DIR = $TorchCacheDir
[Environment]::SetEnvironmentVariable('TORCHINDUCTOR_CACHE_DIR', $TorchCacheDir, 'User')
Write-Host "[OK] TORCHINDUCTOR_CACHE_DIR set to $TorchCacheDir (avoids MAX_PATH issues)" -ForegroundColor Green

# Install PyTorch from the PyTorch wheel index: the CPU index when no NVIDIA
# driver was detected, otherwise the CUDA index chosen by Get-PytorchCudaTag.
if (-not $HasNvidiaSmi) {
    Write-Host "   Installing PyTorch (CPU-only)..." -ForegroundColor Cyan
    $output = Fast-Install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/cpu" | Out-String
    if ($LASTEXITCODE -ne 0) {
        Write-Host "[FAILED] PyTorch install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
        Write-Host $output -ForegroundColor Red
        exit 1
    }
} else {
    $CuTag = Get-PytorchCudaTag
    Write-Host "   Installing PyTorch with CUDA support ($CuTag)..." -ForegroundColor Cyan
    Write-Host "   (This download is ~2.8 GB -- may take a few minutes)" -ForegroundColor Gray
    $output = Fast-Install torch torchvision torchaudio --index-url "https://download.pytorch.org/whl/$CuTag" | Out-String
    if ($LASTEXITCODE -ne 0) {
        Write-Host "[FAILED] PyTorch CUDA install failed (exit code $LASTEXITCODE)" -ForegroundColor Red
        Write-Host $output -ForegroundColor Red
        exit 1
    }

    # Triton for Windows enables torch.compile; a failure here is only a warning.
    Write-Host "   Installing Triton for Windows..." -ForegroundColor Cyan
    $output = Fast-Install "triton-windows<3.7" | Out-String
    if ($LASTEXITCODE -eq 0) {
        Write-Host "[OK] Triton for Windows installed (enables torch.compile)" -ForegroundColor Green
    } else {
        Write-Host "[WARN] Triton install failed -- torch.compile may not work" -ForegroundColor Yellow
        Write-Host $output -ForegroundColor Yellow
    }
}

# Ordered heavy dependency installation -- shared cross-platform script
Write-Host "   Running ordered dependency installation..." -ForegroundColor Cyan
python "$PSScriptRoot\install_python_stack.py"
# Capture the result before anything else can overwrite $LASTEXITCODE.
$PythonStackExitCode = $LASTEXITCODE
# Restore ErrorActionPreference after pip/python work
$ErrorActionPreference = $prevEAP
# Abort on failure so setup never reports success with a broken environment.
if ($PythonStackExitCode -ne 0) {
    Write-Host "[FAILED] Python dependency installation failed (exit code $PythonStackExitCode)" -ForegroundColor Red
    exit 1
}

# -- Pre-install transformers 5.x into .venv_t5/ --
# Models like GLM-4.7-Flash need transformers>=5.3.0. Installing at runtime is
# slow (~10-15s), so the packages are placed in a separate directory up front;
# the training subprocess just prepends .venv_t5/ to sys.path.
Write-Host ""
Write-Host "   Pre-installing transformers 5.x for newer model support..." -ForegroundColor Cyan
$VenvT5Dir = Join-Path $env:USERPROFILE ".unsloth\studio\.venv_t5"
# Start from an empty directory on every run.
if (Test-Path $VenvT5Dir) {
    Remove-Item -Recurse -Force $VenvT5Dir
}
$null = New-Item -ItemType Directory -Path $VenvT5Dir -Force

$prevEAP_t5 = $ErrorActionPreference
$ErrorActionPreference = "Continue"

# Pinned packages, installed without dependencies; any failure is fatal.
$PinnedT5Packages = "transformers==5.3.0", "huggingface_hub==1.7.1", "hf_xet==1.4.2"
foreach ($pkg in $PinnedT5Packages) {
    $output = Fast-Install --target $VenvT5Dir --no-deps $pkg | Out-String
    if ($LASTEXITCODE -eq 0) { continue }
    Write-Host "[FAIL] Could not install $pkg into .venv_t5/" -ForegroundColor Red
    Write-Host $output -ForegroundColor Red
    $ErrorActionPreference = $prevEAP_t5
    exit 1
}

# tiktoken is needed by Qwen-family tokenizers -- installed with deps since
# regex/requests may be missing on Windows. Failure is only a warning.
$output = Fast-Install --target $VenvT5Dir tiktoken | Out-String
if ($LASTEXITCODE -ne 0) {
    Write-Host "[WARN] Could not install tiktoken into .venv_t5/ -- Qwen tokenizers may fail" -ForegroundColor Yellow
}
$ErrorActionPreference = $prevEAP_t5
Write-Host "[OK] Transformers 5.x pre-installed to .venv_t5/" -ForegroundColor Green

# ==========================================================================
#  PHASE 3.5: Install OpenSSL dev (for HTTPS support in llama-server)
# ==========================================================================
# llama-server needs OpenSSL to download models from HuggingFace via -hf.
# ShiningLight.OpenSSL.Dev includes headers + libs that cmake can find.
$OpenSslAvailable = $false

# Known install locations; a root counts only if the OpenSSL headers are there.
$OpenSslRoots = @(
    'C:\Program Files\OpenSSL-Win64',
    'C:\Program Files\OpenSSL',
    'C:\OpenSSL-Win64'
)

# Returns the first root that contains include\openssl\ssl.h, or $null.
$FindOpenSslRoot = {
    $OpenSslRoots |
        Where-Object { Test-Path (Join-Path $_ 'include\openssl\ssl.h') } |
        Select-Object -First 1
}

$OpenSslRoot = & $FindOpenSslRoot

if ($OpenSslRoot) {
    $OpenSslAvailable = $true
    Write-Host "[OK] OpenSSL dev found at $OpenSslRoot" -ForegroundColor Green
} else {
    Write-Host ""
    Write-Host "Installing OpenSSL dev (for HTTPS in llama-server)..." -ForegroundColor Cyan
    $HasWinget = $null -ne (Get-Command winget -ErrorAction SilentlyContinue)
    if ($HasWinget) {
        # Pin the source to "winget", matching the other winget installs in this script.
        winget install -e --id ShiningLight.OpenSSL.Dev --source winget --accept-package-agreements --accept-source-agreements
        # Re-check after install
        $OpenSslRoot = & $FindOpenSslRoot
        if ($OpenSslRoot) {
            $OpenSslAvailable = $true
            Write-Host "[OK] OpenSSL dev installed at $OpenSslRoot" -ForegroundColor Green
        }
    }
    if (-not $OpenSslAvailable) {
        Write-Host "[WARN] OpenSSL dev not available -- llama-server will be built without HTTPS" -ForegroundColor Yellow
    }
}

# ==========================================================================
#  PHASE 4: Build llama.cpp with CUDA for GGUF inference + export
# ==========================================================================
# Everything is built under ~/.unsloth/llama.cpp, one shared location in the
# user's home directory, used by both the inference server and the GGUF export
# pipeline (unsloth-zoo). Two targets are built:
#   - llama-server:   for GGUF model inference (with HTTPS if OpenSSL available)
#   - llama-quantize: for GGUF export quantization
# Prerequisites (git, cmake, VS Build Tools, CUDA Toolkit) already installed in Phase 1.
$UnslothHome = Join-Path $env:USERPROFILE ".unsloth"
if (-not (Test-Path $UnslothHome)) {
    $null = New-Item -ItemType Directory -Force $UnslothHome
}
$LlamaCppDir = Join-Path $UnslothHome "llama.cpp"
$BuildDir = Join-Path $LlamaCppDir "build"
$LlamaServerBin = Join-Path $BuildDir "bin\Release\llama-server.exe"

$HasCmakeForBuild = $null -ne (Get-Command cmake -ErrorAction SilentlyContinue)

# An existing binary is rebuilt when its recorded CUDA setting in CMakeCache.txt
# disagrees with whether an NVIDIA GPU is detected now.
$NeedRebuild = $false
if (Test-Path $LlamaServerBin) {
    $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt"
    if (Test-Path $CmakeCacheFile) {
        $cachedCuda = Select-String -Path $CmakeCacheFile -Pattern 'GGML_CUDA:BOOL=ON' -Quiet
        if ($HasNvidiaSmi) {
            if (-not $cachedCuda) {
                Write-Host "   Existing llama-server is CPU-only but GPU is available -- rebuilding" -ForegroundColor Yellow
                $NeedRebuild = $true
            }
        } elseif ($cachedCuda) {
            Write-Host "   Existing llama-server was built with CUDA but no GPU detected -- rebuilding" -ForegroundColor Yellow
            $NeedRebuild = $true
        }
    }
}

# Decide between: reuse the existing binary, skip/abort because cmake is
# missing, or clone + configure + build llama.cpp.
if ((Test-Path $LlamaServerBin) -and -not $NeedRebuild) {
    Write-Host ""
    Write-Host "[OK] llama-server already exists at $LlamaServerBin" -ForegroundColor Green
} elseif (-not $HasCmakeForBuild) {
    Write-Host ""
    if (-not $HasNvidiaSmi) {
        # CPU-only machines depend entirely on llama-server for GGUF chat -- cmake is required
        Write-Host "[ERROR] CMake is required to build llama-server for GGUF chat mode." -ForegroundColor Red
        Write-Host "        Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow
        exit 1
    }
    Write-Host "[SKIP] llama-server build -- cmake not available" -ForegroundColor Yellow
    Write-Host "       GGUF inference and export will not be available." -ForegroundColor Yellow
    Write-Host "       Install CMake from https://cmake.org/download/ and re-run setup." -ForegroundColor Yellow
} else {
    Write-Host ""
    # The banner mirrors the condition used for -DGGML_CUDA below, so it never
    # announces a CUDA build that is actually configured with CUDA off.
    if ($HasNvidiaSmi -and $NvccPath) {
        Write-Host "Building llama.cpp with CUDA support..." -ForegroundColor Cyan
    } elseif ($HasNvidiaSmi) {
        Write-Host "Building llama.cpp (CPU-only, NVIDIA GPU detected but nvcc was not found)..." -ForegroundColor Yellow
    } else {
        Write-Host "Building llama.cpp (CPU-only, no NVIDIA GPU detected)..." -ForegroundColor Cyan
    }
    Write-Host "   This typically takes 5-10 minutes on first build." -ForegroundColor Gray
    Write-Host ""

    # Start total build timer
    $totalSw = [System.Diagnostics.Stopwatch]::StartNew()

    # Native commands (git, cmake) write to stderr even on success.
    # With $ErrorActionPreference = "Stop" (set at top of script), PS 5.1
    # converts stderr lines into terminating ErrorRecords, breaking output.
    # Lower to "Continue" for the build section.
    $prevEAP = $ErrorActionPreference
    $ErrorActionPreference = "Continue"

    # $BuildOk gates every later step; $FailedStep names the step reported on failure.
    $BuildOk = $true
    $FailedStep = ""

    # Re-sanitize CUDA_PATH_V* vars -- Refresh-Environment (called during
    # Node/Python installs above) may have repopulated conflicting versioned
    # vars from the Machine registry.
    if ($HasNvidiaSmi -and $CudaToolkitRoot) {
        $cudaPathVars2 = @([Environment]::GetEnvironmentVariables('Process').Keys | Where-Object { $_ -match '^CUDA_PATH_V' })
        foreach ($v2 in $cudaPathVars2) {
            [Environment]::SetEnvironmentVariable($v2, $null, 'Process')
        }
        # Re-create only the versioned variable that matches the selected toolkit
        # directory name (e.g. "v12.4" -> CUDA_PATH_V12_4).
        $tkDirName2 = Split-Path $CudaToolkitRoot -Leaf
        if ($tkDirName2 -match '^v(\d+)\.(\d+)') {
            [Environment]::SetEnvironmentVariable("CUDA_PATH_V$($Matches[1])_$($Matches[2])", $CudaToolkitRoot, 'Process')
        }
        # Also re-assert CUDA_PATH and CudaToolkitDir in case they were overwritten
        [Environment]::SetEnvironmentVariable('CUDA_PATH', $CudaToolkitRoot, 'Process')
        [Environment]::SetEnvironmentVariable('CudaToolkitDir', "$CudaToolkitRoot\", 'Process')
    }

    # -- Step A: Clone or pull llama.cpp --

    if (Test-Path (Join-Path $LlamaCppDir ".git")) {
        Write-Host "   llama.cpp repo already cloned, pulling latest..." -ForegroundColor Gray
        git -C $LlamaCppDir pull 2>&1 | Out-Null
        if ($LASTEXITCODE -ne 0) {
            # Best-effort: a failed pull is not fatal, the existing checkout is built.
            Write-Host "   [WARN] git pull failed -- using existing source" -ForegroundColor Yellow
        }
    } else {
        Write-Host "   Cloning llama.cpp..." -ForegroundColor Gray
        # A directory without .git is not a usable checkout; remove it before cloning.
        if (Test-Path $LlamaCppDir) { Remove-Item -Recurse -Force $LlamaCppDir }
        git clone --depth 1 https://github.com/ggml-org/llama.cpp.git $LlamaCppDir 2>&1 | Out-Null
        if ($LASTEXITCODE -ne 0) {
            $BuildOk = $false
            $FailedStep = "git clone"
        }
    }

    # -- Step B: cmake configure --
    # Clean stale CMake cache to prevent previous CUDA settings from leaking
    # into a CPU-only rebuild (or vice versa).
    $CmakeCacheFile = Join-Path $BuildDir "CMakeCache.txt"
    if (Test-Path $CmakeCacheFile) {
        Remove-Item -Recurse -Force $BuildDir
    }

    if ($BuildOk) {
        Write-Host ""
        Write-Host "--- cmake configure ---" -ForegroundColor Cyan

        $CmakeArgs = @(
            '-S', $LlamaCppDir,
            '-B', $BuildDir,
            '-G', $CmakeGenerator,
            '-Wno-dev'
        )
        # Tell cmake exactly where VS is (bypasses registry lookup)
        if ($VsInstallPath) {
            $CmakeArgs += "-DCMAKE_GENERATOR_INSTANCE=$VsInstallPath"
        }
        # Common flags
        $CmakeArgs += '-DBUILD_SHARED_LIBS=OFF'
        $CmakeArgs += '-DLLAMA_BUILD_TESTS=OFF'
        $CmakeArgs += '-DLLAMA_BUILD_EXAMPLES=OFF'
        $CmakeArgs += '-DLLAMA_BUILD_SERVER=ON'
        $CmakeArgs += '-DGGML_NATIVE=ON'
        # HTTPS support via OpenSSL
        if ($OpenSslAvailable -and $OpenSslRoot) {
            $CmakeArgs += "-DOPENSSL_ROOT_DIR=$OpenSslRoot"
            $CmakeArgs += '-DLLAMA_OPENSSL=ON'
        } else {
            $CmakeArgs += '-DLLAMA_CURL=OFF'
        }
        $CmakeArgs += '-DCMAKE_EXE_LINKER_FLAGS=/NODEFAULTLIB:LIBCMT'
        # CUDA flags -- only if GPU available, otherwise explicitly disable
        if ($HasNvidiaSmi -and $NvccPath) {
            $CmakeArgs += '-DGGML_CUDA=ON'
            $CmakeArgs += "-DCUDAToolkit_ROOT=$CudaToolkitRoot"
            $CmakeArgs += "-DCUDA_TOOLKIT_ROOT_DIR=$CudaToolkitRoot"
            $CmakeArgs += "-DCMAKE_CUDA_COMPILER=$NvccPath"
            if ($CudaArch) {
                # Validate nvcc actually supports this architecture
                if (Test-NvccArchSupport -NvccExe $NvccPath -Arch $CudaArch) {
                    $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$CudaArch"
                } else {
                    # GPU arch too new for this toolkit -- fall back to highest supported.
                    # PTX forward-compatibility will JIT-compile for the actual GPU at runtime.
                    $maxArch = Get-NvccMaxArch -NvccExe $NvccPath
                    if ($maxArch) {
                        $CmakeArgs += "-DCMAKE_CUDA_ARCHITECTURES=$maxArch"
                        Write-Host "   [WARN] GPU is sm_$CudaArch but nvcc only supports up to sm_$maxArch" -ForegroundColor Yellow
                        Write-Host "          Building with sm_$maxArch (PTX will JIT for your GPU at runtime)" -ForegroundColor Yellow
                    }
                    # else: omit flag entirely, let cmake pick defaults
                }
            }
        } else {
            $CmakeArgs += '-DGGML_CUDA=OFF'
        }

        $cmakeOutput = cmake @CmakeArgs 2>&1 | Out-String
        if ($LASTEXITCODE -ne 0) {
            $BuildOk = $false
            $FailedStep = "cmake configure"
            Write-Host $cmakeOutput -ForegroundColor Red
            # Extra hint when the configure log points at a CUDA toolset problem.
            if ($cmakeOutput -match 'No CUDA toolset found|CUDA_TOOLKIT_ROOT_DIR|nvcc') {
                Write-Host ""
                Write-Host "   Hint: CUDA VS integration may be missing. Try running as admin:" -ForegroundColor Yellow
                Write-Host "   Copy contents of:" -ForegroundColor Yellow
                Write-Host "     <CUDA_PATH>\extras\visual_studio_integration\MSBuildExtensions" -ForegroundColor Yellow
                Write-Host "   into:" -ForegroundColor Yellow
                Write-Host "     <VS_PATH>\MSBuild\Microsoft\VC\v170\BuildCustomizations" -ForegroundColor Yellow
            }
        }
    }

    # -- Step C: Build llama-server --
    $NumCpu = [Environment]::ProcessorCount
    if ($NumCpu -lt 1) { $NumCpu = 4 }

    if ($BuildOk) {
        Write-Host ""
        Write-Host "--- cmake build (llama-server) ---" -ForegroundColor Cyan
        Write-Host "   Parallel jobs: $NumCpu" -ForegroundColor Gray
        Write-Host ""

        $output = cmake --build $BuildDir --config Release --target llama-server -j $NumCpu 2>&1 | Out-String
        if ($LASTEXITCODE -ne 0) {
            $BuildOk = $false
            $FailedStep = "cmake build (llama-server)"
            Write-Host $output -ForegroundColor Red
        }
    }

    # -- Step D: Build llama-quantize (optional, best-effort) --
    if ($BuildOk) {
        Write-Host ""
        Write-Host "--- cmake build (llama-quantize) ---" -ForegroundColor Cyan
        $output = cmake --build $BuildDir --config Release --target llama-quantize -j $NumCpu 2>&1 | Out-String
        if ($LASTEXITCODE -ne 0) {
            # Deliberately does not clear $BuildOk: only GGUF export is affected.
            Write-Host "   [WARN] llama-quantize build failed (GGUF export may be unavailable)" -ForegroundColor Yellow
            Write-Host $output -ForegroundColor Yellow
        }
    }

    # Restore ErrorActionPreference
    $ErrorActionPreference = $prevEAP

    # Stop timer
    $totalSw.Stop()
    $totalMin = [math]::Floor($totalSw.Elapsed.TotalMinutes)
    $totalSec = [math]::Round($totalSw.Elapsed.TotalSeconds % 60, 1)

    # -- Summary --
    Write-Host ""
    if ($BuildOk -and (Test-Path $LlamaServerBin)) {
        Write-Host "[OK] llama-server built at $LlamaServerBin" -ForegroundColor Green
        $QuantizeBin = Join-Path $BuildDir "bin\Release\llama-quantize.exe"
        if (Test-Path $QuantizeBin) {
            Write-Host "[OK] llama-quantize available for GGUF export" -ForegroundColor Green
        }
        Write-Host "   Build time: ${totalMin}m ${totalSec}s" -ForegroundColor Cyan
    } else {
        # Check alternate paths (some cmake generators don't use Release subdir)
        $altBin = Join-Path $BuildDir "bin\llama-server.exe"
        if ($BuildOk -and (Test-Path $altBin)) {
            Write-Host "[OK] llama-server built at $altBin" -ForegroundColor Green
            Write-Host "   Build time: ${totalMin}m ${totalSec}s" -ForegroundColor Cyan
        } else {
            # Every step reported success but no binary exists at either location:
            # name that situation instead of printing an empty step.
            if (-not $FailedStep) {
                $FailedStep = "locating llama-server.exe in build output"
            }
            Write-Host "[FAILED] llama.cpp build failed at step: $FailedStep (${totalMin}m ${totalSec}s)" -ForegroundColor Red
            Write-Host "         To retry: delete $LlamaCppDir and re-run setup." -ForegroundColor Yellow
            exit 1
        }
    }
}

# ============================================
# Done
# ============================================
# Final banner: one blank line, then the boxed launch instructions in green.
Write-Host ""
$CompletionBanner = @(
    "+===============================================+",
    "|           Setup Complete!                     |",
    "|                                               |",
    "|  Launch with:                                 |",
    "|    unsloth studio -H 0.0.0.0 -p 8888          |",
    "|                                               |",
    "+===============================================+"
)
foreach ($bannerLine in $CompletionBanner) {
    Write-Host $bannerLine -ForegroundColor Green
}


================================================
FILE: studio/setup.sh
================================================
#!/usr/bin/env bash
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

# Strict mode: exit on any failing command (-e), treat unset variables as
# errors (-u), and make a pipeline fail if any stage fails (pipefail).
set -euo pipefail

# Absolute path of the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Absolute path of the parent directory of SCRIPT_DIR.
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# ── Helper: run a command silently; reveal its output only if it fails ──
# Usage: run_quiet <label> <command> [args...]
# stdout and stderr of the command are captured in a temp file. On success the
# capture is discarded. On failure the label, exit code and captured output are
# printed and the script exits with the command's exit code.
run_quiet() {
    local label="$1"
    shift
    local tmplog
    local status=0
    tmplog=$(mktemp)
    "$@" > "$tmplog" 2>&1 || status=$?
    if [ "$status" -ne 0 ]; then
        echo "❌ $label failed (exit code $status):"
        cat "$tmplog"
        rm -f "$tmplog"
        exit "$status"
    fi
    rm -f "$tmplog"
}

printf '%s\n' \
    "╔══════════════════════════════════════╗" \
    "║     Unsloth Studio Setup Script      ║" \
    "╚══════════════════════════════════════╝"

# ── Remove stale Unsloth compiled caches from the three known locations ──
for _stale_cache_parent in "$REPO_ROOT" "$SCRIPT_DIR/backend" "$SCRIPT_DIR/tmp"; do
    rm -rf "$_stale_cache_parent/unsloth_compiled_cache"
done

# ── Detect Colab (like unsloth does) ──
# Colab is assumed when any line of the environment listing starts with COLAB_.
# The leading newline lets the very first entry match the same pattern.
IS_COLAB=false
keynames=$'\n'$(printenv | cut -d= -f1)
case "$keynames" in
    *$'\nCOLAB_'*) IS_COLAB=true ;;
esac

# ── Detect whether frontend needs building ──
# The build is skipped only when frontend/dist exists and nothing it depends on
# is newer than it: top-level files in frontend/ (package.json, bun.lock,
# vite.config.ts, index.html, etc.) and everything under src/ and public/.
# Covers PyPI installs (dist/ bundled), repeat runs (no changes), and
# upgrades/pulls (newer sources trigger a rebuild).
_NEED_FRONTEND_BUILD=true
if [ -d "$SCRIPT_DIR/frontend/dist" ]; then
    # Top-level files only; stop at the first newer file.
    _changed=$(find "$SCRIPT_DIR/frontend" -maxdepth 1 -type f \
        -newer "$SCRIPT_DIR/frontend/dist" -print -quit 2>/dev/null)
    if [ -z "$_changed" ]; then
        # Recursive check of src/ and public/. find exits non-zero when one of
        # them is missing, so `|| true` keeps `set -e` from aborting the script.
        _changed=$(find "$SCRIPT_DIR/frontend/src" "$SCRIPT_DIR/frontend/public" \
            -type f -newer "$SCRIPT_DIR/frontend/dist" -print -quit 2>/dev/null) || true
    fi
    if [ "$_changed" = "" ]; then
        _NEED_FRONTEND_BUILD=false
    fi
fi
if [ "$_NEED_FRONTEND_BUILD" = false ]; then
    echo "✅ Frontend already built and up to date -- skipping Node/npm check."
else
# Decide whether Node must be installed through nvm.
# Requirements checked below: Node major >= 20 and npm major >= 11.
NEED_NODE=true
if ! command -v node &>/dev/null || ! command -v npm &>/dev/null; then
    echo "⚠️  Node/npm not found. Installing via nvm..."
else
    NODE_MAJOR=$(node -v | sed 's/v//' | cut -d. -f1)
    NPM_MAJOR=$(npm -v | cut -d. -f1)
    if [ "$NODE_MAJOR" -ge 20 ] && [ "$NPM_MAJOR" -ge 11 ]; then
        echo "✅ Node $(node -v) and npm $(npm -v) already meet requirements. Skipping nvm install."
        NEED_NODE=false
    elif [ "$IS_COLAB" = true ]; then
        echo "✅ Node $(node -v) and npm $(npm -v) detected in Colab."
        # In Colab, just upgrade npm directly - nvm doesn't work well
        if [ "$NPM_MAJOR" -lt 11 ]; then
            echo "   Upgrading npm to latest..."
            npm install -g npm@latest > /dev/null 2>&1
        fi
        NEED_NODE=false
    else
        echo "⚠️  Node $(node -v) / npm $(npm -v) too old. Installing via nvm..."
    fi
fi

if [ "$NEED_NODE" = true ]; then
    # ── 2. Install nvm ──
    export NODE_OPTIONS=--dns-result-order=ipv4first # or else fails on colab.
    echo "Installing nvm..."
    curl -so- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash > /dev/null 2>&1

    # Load nvm (source ~/.bashrc won't work inside a script)
    export NVM_DIR="$HOME/.nvm"
    # nounset is switched off while nvm is sourced and used, and restored
    # after `nvm use` (presumably nvm.sh is not `set -u` clean).
    set +u
    [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

    # ── Fix npmrc conflict with nvm ──
    # System npm (apt, conda, etc.) may have written `prefix` or `globalconfig`
    # to ~/.npmrc, which is incompatible with nvm and causes "nvm use" to fail
    # with: "has a `globalconfig` and/or a `prefix` setting, which are
    # incompatible with nvm."
    # The pattern is plain POSIX ERE ([[:space:]], unescaped | and ()), used
    # with grep -E / sed -E, so it behaves the same with GNU and BSD (macOS)
    # tools. `\s` and BRE `\|` are GNU extensions that BSD sed does not match.
    _npmrc_conflict_re='^[[:space:]]*(prefix|globalconfig)[[:space:]]*='
    if [ -f "$HOME/.npmrc" ] && grep -qE "$_npmrc_conflict_re" "$HOME/.npmrc"; then
        echo "   Removing incompatible prefix/globalconfig from ~/.npmrc for nvm..."
        # -i.bak keeps the original file as ~/.npmrc.bak
        sed -i.bak -E "/$_npmrc_conflict_re/d" "$HOME/.npmrc"
    fi

    # ── 3. Install Node LTS ──
    echo "Installing Node LTS..."
    run_quiet "nvm install" nvm install --lts
    nvm use --lts > /dev/null 2>&1
    set -u
    # ── 4. Verify versions ──
    NODE_MAJOR=$(node -v | sed 's/v//' | cut -d. -f1)
    NPM_MAJOR=$(npm -v | cut -d. -f1)

    if [ "$NODE_MAJOR" -lt 20 ]; then
        echo "❌ ERROR: Node version must be >= 20 (got $(node -v))"
        exit 1
    fi
    if [ "$NPM_MAJOR" -lt 11 ]; then
        echo "⚠️  npm version is $(npm -v), expected >= 11. Updating..."
        run_quiet "npm update" npm install -g npm@latest
    fi
fi

echo "✅ Node $(node -v) | npm $(npm -v)"

# ── 5. Build frontend ──
cd "$SCRIPT_DIR/frontend"

# Tailwind v4's oxide scanner respects .gitignore in parent directories.
# Python venvs create a .gitignore with "*" (ignore everything), which
# prevents Tailwind from scanning .tsx source files for class names.
# Temporarily hide any such .gitignore during the build, then restore it.
_HIDDEN_GITIGNORES=()

# Move every hidden .gitignore back to its original name. Calling it again is
# harmless: a missing "._twbuild" file just makes the silenced mv a no-op.
_restore_gitignores() {
    # The ${arr[@]+...} form keeps an empty array from tripping `set -u`.
    for _gi in "${_HIDDEN_GITIGNORES[@]+"${_HIDDEN_GITIGNORES[@]}"}"; do
        mv "${_gi}._twbuild" "$_gi" 2>/dev/null || true
    done
}
# Register the trap BEFORE hiding anything, so an abort part-way through the
# loop below still restores whatever was already renamed.
trap _restore_gitignores EXIT

# Walk every parent directory of frontend/ up to and including "/".
_dir="$(pwd)"
while [ "$_dir" != "/" ]; do
    _dir="$(dirname "$_dir")"
    if [ -f "$_dir/.gitignore" ] && grep -qx '\*' "$_dir/.gitignore" 2>/dev/null; then
        # A parent directory may not be writable: warn and carry on instead of
        # letting `set -e` kill the whole setup.
        if mv "$_dir/.gitignore" "$_dir/.gitignore._twbuild" 2>/dev/null; then
            _HIDDEN_GITIGNORES+=("$_dir/.gitignore")
        else
            echo "⚠️  WARNING: Could not temporarily hide $_dir/.gitignore; Tailwind may skip some source files."
        fi
    fi
done

run_quiet "npm install" npm install
run_quiet "npm run build" npm run build

_restore_gitignores
trap - EXIT

# Validate CSS output -- catch truncated Tailwind builds
# `wc -c` runs once per file (`\;`) so no "total" line can be mistaken for the
# largest file. awk prints the maximum size, or nothing when no file matched.
# `|| true` keeps a missing dist/assets (find error + pipefail) from aborting
# before the warning below can be shown.
_MAX_CSS=$(find "$SCRIPT_DIR/frontend/dist/assets" -name '*.css' -exec wc -c {} \; 2>/dev/null \
    | awk '$1 > max { max = $1 } END { if (NR) print max + 0 }') || true
if [ -z "$_MAX_CSS" ]; then
    echo "⚠️  WARNING: No CSS files were emitted. The frontend build may have failed."
elif [ "$_MAX_CSS" -lt 100000 ]; then
    echo "⚠️  WARNING: Largest CSS file is only $((_MAX_CSS / 1024))KB (expected >100KB)."
    echo "   Tailwind may not have scanned all source files. Check for .gitignore interference."
fi

cd "$SCRIPT_DIR"
echo "✅ Frontend built to frontend/dist"

fi  # end frontend build check

# ── oxc-validator runtime (needs npm -- skip if not available) ──
# Installs the validator's npm dependencies in place, then returns to
# $SCRIPT_DIR. Skipped when npm or the validator directory is missing.
_OXC_VALIDATOR_DIR="$SCRIPT_DIR/backend/core/data_recipe/oxc-validator"
if command -v npm &>/dev/null && [ -d "$_OXC_VALIDATOR_DIR" ]; then
    cd "$_OXC_VALIDATOR_DIR"
    run_quiet "npm install (oxc validator runtime)" npm install
    cd "$SCRIPT_DIR"
fi

# ── 6. Python venv + deps ──

# ── 6a. Discover best Python >= 3.11 and < 3.14 (i.e. 3.11.x, 3.12.x, or 3.13.x) ──
MIN_PY_MINOR=11   # minimum minor version (>= 3.11)
MAX_PY_MINOR=13   # maximum minor version (< 3.14)
BEST_PY=""
BEST_MAJOR=0
BEST_MINOR=0

# Collect candidate python3 binaries (python3, python3.9, python3.10, …)
for candidate in $(compgen -c python3 2>/dev/null | grep -E '^python3(\.[0-9]+)?$' | sort -u); do
    if ! command -v "$candidate" &>/dev/null; then
        continue
    fi
    # Get version string, e.g. "Python 3.12.5"
    ver_str=$("$candidate" --version 2>&1) || continue
    ver_str=$(echo "$ver_str" | awk '{print $2}')

    # Require a leading "<major>.<minor>" made of digits. Anything else (an
    # error message, a shim printing something unexpected) is skipped here
    # instead of reaching the integer comparisons below.
    if [[ ! "$ver_str" =~ ^([0-9]+)\.([0-9]+) ]]; then
        continue
    fi
    py_major="${BASH_REMATCH[1]}"
    py_minor="${BASH_REMATCH[2]}"

    # Skip anything that isn't Python 3
    if [ "$py_major" -ne 3 ]; then
        continue
    fi

    # Skip versions below 3.${MIN_PY_MINOR} (require >= 3.11)
    if [ "$py_minor" -lt "$MIN_PY_MINOR" ]; then
        continue
    fi

    # Skip versions above 3.${MAX_PY_MINOR} (require < 3.14)
    if [ "$py_minor" -gt "$MAX_PY_MINOR" ]; then
        continue
    fi

    # Keep the highest qualifying version
    if [ "$py_minor" -gt "$BEST_MINOR" ]; then
        BEST_PY="$candidate"
        BEST_MAJOR="$py_major"
        BEST_MINOR="$py_minor"
    fi
done
echo "finished finding best python"
if [ -z "$BEST_PY" ]; then
    echo "❌ ERROR: No Python version between 3.${MIN_PY_MINOR} and 3.${MAX_PY_MINOR} found on this system."
    echo "   Detected Python 3 installations:"
    for candidate in $(compgen -c python3 2>/dev/null | grep -E '^python3(\.[0-9]+)?$' | sort -u); do
        if command -v "$candidate" &>/dev/null; then
            echo "     - $candidate ($("$candidate" --version 2>&1))"
        fi
    done
    echo ""
    # Every minor version in the range qualifies, not just the two endpoints.
    echo "   Please install a Python version from 3.${MIN_PY_MINOR} through 3.${MAX_PY_MINOR}."
    echo "   For example:  sudo apt install python3.12 python3.12-venv"
    exit 1
fi

BEST_VER=$("$BEST_PY" --version 2>&1 | awk '{print $2}')
echo "✅ Using $BEST_PY ($BEST_VER) — compatible (3.${MIN_PY_MINOR}.x – 3.${MAX_PY_MINOR}.x)"

# Paths under backend/requirements/single-env/
REQ_ROOT="$SCRIPT_DIR/backend/requirements"
_SINGLE_ENV_DIR="$REQ_ROOT/single-env"
SINGLE_ENV_CONSTRAINTS="$_SINGLE_ENV_DIR/constraints.txt"
SINGLE_ENV_DATA_DESIGNER="$_SINGLE_ENV_DIR/data-designer.txt"
SINGLE_ENV_DATA_DESIGNER_DEPS="$_SINGLE_ENV_DIR/data-designer-deps.txt"
SINGLE_ENV_PATCH="$_SINGLE_ENV_DIR/patch_metadata.py"

# Run install_python_stack.py (next to this script) with whichever `python`
# is first on PATH at call time.
install_python_stack() {
    local installer="$SCRIPT_DIR/install_python_stack.py"
    python "$installer"
}

# The venv lives under ~/.unsloth/studio/ (a shared location outside the repo).
# Every platform, Colab included, uses this same isolated venv so studio
# dependencies never end up in the system Python.
STUDIO_HOME="$HOME/.unsloth/studio"
VENV_DIR="$STUDIO_HOME/.venv"
VENV_T5_DIR="$STUDIO_HOME/.venv_t5"
mkdir -p "$STUDIO_HOME"

# Clean up legacy in-repo venvs if they exist
for _legacy_venv in .venv .venv_overlay .venv_t5; do
    if [ -d "$REPO_ROOT/$_legacy_venv" ]; then
        rm -rf "$REPO_ROOT/$_legacy_venv"
    fi
done

# Any existing venv / transformers-5 directory is discarded and rebuilt
rm -rf "$VENV_DIR"
rm -rf "$VENV_T5_DIR"

# Preferred path: a normal venv that ships pip. If that fails (some
# environments like Colab have broken ensurepip), create the venv without pip
# and bootstrap pip with get-pip.py.
if "$BEST_PY" -m venv "$VENV_DIR" 2>/dev/null; then
    source "$VENV_DIR/bin/activate"
else
    "$BEST_PY" -m venv --without-pip "$VENV_DIR"
    source "$VENV_DIR/bin/activate"
    curl -sS https://bootstrap.pypa.io/get-pip.py | python > /dev/null
fi

# ── Ensure uv is available (much faster than pip) ──
# USE_UV ends up true when uv is already on PATH, or when the installer
# script succeeds and uv can then be found via ~/.local/bin.
USE_UV=false
if command -v uv &>/dev/null; then
    USE_UV=true
elif curl -LsSf https://astral.sh/uv/install.sh | sh > /dev/null 2>&1; then
    export PATH="$HOME/.local/bin:$PATH"
    if command -v uv &>/dev/null; then
        USE_UV=true
    fi
fi

# Helper: install a package, preferring uv with pip fallback
# Usage: fast_install [installer args...] <requirement>...
#   Tries `uv pip install` against the active `python` when uv is usable;
#   if uv is unavailable or that install fails, runs `python -m pip install`.
fast_install() {
    if [ "$USE_UV" = true ] && uv pip install --python "$(command -v python)" "$@"; then
        return 0
    fi
    python -m pip install "$@"
}

cd "$SCRIPT_DIR"
install_python_stack

# ── 6b. Pre-install transformers 5.x into .venv_t5/ ──
# Models like GLM-4.7-Flash need transformers>=5.3.0. Installing at runtime is
# slow (~10-15s), so the packages are pre-installed into a separate directory
# and the training subprocess just prepends .venv_t5/ to sys.path.
echo ""
echo "   Pre-installing transformers 5.x for newer model support..."
mkdir -p "$VENV_T5_DIR"
# Each entry is "<run_quiet label>|<pinned requirement>", installed with
# --no-deps into the target directory.
for _t5_pin in \
    "install transformers 5.x|transformers==5.3.0" \
    "install huggingface_hub for t5|huggingface_hub==1.7.1" \
    "install hf_xet for t5|hf_xet==1.4.2"; do
    run_quiet "${_t5_pin%%|*}" fast_install --target "$VENV_T5_DIR" --no-deps "${_t5_pin#*|}"
done
# tiktoken is needed by Qwen-family tokenizers. Install with deps since
# regex/requests may be missing on Windows.
run_quiet "install tiktoken for t5" fast_install --target "$VENV_T5_DIR" "tiktoken"
echo "✅ Transformers 5.x pre-installed to $VENV_T5_DIR/"

# ── 7. WSL: pre-install GGUF build dependencies ──
# On WSL, sudo requires a password and can't be entered during GGUF export
# (runs in a non-interactive subprocess). Install build deps here instead.
# WSL is detected by a case-insensitive match for "microsoft" in /proc/version.
# Outcome: either the packages are present, or _SKIP_GGUF_BUILD=true is set so
# the llama.cpp build step later in this script is skipped.
if grep -qi microsoft /proc/version 2>/dev/null; then
    echo ""
    echo "⚠️  WSL detected -- installing build dependencies for GGUF export..."
    # Space-separated list; expanded unquoted below so each name is one argument.
    _GGUF_DEPS="pciutils build-essential cmake curl git libcurl4-openssl-dev"

    # Try without sudo first (works when already root)
    # Failures are ignored here; the check below decides what is still missing.
    apt-get update -y >/dev/null 2>&1 || true
    apt-get install -y $_GGUF_DEPS >/dev/null 2>&1 || true

    # Check which packages are still missing
    # build-essential -> gcc on PATH, pciutils -> lspci on PATH,
    # libcurl4-openssl-dev -> dpkg status, everything else -> a command of the
    # same name on PATH (cmake, curl, git).
    _STILL_MISSING=""
    for _pkg in $_GGUF_DEPS; do
        case "$_pkg" in
            build-essential) command -v gcc >/dev/null 2>&1 || _STILL_MISSING="$_STILL_MISSING $_pkg" ;;
            pciutils) command -v lspci >/dev/null 2>&1 || _STILL_MISSING="$_STILL_MISSING $_pkg" ;;
            libcurl4-openssl-dev) dpkg -s "$_pkg" >/dev/null 2>&1 || _STILL_MISSING="$_STILL_MISSING $_pkg" ;;
            *) command -v "$_pkg" >/dev/null 2>&1 || _STILL_MISSING="$_STILL_MISSING $_pkg" ;;
        esac
    done
    # Strip the leading space left by the string concatenation above.
    _STILL_MISSING=$(echo "$_STILL_MISSING" | sed 's/^ *//')

    if [ -z "$_STILL_MISSING" ]; then
        echo "✅ GGUF build dependencies installed"
    elif command -v sudo >/dev/null 2>&1; then
        echo ""
        echo "   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
        echo "   WARNING: We require sudo elevated permissions to install:"
        echo "   $_STILL_MISSING"
        echo "   If you accept, we'll run sudo now, and it'll prompt your password."
        echo "   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
        echo ""
        printf "   Accept? [Y/n] "
        # Read the answer from the terminal rather than stdin. With no readable
        # tty, or if the read fails, the answer defaults to "y".
        if [ -r /dev/tty ]; then
            read -r REPLY </dev/tty || REPLY="y"
        else
            REPLY="y"
        fi
        case "$REPLY" in
            # Anything starting with n/N declines; every other answer accepts.
            [nN]*)
                echo ""
                echo "   Please install these packages first, then re-run Unsloth Studio setup:"
                echo "   sudo apt-get update -y && sudo apt-get install -y $_STILL_MISSING"
                _SKIP_GGUF_BUILD=true
                ;;
            *)
                # Not guarded with `|| true`: under `set -e` a failing sudo
                # command stops the script here.
                sudo apt-get update -y
                sudo apt-get install -y $_STILL_MISSING
                echo "✅ GGUF build dependencies installed"
                ;;
        esac
    else
        echo "   sudo is not available on this system."
        echo "   Please install as root, then re-run setup:"
        echo "   apt-get install -y $_STILL_MISSING"
        _SKIP_GGUF_BUILD=true
    fi
fi

# ── 8. Build llama.cpp binaries for GGUF inference + export ──
# Builds at ~/.unsloth/llama.cpp — a single shared location under the user's
# home directory. This is used by both the inference server and the GGUF
# export pipeline (unsloth-zoo).
#   - llama-server: for GGUF model inference
#   - llama-quantize: for GGUF export quantization (symlinked to root for check_llama_cpp())
UNSLOTH_HOME="$HOME/.unsloth"
mkdir -p "$UNSLOTH_HOME"
LLAMA_CPP_DIR="$UNSLOTH_HOME/llama.cpp"
LLAMA_SERVER_BIN="$LLAMA_CPP_DIR/build/bin/llama-server"
# _SKIP_GGUF_BUILD is only ever set by the WSL dependency step; the `:-`
# default keeps `set -u` from failing when it was never set.
if [ "${_SKIP_GGUF_BUILD:-}" = true ]; then
    echo ""
    echo "Skipping llama-server build (missing dependencies)"
    echo "   Install the missing packages and re-run setup to enable GGUF inference."
else
# Any previous checkout/build is discarded; the build always starts from a
# fresh clone.
rm -rf "$LLAMA_CPP_DIR"
{
    # Check prerequisites
    if ! command -v cmake &>/dev/null; then
        echo ""
        echo "⚠️  cmake not found — skipping llama-server build (GGUF inference won't be available)"
        echo "   Install cmake and re-run setup.sh to enable GGUF inference."
    elif ! command -v git &>/dev/null; then
        echo ""
        echo "⚠️  git not found — skipping llama-server build (GGUF inference won't be available)"
    else
        echo ""
        echo "Building llama-server for GGUF inference..."

        # BUILD_OK gates every later stage (configure, build, report).
        BUILD_OK=true
        run_quiet "clone llama.cpp" git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "$LLAMA_CPP_DIR" || BUILD_OK=false

        if [ "$BUILD_OK" = true ]; then
            # Skip tests/examples we don't need (faster build)
            # CMAKE_ARGS is a plain string that is expanded unquoted at the
            # cmake call so every -D flag becomes its own argument.
            CMAKE_ARGS="-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_NATIVE=ON"

            # Use ccache if available (dramatically faster rebuilds)
            if command -v ccache &>/dev/null; then
                CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache"
                echo "   Using ccache for faster compilation"
            fi

            # Detect CUDA: check nvcc on PATH, then common install locations
            # When nvcc is found outside PATH, its directory is prepended to PATH.
            NVCC_PATH=""
            if command -v nvcc &>/dev/null; then
                NVCC_PATH="$(command -v nvcc)"
            elif [ -x /usr/local/cuda/bin/nvcc ]; then
                NVCC_PATH="/usr/local/cuda/bin/nvcc"
                export PATH="/usr/local/cuda/bin:$PATH"
            elif ls /usr/local/cuda-*/bin/nvcc &>/dev/null 2>&1; then
                # Pick the newest cuda-XX.X directory
                NVCC_PATH="$(ls -d /usr/local/cuda-*/bin/nvcc 2>/dev/null | sort -V | tail -1)"
                export PATH="$(dirname "$NVCC_PATH"):$PATH"
            fi

            if [ -n "$NVCC_PATH" ]; then
                echo "   Building with CUDA support (nvcc: $NVCC_PATH)..."
                CMAKE_ARGS="$CMAKE_ARGS -DGGML_CUDA=ON"

                # Detect GPU compute capability and limit CUDA architectures
                # Without this, cmake builds for ALL default archs (very slow)
                # CUDA_ARCHS becomes a ';'-separated list, e.g. a compute_cap
                # of "8.6" contributes the entry "86".
                CUDA_ARCHS=""
                if command -v nvidia-smi &>/dev/null; then
                    # Read all GPUs, deduplicate (handles mixed-GPU hosts)
                    _raw_caps=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null || true)
                    while IFS= read -r _cap; do
                        _cap=$(echo "$_cap" | tr -d '[:space:]')
                        # Lines that are not "<digits>.<digits>" are ignored.
                        if [[ "$_cap" =~ ^([0-9]+)\.([0-9]+)$ ]]; then
                            _arch="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
                            # Append if not already present
                            case ";$CUDA_ARCHS;" in
                                *";$_arch;"*) ;;
                                *) CUDA_ARCHS="${CUDA_ARCHS:+$CUDA_ARCHS;}$_arch" ;;
                            esac
                        fi
                    done <<< "$_raw_caps"
                fi

                if [ -n "$CUDA_ARCHS" ]; then
                    echo "   GPU compute capabilities: ${CUDA_ARCHS//;/, } -- limiting build to detected archs"
                    CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHS}"
                else
                    echo "   Could not detect GPU arch -- building for all default CUDA architectures (slower)"
                fi

                # Multi-threaded nvcc compilation (uses all CPU cores per .cu file)
                CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CUDA_FLAGS=--threads=0"
            elif [ -d /usr/local/cuda ] || nvidia-smi &>/dev/null; then
                echo "   CUDA driver detected but nvcc not found — building CPU-only"
                echo "   To enable GPU: install cuda-toolkit or add nvcc to PATH"
            else
                echo "   Building CPU-only (no CUDA detected)..."
            fi

            # Parallel job count: nproc, else sysctl hw.ncpu, else 4.
            NCPU=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)

            # Use Ninja if available (faster parallel builds than Make)
            CMAKE_GENERATOR_ARGS=""
            if command -v ninja &>/dev/null; then
                CMAKE_GENERATOR_ARGS="-G Ninja"
            fi

            # Configure step; both *_ARGS variables are intentionally unquoted.
            run_quiet "cmake llama.cpp" cmake $CMAKE_GENERATOR_ARGS -S "$LLAMA_CPP_DIR" -B "$LLAMA_CPP_DIR/build" $CMAKE_ARGS || BUILD_OK=false
        fi

        if [ "$BUILD_OK" = true ]; then
            run_quiet "build llama-server" cmake --build "$LLAMA_CPP_DIR/build" --config Release --target llama-server -j"$NCPU" || BUILD_OK=false
        fi

        # Also build llama-quantize (needed by unsloth-zoo's GGUF export pipeline)
        if [ "$BUILD_OK" = true ]; then
            # `|| true`: a llama-quantize failure does not flip BUILD_OK.
            run_quiet "build llama-quantize" cmake --build "$LLAMA_CPP_DIR/build" --config Release --target llama-quantize -j"$NCPU" || true
            # Symlink to llama.cpp root — check_llama_cpp() looks for the binary there
            QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize"
            if [ -f "$QUANTIZE_BIN" ]; then
                # The link target is relative, so it resolves from $LLAMA_CPP_DIR.
                ln -sf build/bin/llama-quantize "$LLAMA_CPP_DIR/llama-quantize"
            fi
        fi

        # Final report: which binaries exist after the build attempt.
        if [ "$BUILD_OK" = true ]; then
            if [ -f "$LLAMA_SERVER_BIN" ]; then
                echo "✅ llama-server built at $LLAMA_SERVER_BIN"
            else
                echo "⚠️  llama-server binary not found after build — GGUF inference won't be available"
            fi
            if [ -f "$LLAMA_CPP_DIR/llama-quantize" ]; then
                echo "✅ llama-quantize available for GGUF export"
            fi
        else
            echo "⚠️  llama-server build failed — GGUF inference won't be available, but everything else works"
        fi
    fi
}
fi  # end _SKIP_GGUF_BUILD check

# Final banner: Colab users get notebook instructions, everyone else gets the
# CLI launch command. Quoted heredocs print the boxes verbatim.
echo ""
if [ "$IS_COLAB" = true ]; then
    cat <<'EOF'
╔══════════════════════════════════════╗
║           Setup Complete!            ║
╠══════════════════════════════════════╣
║ Unsloth Studio is ready to start     ║
║ in your Colab notebook!              ║
║                                      ║
║ from colab import start              ║
║ start()                              ║
╚══════════════════════════════════════╝
EOF
else
    cat <<'EOF'
╔══════════════════════════════════════╗
║           Setup Complete!            ║
╠══════════════════════════════════════╣
║ Launch with:                         ║
║                                      ║
║ unsloth studio -H 0.0.0.0 -p 8888    ║
╚══════════════════════════════════════╝
EOF
fi


================================================
FILE: tests/__init__.py
================================================


================================================
FILE: tests/qlora/README.md
================================================
## QLoRA Train and Merge Tests

### Overview
Tests that merging QLoRA adapter weights into a 16-bit model after training preserves the behavior of the trained model.

- `test_unsloth_qlora_train_and_merge.py`: Test Unsloth QLoRA train and merge using `FastLanguageModel.from_pretrained`, `FastLanguageModel.get_peft_model`, and `FastLanguageModel.save_pretrained_merged` apis
- `test_hf_qlora_train_and_merge.py`: Test Hugging Face QLoRA train and merge using `from_pretrained`, `get_peft_model`, and `merge_and_unload` apis.
   - Demonstrates that `peft`'s `merge_and_unload` results in loss of accuracy as it requantizes the base layer after merging adapter weights so that the model still contains `Linear4Bit` layers post merging.
   - I (@jeromeku) implemented a custom merge function that replaces all `LoraLayers` with `Linear` layers whose weights are the dequantized base layer weights with adapter weights merged (compute done in fp32, cast to original dtype after merging), roughly equivalent to `FastLanguageModel.save_pretrained_merged`.

### Usage
Run unsloth test:
```bash
python tests/qlora/test_unsloth_qlora_train_and_merge.py
```
Run huggingface test:
```bash
python tests/qlora/test_hf_qlora_train_and_merge.py
```

### Details
The tests train a QLoRA model on a single-prompt dataset:
```
QUESTION = "What day was I born?"
ANSWER = "January 1, 2058"
USER_MESSAGE = {"role": "user", "content": QUESTION}
ASSISTANT_MESSAGE = {"role": "assistant", "content": ANSWER}
```

Given that the question is impossible to answer accurately without finetuning, we can only expect the model to answer it correctly if the model has been trained on the question.

To check this behavior, we check the model's response to the question before and after training and after merging, checking that the model's response contains the answer after training and merging but not before training.

### Results

For the unsloth test, the model's behavior is as expected: 
- before training, the model's response does not contain the answer
- after training, the model's response contains the answer
- after merging, the model's response contains the answer

For the huggingface test, the model's behavior is as expected:
- before training, the model's response does not contain the answer
- after training, the model's response contains the answer
- after using peft's `merge_and_unload`, the model's response does not contain the answer
- after using my custom merge function, the model's response contains the answer

The scripts should output training params, training logs, as well as model responses before and after training and after merging (only prints model responses if answer is not contained in response).

================================================
FILE: tests/qlora/test_hf_qlora_train_and_merge.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).parents[2]
sys.path.append(str(REPO_ROOT))

import itertools
from copy import deepcopy

import torch
from datasets import Dataset
from trl import SFTConfig
from tests.utils import header_footer_context
from tests.utils.data_utils import (
    ANSWER,
    DEFAULT_MESSAGES,
    USER_MESSAGE,
    check_responses,
    create_dataset,
    describe_peft_weights,
)
from tests.utils.hf_utils import (
    convert_lora_to_linear,
    fix_llama3_tokenizer,
    get_peft_config,
    sample_responses,
    setup_model,
    setup_tokenizer,
    setup_trainer,
)

if __name__ == "__main__":
    # ── Run configuration ──
    model_name = "meta-llama/Llama-3.2-1B-Instruct"
    dtype = torch.bfloat16
    max_steps = 100
    num_examples = 1000
    lora_rank = 64
    output_dir = "sft_test"
    seed = 42
    batch_size = 5
    num_generations = 5
    temperature = 0.8
    max_new_tokens = 20

    # ── Tokenizer, LoRA config and quantized base model ──
    tokenizer = setup_tokenizer(model_name, fixup_funcs = [fix_llama3_tokenizer])
    peft_config = get_peft_config(lora_rank = lora_rank, target_modules = "all-linear")
    model = setup_model(model_name, quantize = True, dtype = dtype, peft_config = peft_config)

    # The single user question rendered through the chat template; every
    # sampling round below reuses this exact prompt.
    prompt = tokenizer.apply_chat_template(
        [USER_MESSAGE], tokenize = False, add_generation_prompt = True
    )
    with header_footer_context("Test Prompt and Answer"):
        print(f"Test Prompt:\n{prompt}\nExpected Answer:\n{ANSWER}")

    dataset: Dataset = create_dataset(
        tokenizer, num_examples = num_examples, messages = DEFAULT_MESSAGES
    )
    with header_footer_context("Dataset"):
        first_row = next(iter(dataset))
        print(f"Dataset: {first_row}")

    # Exactly one of bf16 / fp16 is enabled, depending on `dtype`.
    training_args = SFTConfig(
        output_dir = output_dir,
        max_steps = max_steps,
        per_device_train_batch_size = batch_size,
        log_level = "info",
        report_to = "none",
        num_train_epochs = 1,
        logging_steps = 1,
        seed = seed,
        bf16 = dtype == torch.bfloat16,
        fp16 = dtype == torch.float16,
        save_strategy = "no",
    )

    with header_footer_context("Train Args"):
        print(training_args)
        print(peft_config)

    trainer = setup_trainer(
        model, tokenizer, dataset, training_args, peft_config = peft_config
    )

    with header_footer_context("Model"):
        print(type(model.model))

    generation_args = dict(
        num_generations = num_generations,
        max_new_tokens = max_new_tokens,
        temperature = temperature,
        skip_special_tokens = False,
        dtype = dtype,
    )

    def report_responses(title, candidate):
        """Sample from `candidate` first, then check the samples under a `title` banner."""
        sampled = sample_responses(
            candidate,
            tokenizer,
            prompt = prompt,
            **generation_args,
        )
        with header_footer_context(title):
            check_responses(sampled, answer = ANSWER, prompt = prompt)

    def show_peft_weights(title):
        """Print the first two entries yielded by `describe_peft_weights(model)`."""
        with header_footer_context(title):
            for name, stats in itertools.islice(describe_peft_weights(model), 2):
                print(f"{name}:\n{stats}")

    # ── Baseline, before any training ──
    report_responses("Responses before training", model)
    show_peft_weights("Peft Weights before training")

    # ── Train ──
    output = trainer.train()
    show_peft_weights("Peft Weights after training")

    with header_footer_context("Trainer Output"):
        print(output)

    report_responses("Responses after training", model)

    # Copy the trained model before the custom merge is applied to `model`,
    # so peft's own merge can run on the untouched copy afterwards.
    model_copy = deepcopy(model)

    # ── Custom merge (LoRA layers converted to linear layers) ──
    merged_model = convert_lora_to_linear(model)
    report_responses("Responses after custom merging to 16bit", merged_model)

    # ── peft's built-in merge, run on the copy ──
    merged_model_peft = model_copy.merge_and_unload()
    report_responses("Responses after peft merge_and_unload", merged_model_peft)


================================================
FILE: tests/qlora/test_unsloth_qlora_train_and_merge.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).parents[2]
sys.path.append(str(REPO_ROOT))

import itertools
from unsloth import FastLanguageModel

import torch
from datasets import Dataset
from trl import SFTConfig
from tests.utils import header_footer_context
from tests.utils.data_utils import (
    DEFAULT_MESSAGES,
    USER_MESSAGE,
    ANSWER,
    create_dataset,
    describe_peft_weights,
    check_responses,
)
from tests.utils.hf_utils import (
    sample_responses,
    setup_trainer,
)


def get_unsloth_model_and_tokenizer(
    model_name: str,
    max_seq_length: int,
    load_in_4bit: bool,
    fast_inference: bool,
    max_lora_rank: "int | None" = None,
    gpu_memory_utilization: float = 0.5,
    dtype: torch.dtype = torch.bfloat16,
):
    """Load a model through ``FastLanguageModel.from_pretrained``.

    Every argument is forwarded unchanged under the same keyword name.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.
        max_seq_length: Forwarded as ``max_seq_length``.
        load_in_4bit: Forwarded as ``load_in_4bit``.
        fast_inference: Forwarded as ``fast_inference``.
        max_lora_rank: Forwarded as ``max_lora_rank``; ``None`` when omitted.
        gpu_memory_utilization: Forwarded as ``gpu_memory_utilization``.
        dtype: Forwarded as ``dtype``.

    Returns:
        Whatever ``FastLanguageModel.from_pretrained`` returns; the caller in
        this file unpacks it as ``(model, tokenizer)``.
    """
    return FastLanguageModel.from_pretrained(
        model_name = model_name,
        max_seq_length = max_seq_length,
        load_in_4bit = load_in_4bit,
        fast_inference = fast_inference,
        max_lora_rank = max_lora_rank,
        gpu_memory_utilization = gpu_memory_utilization,
        dtype = dtype,
    )


def get_unsloth_peft_model(
    model,
    lora_rank: int,
    target_modules: list[str] = "all-linear",
    use_gradient_checkpointing: str = False,
    random_state: int = 42,
):
    """Wrap ``model`` with LoRA adapters via ``FastLanguageModel.get_peft_model``.

    ``lora_rank`` is used for both the adapter rank (``r``) and ``lora_alpha``.
    The remaining arguments are forwarded under the same names.

    NOTE(review): the defaults do not match their annotations --
    ``target_modules`` defaults to the string ``"all-linear"`` and
    ``use_gradient_checkpointing`` defaults to the bool ``False``.
    """
    adapter_kwargs = {
        "r": lora_rank,
        "target_modules": target_modules,
        "lora_alpha": lora_rank,
        "use_gradient_checkpointing": use_gradient_checkpointing,
        "random_state": random_state,
    }
    return FastLanguageModel.get_peft_model(model, **adapter_kwargs)


if __name__ == "__main__":
    # End-to-end script: load a 4-bit model with unsloth, attach LoRA adapters,
    # train on a generated dataset, merge to 16-bit on disk, reload the merged
    # checkpoint, and sample responses at each stage.

    # --- Run configuration ---
    model_name = "meta-llama/Llama-3.2-1B-Instruct"
    dtype = torch.bfloat16
    max_steps = 100
    num_examples = 1000
    lora_rank = 64
    output_dir = "sft_test"
    seed = 42
    batch_size = 5
    num_generations = 5
    target_modules = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ]
    gradient_checkpointing = False
    unsloth_merged_path = "unsloth_merged_16bit"

    # --- Load the base model in 4-bit ---
    model, tokenizer = get_unsloth_model_and_tokenizer(
        model_name,
        max_seq_length = 512,
        load_in_4bit = True,
        fast_inference = False,
        max_lora_rank = lora_rank,
        dtype = dtype,
    )
    # Sampling settings shared by every sample_responses call below.
    temperature = 0.8
    max_new_tokens = 20

    # --- Attach LoRA adapters (rank and alpha are both lora_rank) ---
    model = get_unsloth_peft_model(
        model,
        lora_rank = lora_rank,
        target_modules = target_modules,
        use_gradient_checkpointing = gradient_checkpointing,
        random_state = seed,
    )

    # Render the single test question as a generation prompt (text, not token ids).
    prompt = tokenizer.apply_chat_template(
        [USER_MESSAGE], tokenize = False, add_generation_prompt = True
    )

    with header_footer_context("Test Prompt and Answer"):
        print(f"Test Prompt:\n{prompt}\nExpected Answer:\n{ANSWER}")

    # --- Build the training dataset ---
    dataset: Dataset = create_dataset(
        tokenizer, num_examples = num_examples, messages = DEFAULT_MESSAGES
    )
    with header_footer_context("Dataset"):
        # Print only the first example as a sanity check.
        print(f"Dataset: {next(iter(dataset))}")

    # --- Trainer configuration ---
    training_args = SFTConfig(
        output_dir = output_dir,
        max_steps = max_steps,
        per_device_train_batch_size = batch_size,
        log_level = "info",
        report_to = "none",
        num_train_epochs = 1,
        logging_steps = 1,
        seed = seed,
        # Exactly one of bf16/fp16 is enabled, chosen from the configured dtype.
        bf16 = dtype == torch.bfloat16,
        fp16 = dtype == torch.float16,
        save_strategy = "no",
    )

    with header_footer_context("Train Args"):
        print(training_args)

    trainer = setup_trainer(model, tokenizer, dataset, training_args)

    with header_footer_context("Model"):
        print(type(model.model))

    # Keyword arguments reused for every sampling pass so the stages are comparable.
    generation_args = {
        "num_generations": num_generations,
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "skip_special_tokens": False,
        "dtype": dtype,
    }
    # --- Baseline: sample before any training ---
    responses = sample_responses(
        model,
        tokenizer,
        prompt = prompt,
        **generation_args,
    )
    with header_footer_context("Responses before training"):
        # NOTE(review): check_responses presumably reports which responses
        # contain ANSWER -- confirm in tests.utils.data_utils.
        check_responses(responses, answer = ANSWER, prompt = prompt)
    with header_footer_context("Peft Weights before training"):
        # Only the first two adapter weights are printed to keep the log short.
        for name, stats in itertools.islice(describe_peft_weights(model), 2):
            print(f"{name}:\n{stats}")

    # --- Train ---
    output = trainer.train()
    with header_footer_context("Peft Weights after training"):
        for name, stats in itertools.islice(describe_peft_weights(model), 2):
            print(f"{name}:\n{stats}")

    with header_footer_context("Trainer Output"):
        print(output)

    # --- Sample again with the trained adapters still attached ---
    responses = sample_responses(
        model,
        tokenizer,
        prompt = prompt,
        **generation_args,
    )
    with header_footer_context("Responses after training"):
        check_responses(responses, answer = ANSWER, prompt = prompt)

    # --- Merge adapters into 16-bit weights on disk, then reload without 4-bit ---
    model.save_pretrained_merged(
        unsloth_merged_path,
        tokenizer,
        save_method = "merged_16bit",
    )
    # Note: `tokenizer` is rebound to the tokenizer loaded from the merged checkpoint.
    merged_model_unsloth, tokenizer = get_unsloth_model_and_tokenizer(
        unsloth_merged_path,
        max_seq_length = 512,
        load_in_4bit = False,
        fast_inference = False,
        dtype = dtype,
    )
    responses = sample_responses(
        merged_model_unsloth,
        tokenizer,
        prompt = prompt,
        **generation_args,
    )
    with header_footer_context("Responses after unsloth merge to 16bit"):
        check_responses(responses, answer = ANSWER, prompt = prompt)


================================================
FILE: tests/saving/gpt-oss-merge/run_test.sh
================================================
#!/bin/bash
# Runs the gpt-oss merge test in two separate Python processes:
#   1. train_and_merge.py   - fine-tunes and writes the merged model to disk
#   2. test_merged_model.py - reloads the merged model and runs inference
#
# Abort on the first failing command, on unset variables, and on pipeline errors.
set -euo pipefail

# Both Python scripts live next to this file and read/write relative paths
# (e.g. ./gpt-oss-finetuned-merged), so always run from this directory no
# matter where the script was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "================================================================"
echo "🚀 STEP 1: Running the training and merging script..."
echo "================================================================"
python train_and_merge.py

echo ""
echo "================================================================"
echo "✅ STEP 2: Training complete. Running the inference script..."
echo "================================================================"
python test_merged_model.py

echo ""
echo "================================================================"
echo "🎉 All steps completed successfully!"
echo "================================================================"


================================================
FILE: tests/saving/gpt-oss-merge/test_merged_model.py
================================================
# inference_on_merged.py
from unsloth import FastLanguageModel
from transformers import TextStreamer
import torch
import gc
import os
import shutil


def safe_remove_directory(path):
    """Recursively delete the directory at ``path`` without ever raising.

    Returns:
        True if ``path`` was an existing directory and was removed.
        False if ``path`` is not an existing directory, or if anything raised
        while checking or removing it (the problem is printed instead).
    """
    try:
        is_directory = os.path.exists(path) and os.path.isdir(path)
        if not is_directory:
            print(f"Path {path} is not a valid directory")
            return False
        shutil.rmtree(path)
        return True
    except Exception as err:
        print(f"Failed to remove directory {path}: {err}")
        return False


# Load the merged checkpoint written by train_and_merge.py. The weights on disk
# are the merged model; they are quantized to 4-bit at load time here.
print("🔥 Loading the 16-bit merged model from disk...")
merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./gpt-oss-finetuned-merged",
    max_seq_length = 1024,
    load_in_4bit = True,
    load_in_8bit = False,
)
print("✅ Merged model loaded successfully.")

# --- Run Inference ---
print("\n🚀 Running inference...")
messages = [
    {"role": "user", "content": "Solve x^5 + 3x^4 - 10 = 3."},
]
# Tokenize the chat into model inputs and move them to the model's device.
inputs = merged_tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
    return_dict = True,
    reasoning_effort = "low",  # **NEW!** Set reasoning effort to low, medium or high
).to(merged_model.device)

# The streamer prints tokens as they are generated; the return value is unused.
_ = merged_model.generate(
    **inputs, max_new_tokens = 512, streamer = TextStreamer(merged_tokenizer)
)
print("\n✅ Inference complete.")

# --- Final Cleanup ---
print("\n🧹 Cleaning up merged model directory and cache...")
del merged_model, merged_tokenizer
# Collect garbage first so tensors kept alive only by reference cycles are
# freed before the CUDA caching allocator is asked to release its blocks.
gc.collect()
torch.cuda.empty_cache()

safe_remove_directory("./gpt-oss-finetuned-merged")
safe_remove_directory(
    "./unsloth_compiled_cache"
)  # Clean up cache created by this process
print("✅ Final cleanup complete. Exiting inference script.")


================================================
FILE: tests/saving/gpt-oss-merge/train_and_merge.py
================================================
# train_and_merge.py
from unsloth import FastLanguageModel
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
import torch
import gc
import os
import shutil


def safe_remove_directory(path):
    """Best-effort recursive removal of the directory at ``path``.

    Never raises: any exception is printed and reported as ``False``.

    Returns:
        True when an existing directory was removed, otherwise False.
    """
    removed = False
    try:
        if os.path.exists(path) and os.path.isdir(path):
            shutil.rmtree(path)
            removed = True
        else:
            print(f"Path {path} is not a valid directory")
    except Exception as exc:
        print(f"Failed to remove directory {path}: {exc}")
        removed = False
    return removed


# Module-level placeholder; rebound to the real tokenizer once the model is
# loaded, and read by formatting_prompts_func at call time.
tokenizer = None


def formatting_prompts_func(examples):
    """Render each conversation in a batch to plain text with the chat template.

    Args:
        examples: batch mapping whose ``"messages"`` entry is a list of
            conversations.

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation.
    """
    rendered = []
    for conversation in examples["messages"]:
        # Look the global tokenizer up per call so the late rebinding is honoured.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation, tokenize = False, add_generation_prompt = False
            )
        )
    return {"text": rendered}


# --- Load 4-bit Model and Train ---
print("Loading 4-bit Mxfp4 gpt-oss model for training...")
max_seq_length = 1024
# Rebinds the module-level `tokenizer` that formatting_prompts_func reads.
model, tokenizer = FastLanguageModel.from_pretrained(
    "unsloth/gpt-oss-20b", max_seq_length = max_seq_length, load_in_4bit = True
)

# Only the first 50 training rows are used; each is rendered to a "text" column.
dataset = load_dataset("HuggingFaceH4/Multilingual-Thinking", split = "train[:50]").map(
    formatting_prompts_func, batched = True
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha = 16,
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

# Deliberately tiny run (10 optimizer steps).
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    args = SFTConfig(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,
        max_steps = 10,
        learning_rate = 2e-4,
        output_dir = "outputs",
        report_to = "none",
    ),
)

print("Starting fine-tuning...")
trainer.train()
print("Fine-tuning complete.")

# --- Merge and Save ---
# The merged directory is consumed by test_merged_model.py in a separate process.
print("\n💾 Merging and saving the 16-bit model to './gpt-oss-finetuned-merged'...")
model.save_pretrained_merged(
    save_directory = "./gpt-oss-finetuned-merged", tokenizer = tokenizer
)
print("✅ Model merged and saved.")

# --- Cleanup ---
print("\n🧹 Cleaning up training artifacts...")
del model, trainer, tokenizer, dataset
# Collect garbage first so tensors kept alive only by reference cycles are
# freed before the CUDA caching allocator is asked to release its blocks.
gc.collect()
torch.cuda.empty_cache()

safe_remove_directory("./outputs")
safe_remove_directory(
    "./unsloth_compiled_cache"
)  # Clean up the cache created by this process
print("✅ Cleanup complete. Exiting training script.")


================================================
FILE: tests/saving/language_models/test_merge_4bit_validation.py
================================================
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import DataCollatorForSeq2Seq, TrainingArguments
from datasets import load_dataset
import torch
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory


def formatting_prompts_func(examples):
    """Turn a batch of conversations into chat-template text.

    Reads the module-level ``tokenizer`` at call time.

    Args:
        examples: batch mapping with a ``"messages"`` list of conversations.

    Returns:
        ``{"text": [...]}`` holding one rendered string per conversation.
    """

    def render(conversation):
        return tokenizer.apply_chat_template(
            conversation, tokenize = False, add_generation_prompt = False
        )

    return {"text": list(map(render, examples["messages"]))}


print(f"\n{'='*80}")
print("🔍 PHASE 1: Loading Base Model and Initial Training")
print(f"{'='*80}")

# flash-attention 2 is only requested together with bf16; otherwise use fp16 + SDPA.
if torch.cuda.is_bf16_supported():
    compute_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    compute_dtype = torch.float16
    attn_implementation = "sdpa"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.1-8B-Instruct",
    max_seq_length = 2048,
    dtype = compute_dtype,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
    attn_implementation = attn_implementation,
)

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

# Load small dataset for quick training
dataset_train = load_dataset(
    "allenai/openassistant-guanaco-reformatted", split = "train[:100]"
)
dataset_train = dataset_train.map(formatting_prompts_func, batched = True)

print("✅ Base model loaded successfully!")

print(f"\n{'='*80}")
print("🔍 PHASE 2: First Fine-tuning")
print(f"{'='*80}")

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "down_proj",
        "up_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "text",
    max_seq_length = 2048,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        max_steps = 10,  # Very short training for test
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 5,
        optim = "adamw_8bit",
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

trainer_stats = trainer.train()
print("✅ First fine-tuning completed!")

print(f"\n{'='*80}")
print("🔍 PHASE 3: Save with Forced 4bit Merge")
print(f"{'='*80}")

model.save_pretrained_merged(
    save_directory = "./test_4bit_model",
    tokenizer = tokenizer,
    save_method = "forced_merged_4bit",
)

print("✅ Model saved with forced 4bit merge!")

print(f"\n{'='*80}")
print("🔍 PHASE 4: Loading 4bit Model and Second Fine-tuning")
print(f"{'='*80}")

# Clean up first model. The trainer was constructed with the model, so it is
# dropped as well; otherwise the first model could stay resident on the GPU
# while the second one is loaded. Garbage is collected before emptying the CUDA
# cache so tensors held only by reference cycles are released too.
import gc

del trainer
del model
del tokenizer
gc.collect()
torch.cuda.empty_cache()

# Load the 4bit merged model
model_4bit, tokenizer_4bit = FastLanguageModel.from_pretrained(
    model_name = "./test_4bit_model",
    max_seq_length = 2048,
    load_in_4bit = True,
    load_in_8bit = False,
)

tokenizer_4bit = get_chat_template(
    tokenizer_4bit,
    chat_template = "llama-3.1",
)

print("✅ 4bit model loaded successfully!")

# Add LoRA adapters to the 4bit model
model_4bit = FastLanguageModel.get_peft_model(
    model_4bit,
    r = 16,
    target_modules = [
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "down_proj",
        "up_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# Second fine-tuning (reuses the dataset already formatted in phase 1)
trainer_4bit = SFTTrainer(
    model = model_4bit,
    tokenizer = tokenizer_4bit,
    train_dataset = dataset_train,
    dataset_text_field = "text",
    max_seq_length = 2048,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer_4bit),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        max_steps = 10,  # Very short training for test
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 5,
        optim = "adamw_8bit",
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs_4bit",
        report_to = "none",
    ),
)

trainer_4bit.train()
print("✅ Second fine-tuning on 4bit model completed!")

print(f"\n{'='*80}")
print("🔍 PHASE 5: Testing TypeError on Regular Merge (Should Fail)")
print(f"{'='*80}")

expected_error = "Base model should be a 16bits or mxfp4 base model for a 16bit model merge. Use `save_method=forced_merged_4bit` instead"
try:
    model_4bit.save_pretrained_merged(
        save_directory = "./test_should_fail",
        tokenizer = tokenizer_4bit,
        # No save_method specified, should default to regular merge
    )
except TypeError as e:
    # Explicit raise instead of `assert` so the check still runs under `python -O`.
    if expected_error not in str(e):
        raise AssertionError(f"Unexpected error message: {str(e)}") from e
    print("✅ Correct TypeError raised for 4bit base model regular merge attempt!")
    print(f"Error message: {str(e)}")
else:
    # Reaching here means the merge did not raise, which is the failure case.
    raise AssertionError("Expected TypeError but merge succeeded!")

print(f"\n{'='*80}")
print("🔍 PHASE 6: Successful Save with Forced 4bit Method")
print(f"{'='*80}")

try:
    model_4bit.save_pretrained_merged(
        save_directory = "./test_4bit_second",
        tokenizer = tokenizer_4bit,
        save_method = "forced_merged_4bit",
    )
except Exception as e:
    # Any failure here is a test failure; keep the original exception chained.
    raise AssertionError(f"Phase 6 failed unexpectedly: {e}") from e
else:
    print("✅ Successfully saved 4bit model with forced 4bit method!")

print(f"\n{'='*80}")
print("🔍 CLEANUP")
print(f"{'='*80}")

# Cleanup
safe_remove_directory("./outputs")
safe_remove_directory("./outputs_4bit")
safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./test_4bit_model")
safe_remove_directory("./test_4bit_second")
safe_remove_directory("./test_should_fail")

print("✅ All tests passed successfully!")


================================================
FILE: tests/saving/language_models/test_merge_model_perplexity_llama-3.2.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))


from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


# Define helper functions outside of main
def formatting_prompts_func(examples):
    """Render every conversation of a batch through the tokenizer's chat template.

    Uses the ``tokenizer`` global, looked up when a conversation is rendered.

    Args:
        examples: batch mapping with a ``"messages"`` list of conversations.

    Returns:
        ``{"text": [...]}`` with one untokenized string per conversation.
    """
    texts = []
    for convo in examples["messages"]:
        text = tokenizer.apply_chat_template(
            convo, tokenize = False, add_generation_prompt = False
        )
        texts.append(text)
    return {"text": texts}


def load_and_compute_8bit_ppl(result_queue, load_in_4bit = False, load_in_8bit = False):
    """Subprocess entry point: score the merged Llama model and report perplexity.

    Loads ``./unsloth_out/merged_llama_text_model`` with the requested
    quantization flags, formats the ``eval`` split of the guanaco dataset with
    the ``llama-3.1`` chat template, computes perplexity with ``ppl_model`` and
    puts the result on ``result_queue`` as a plain Python number.

    Args:
        result_queue: queue that receives the single perplexity value.
        load_in_4bit: forwarded to ``FastLanguageModel.from_pretrained``.
        load_in_8bit: forwarded to ``FastLanguageModel.from_pretrained``.
    """
    # Imported here so they are resolved inside the process that runs this function.
    from unsloth import FastLanguageModel
    from unsloth.chat_templates import get_chat_template
    from tests.utils.perplexity_eval import ppl_model

    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_llama_text_model",
        max_seq_length = 2048,
        load_in_4bit = load_in_4bit,
        load_in_8bit = load_in_8bit,
    )
    merged_tokenizer = get_chat_template(merged_tokenizer, chat_template = "llama-3.1")

    def formatting_prompts_func(examples):
        # Same shape as the module-level helper, but bound to the merged tokenizer.
        rendered = []
        for conversation in examples["messages"]:
            rendered.append(
                merged_tokenizer.apply_chat_template(
                    conversation, tokenize = False, add_generation_prompt = False
                )
            )
        return {"text": rendered}

    # The dataset is loaded inside this process instead of being passed in.
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    score = ppl_model(merged_model, merged_tokenizer, dataset_ppl)

    # Reduce the result to a plain Python scalar before it crosses the queue.
    if torch.is_tensor(score):
        score = score.cpu().item()
    elif hasattr(score, "item"):
        score = score.item()
    else:
        score = float(score)

    result_queue.put(score)

    # Drop the heavy objects and release cached GPU memory.
    del merged_model, merged_tokenizer, dataset_ppl
    torch.cuda.empty_cache()
    gc.collect()


# Main execution code should be wrapped in this guard
if __name__ == "__main__":
    # Compares perplexity of: the 4-bit base model, the QLoRA-tuned model, and
    # the merged checkpoint reloaded in 4-bit, 8-bit (subprocess) and 16-bit.

    # "spawn" gives the 8-bit subprocess a fresh interpreter.
    mp.set_start_method("spawn", force = True)

    # flash-attention 2 is only requested together with bf16; otherwise fp16 + SDPA.
    if torch.cuda.is_bf16_supported():
        compute_dtype = torch.bfloat16
        attn_implementation = "flash_attention_2"
    else:
        compute_dtype = torch.float16
        attn_implementation = "sdpa"

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Llama-3.2-3B-Instruct",
        max_seq_length = 2048,
        dtype = compute_dtype,
        load_in_4bit = True,
        load_in_8bit = False,
        full_finetuning = False,
        attn_implementation = attn_implementation,
    )

    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "llama-3.1",
    )

    dataset_train = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "train"
    )
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    # formatting_prompts_func reads the module-level `tokenizer` bound above.
    dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = [
            "k_proj",
            "q_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "down_proj",
            "up_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    from unsloth import is_bfloat16_supported

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset_train,
        dataset_text_field = "text",
        max_seq_length = 2048,
        data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
        dataset_num_proc = 2,
        packing = False,
        args = TrainingArguments(
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            warmup_ratio = 0.1,
            max_steps = 10,
            learning_rate = 2e-4,
            fp16 = not is_bfloat16_supported(),
            bf16 = is_bfloat16_supported(),
            logging_steps = 50,
            optim = "adamw_8bit",
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "outputs",
            report_to = "none",
        ),
    )

    from unsloth.chat_templates import train_on_responses_only

    trainer = train_on_responses_only(
        trainer,
        instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
        response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
    )

    # run training
    trainer_stats = trainer.train()

    add_to_comparison("Qlora model", ppl_model(model, tokenizer, dataset_ppl))

    # saving and merging the model to local disk
    print("merge and save to local disk")
    model.save_pretrained_merged(
        save_directory = "./unsloth_out/merged_llama_text_model", tokenizer = tokenizer
    )

    # NOTE: the trained model is intentionally left loaded; the cleanup below
    # is disabled.
    # print("cleaning")
    # del model
    # del tokenizer
    # torch.cuda.empty_cache()
    # gc.collect()

    # load model from local disk and test
    print("Loading merged model in 4 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_llama_text_model",
        max_seq_length = 2048,
        load_in_4bit = True,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model load 4bit", ppl_model(merged_model, merged_tokenizer, dataset_ppl)
    )

    print("Computing 8-bit model perplexity in subprocess...")
    result_queue = mp.Queue()
    p = mp.Process(target = load_and_compute_8bit_ppl, args = (result_queue, False, True))
    p.start()
    p.join()

    # If the child died before putting its result, a bare `get()` would block
    # forever. Fail loudly instead.
    if p.exitcode != 0:
        raise RuntimeError(
            f"8-bit perplexity subprocess failed with exit code {p.exitcode}"
        )
    ppl_8bit = result_queue.get()
    add_to_comparison("merged model loaded 8bits", ppl_8bit)

    print("Loading merged model in 16 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_llama_text_model",
        max_seq_length = 2048,
        load_in_4bit = False,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model loaded 16bits",
        ppl_model(merged_model, merged_tokenizer, dataset_ppl),
    )

    print_model_comparison()

    # final cleanup
    safe_remove_directory("./outputs")
    safe_remove_directory("./unsloth_compiled_cache")
    safe_remove_directory("./unsloth_out")


================================================
FILE: tests/saving/language_models/test_merge_model_perplexity_mistral.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


def load_and_compute_8bit_ppl(result_queue, load_in_4bit = False, load_in_8bit = False):
    """Subprocess entry point: score the merged Mistral model and report perplexity.

    Loads ``./unsloth_out/merged_mistral_text_model`` with the requested
    quantization flags, formats the ``eval`` split of the guanaco dataset with
    an Alpaca-style prompt, computes perplexity with ``ppl_model`` and puts the
    result on ``result_queue`` as a plain Python number.

    Args:
        result_queue: queue that receives the single perplexity value.
        load_in_4bit: forwarded to ``FastLanguageModel.from_pretrained``.
        load_in_8bit: forwarded to ``FastLanguageModel.from_pretrained``.
    """
    # Imported here so they are resolved inside the process that runs this function.
    from unsloth import FastLanguageModel
    from tests.utils.perplexity_eval import ppl_model

    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_mistral_text_model",
        max_seq_length = 2048,
        load_in_4bit = load_in_4bit,
        load_in_8bit = load_in_8bit,
    )
    # No chat template is applied to the tokenizer; the prompt below is used instead.

    # The dataset is loaded inside this process instead of being passed in.
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    # The indentation inside this literal is part of the prompt text.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}"""

    eos_token = merged_tokenizer.eos_token

    def formatting_prompts_func(examples):
        instruction = "Complete the statement"
        columns = {"instruction": [], "input": [], "output": [], "text": []}

        for conversation in examples["messages"]:
            # Keep the last user turn and the last assistant turn; other roles are ignored.
            latest = {"user": "", "assistant": ""}
            for turn in conversation:
                speaker = turn["role"]
                if speaker in latest:
                    latest[speaker] = turn["content"]

            columns["instruction"].append(instruction)
            columns["input"].append(latest["user"])
            columns["output"].append(latest["assistant"])
            columns["text"].append(
                alpaca_prompt.format(instruction, latest["user"], latest["assistant"])
                + eos_token
            )

        return columns

    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    score = ppl_model(merged_model, merged_tokenizer, dataset_ppl)

    # Reduce the result to a plain Python scalar before it crosses the queue.
    if torch.is_tensor(score):
        score = score.cpu().item()
    elif hasattr(score, "item"):
        score = score.item()
    else:
        score = float(score)

    result_queue.put(score)

    # Drop the heavy objects and release cached GPU memory.
    del merged_model, merged_tokenizer, dataset_ppl
    torch.cuda.empty_cache()
    gc.collect()


# Main execution code should be wrapped in this guard
if __name__ == "__main__":
    # Compares perplexity of: the 4-bit base model, the QLoRA-tuned model, and
    # the merged checkpoint reloaded in 4-bit, 8-bit (subprocess) and 16-bit.

    # "spawn" gives the 8-bit subprocess a fresh interpreter.
    mp.set_start_method("spawn", force = True)

    # flash-attention 2 is only requested together with bf16; otherwise fp16 + SDPA.
    if torch.cuda.is_bf16_supported():
        compute_dtype = torch.bfloat16
        attn_implementation = "flash_attention_2"
    else:
        compute_dtype = torch.float16
        attn_implementation = "sdpa"

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/mistral-7b-v0.3",
        max_seq_length = 2048,
        dtype = compute_dtype,
        load_in_4bit = True,
        load_in_8bit = False,
        full_finetuning = False,
        attn_implementation = attn_implementation,
    )

    EOS_TOKEN = tokenizer.eos_token

    # The indentation inside this literal is part of the prompt text and matches
    # the copy used by load_and_compute_8bit_ppl.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}"""

    # Local helper: it closes over `alpaca_prompt` and `EOS_TOKEN` defined above,
    # so it lives inside the main guard.
    def formatting_prompts_func(examples):
        instructions = []
        inputs = []
        outputs = []
        texts = []

        for conversation in examples["messages"]:
            # Extract user message and assistant response (the last of each wins)
            user_message = ""
            assistant_message = ""

            for turn in conversation:
                if turn["role"] == "user":
                    user_message = turn["content"]
                elif turn["role"] == "assistant":
                    assistant_message = turn["content"]

            # Store intermediate format
            instruction = "Complete the statement"
            instructions.append(instruction)
            inputs.append(user_message)
            outputs.append(assistant_message)

            # Create formatted text
            text = (
                alpaca_prompt.format(instruction, user_message, assistant_message)
                + EOS_TOKEN
            )
            texts.append(text)

        return {
            "instruction": instructions,
            "input": inputs,
            "output": outputs,
            "text": texts,
        }

    dataset_train = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "train"
    )
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = [
            "k_proj",
            "q_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "down_proj",
            "up_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    from unsloth import is_bfloat16_supported

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset_train,
        dataset_text_field = "text",
        max_seq_length = 2048,
        dataset_num_proc = 2,
        packing = False,
        args = TrainingArguments(
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            warmup_ratio = 0.1,
            max_steps = 200,
            learning_rate = 2e-4,
            fp16 = not is_bfloat16_supported(),
            bf16 = is_bfloat16_supported(),
            logging_steps = 50,
            optim = "adamw_8bit",
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "outputs",
            report_to = "none",
        ),
    )

    # run training
    trainer_stats = trainer.train()

    add_to_comparison("Qlora model", ppl_model(model, tokenizer, dataset_ppl))

    # saving and merging the model to local disk
    print("merge and save to local disk")
    model.save_pretrained_merged(
        save_directory = "./unsloth_out/merged_mistral_text_model", tokenizer = tokenizer
    )

    # NOTE: the trained model is intentionally left loaded; the cleanup below
    # is disabled.
    # print("cleaning")
    # del model
    # del tokenizer
    # torch.cuda.empty_cache()
    # gc.collect()

    # load model from local disk and test
    print("Loading merged model in 4 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_mistral_text_model",
        max_seq_length = 2048,
        load_in_4bit = True,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model load 4bit", ppl_model(merged_model, merged_tokenizer, dataset_ppl)
    )

    print("Computing 8-bit model perplexity in subprocess...")
    result_queue = mp.Queue()
    p = mp.Process(target = load_and_compute_8bit_ppl, args = (result_queue, False, True))
    p.start()
    p.join()

    # If the child died before putting its result, a bare `get()` would block
    # forever. Fail loudly instead.
    if p.exitcode != 0:
        raise RuntimeError(
            f"8-bit perplexity subprocess failed with exit code {p.exitcode}"
        )
    ppl_8bit = result_queue.get()
    add_to_comparison("merged model loaded 8bits", ppl_8bit)

    print("Loading merged model in 16 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_mistral_text_model",
        max_seq_length = 2048,
        load_in_4bit = False,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model loaded 16bits",
        ppl_model(merged_model, merged_tokenizer, dataset_ppl),
    )

    print_model_comparison()

    # final cleanup
    safe_remove_directory("./outputs")
    safe_remove_directory("./unsloth_compiled_cache")
    safe_remove_directory("./unsloth_out")


================================================
FILE: tests/saving/language_models/test_merge_model_perplexity_phi_4.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))


from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


# Define helper functions outside of main
def formatting_prompts_func(examples):
    """Turn a batch of conversations into chat-templated strings.

    Reads the module-level ``tokenizer`` at call time, so that name must be
    bound before this function is first invoked.

    Args:
        examples: Batch mapping whose ``"messages"`` entry is a sequence of
            conversations.

    Returns:
        A dict with a single ``"text"`` key holding one rendered string per
        conversation, in input order.
    """
    rendered = []
    for conversation in examples["messages"]:
        # Render to plain text only: no tokenisation, no trailing generation prompt.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation, tokenize = False, add_generation_prompt = False
            )
        )
    return {"text": rendered}


def load_and_compute_8bit_ppl(result_queue, load_in_4bit = False, load_in_8bit = False):
    """Score the merged Phi-4 checkpoint and hand the perplexity back via a queue.

    Used as a ``multiprocessing.Process`` target: everything it needs (model,
    tokenizer, evaluation split) is loaded inside the call rather than passed in.

    Args:
        result_queue: Queue-like object; exactly one Python scalar is ``put`` on it.
        load_in_4bit: Forwarded to ``FastLanguageModel.from_pretrained``.
        load_in_8bit: Forwarded to ``FastLanguageModel.from_pretrained``.
    """
    from unsloth import FastLanguageModel
    from unsloth.chat_templates import get_chat_template
    from tests.utils.perplexity_eval import ppl_model

    checkpoint_dir = "./unsloth_out/merged_phi4_text_model"
    eval_model, eval_tokenizer = FastLanguageModel.from_pretrained(
        model_name = checkpoint_dir,
        max_seq_length = 2048,
        load_in_4bit = load_in_4bit,
        load_in_8bit = load_in_8bit,
    )
    eval_tokenizer = get_chat_template(eval_tokenizer, chat_template = "phi-4")

    # The evaluation split is re-loaded here instead of being shipped from the parent.
    eval_split = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    def _render_batch(batch):
        # Same rendering settings as the module-level formatter, bound to the
        # tokenizer loaded in this call.
        return {
            "text": [
                eval_tokenizer.apply_chat_template(
                    messages, tokenize = False, add_generation_prompt = False
                )
                for messages in batch["messages"]
            ]
        }

    eval_split = eval_split.map(_render_batch, batched = True)

    ppl = ppl_model(eval_model, eval_tokenizer, eval_split)

    # Reduce whatever came back to a plain Python scalar before it crosses the
    # process boundary: tensors are moved to CPU first, anything exposing
    # ``.item()`` is unwrapped, everything else is coerced with ``float``.
    if torch.is_tensor(ppl):
        ppl = ppl.cpu()
    ppl = ppl.item() if hasattr(ppl, "item") else float(ppl)

    result_queue.put(ppl)

    # Drop the heavy references and release cached GPU memory.
    del eval_model, eval_tokenizer, eval_split
    torch.cuda.empty_cache()
    gc.collect()


# Main execution code should be wrapped in this guard
if __name__ == "__main__":
    # End-to-end check for Phi-4: measure perplexity of the 4-bit base model,
    # QLoRA-finetune it, merge and save, then reload the merged checkpoint in
    # 4-bit, 8-bit (in a subprocess) and 16-bit and compare perplexities.

    # The 8-bit evaluation runs in a child process; "spawn" starts it from a
    # fresh interpreter rather than a fork of this one.
    mp.set_start_method("spawn", force = True)

    # bf16 + flash-attention when the GPU supports bf16, otherwise fp16 + SDPA.
    if torch.cuda.is_bf16_supported():
        compute_dtype = torch.bfloat16
        attn_implementation = "flash_attention_2"
    else:
        compute_dtype = torch.float16
        attn_implementation = "sdpa"

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Phi-4",
        max_seq_length = 2048,
        dtype = compute_dtype,
        load_in_4bit = True,
        load_in_8bit = False,
        full_finetuning = False,
        attn_implementation = attn_implementation,
    )

    # `tokenizer` is a module global from here on; formatting_prompts_func reads it.
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "phi-4",
    )

    dataset_train = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "train"
    )
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    # Baseline row of the comparison table.
    add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

    # Attach LoRA adapters to the attention and MLP projections.
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = [
            "k_proj",
            "q_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "down_proj",
            "up_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    from unsloth import is_bfloat16_supported

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset_train,
        dataset_text_field = "text",
        max_seq_length = 2048,
        data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
        dataset_num_proc = 2,
        packing = False,
        args = TrainingArguments(
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            warmup_ratio = 0.1,
            max_steps = 200,
            learning_rate = 2e-4,
            fp16 = not is_bfloat16_supported(),
            bf16 = is_bfloat16_supported(),
            logging_steps = 50,
            optim = "adamw_8bit",
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "outputs",
            report_to = "none",
        ),
    )

    from unsloth.chat_templates import train_on_responses_only

    # The two markers are the Phi-4 chat-template turn headers for user and
    # assistant turns.
    trainer = train_on_responses_only(
        trainer,
        instruction_part = "<|im_start|>user<|im_sep|>\n\n",
        response_part = "<|im_start|>assistant<|im_sep|>\n\n",
    )

    # run training
    trainer_stats = trainer.train()

    add_to_comparison("Qlora model", ppl_model(model, tokenizer, dataset_ppl))

    # saving and merging the model to local disk
    print("merge and save to local disk")
    model.save_pretrained_merged(
        save_directory = "./unsloth_out/merged_phi4_text_model", tokenizer = tokenizer
    )

    # load model from local disk and test
    print("Loading merged model in 4 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_phi4_text_model",
        max_seq_length = 2048,
        load_in_4bit = True,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model load 4bit", ppl_model(merged_model, merged_tokenizer, dataset_ppl)
    )

    print("Computing 8-bit model perplexity in subprocess...")
    from queue import Empty

    result_queue = mp.Queue()
    p = mp.Process(target = load_and_compute_8bit_ppl, args = (result_queue, False, True))
    p.start()
    p.join()

    # The child has already exited, so its result (a single float) is either
    # waiting in the queue or will never arrive. A bounded get turns a crashed
    # child into an error instead of an indefinite hang.
    try:
        ppl_8bit = result_queue.get(timeout = 5)
    except Empty:
        raise RuntimeError(
            "8-bit perplexity subprocess exited with code "
            f"{p.exitcode} without reporting a result"
        ) from None
    add_to_comparison("merged model loaded 8bits", ppl_8bit)

    print("Loading merged model in 16 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_phi4_text_model",
        max_seq_length = 2048,
        load_in_4bit = False,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model loaded 16bits",
        ppl_model(merged_model, merged_tokenizer, dataset_ppl),
    )

    print_model_comparison()

    # final cleanup
    safe_remove_directory("./outputs")
    safe_remove_directory("./unsloth_compiled_cache")
    safe_remove_directory("./unsloth_out")


================================================
FILE: tests/saving/language_models/test_merged_model_perplexity_llama-3.1-8b.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


# Define helper functions outside of main
def formatting_prompts_func(examples):
    """Apply the chat template to every conversation in a batch.

    Depends on the module-level ``tokenizer`` being bound by the time this is
    called.

    Args:
        examples: Batch mapping with a ``"messages"`` sequence of conversations.

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation.
    """

    def _render(conversation):
        # Text output only; no generation prompt is appended.
        return tokenizer.apply_chat_template(
            conversation, tokenize = False, add_generation_prompt = False
        )

    conversations = examples["messages"]
    return {"text": list(map(_render, conversations))}


def load_and_compute_8bit_ppl(result_queue, load_in_4bit = False, load_in_8bit = False):
    """Score the merged Llama checkpoint and hand the perplexity back via a queue.

    Used as a ``multiprocessing.Process`` target; the model, tokenizer and
    evaluation split are all loaded inside the call.

    Args:
        result_queue: Queue-like object; exactly one Python scalar is ``put`` on it.
        load_in_4bit: Forwarded to ``FastLanguageModel.from_pretrained``.
        load_in_8bit: Forwarded to ``FastLanguageModel.from_pretrained``.
    """
    from unsloth import FastLanguageModel
    from unsloth.chat_templates import get_chat_template
    from tests.utils.perplexity_eval import ppl_model

    checkpoint_dir = "./unsloth_out/merged_llama_text_model"
    eval_model, eval_tokenizer = FastLanguageModel.from_pretrained(
        model_name = checkpoint_dir,
        max_seq_length = 2048,
        load_in_4bit = load_in_4bit,
        load_in_8bit = load_in_8bit,
    )
    eval_tokenizer = get_chat_template(eval_tokenizer, chat_template = "llama-3.1")

    # The evaluation split is re-loaded here instead of being shipped from the parent.
    eval_split = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    def _render_batch(batch):
        # Same rendering settings as the module-level formatter, bound to the
        # tokenizer loaded in this call.
        texts = []
        for messages in batch["messages"]:
            texts.append(
                eval_tokenizer.apply_chat_template(
                    messages, tokenize = False, add_generation_prompt = False
                )
            )
        return {"text": texts}

    eval_split = eval_split.map(_render_batch, batched = True)

    ppl = ppl_model(eval_model, eval_tokenizer, eval_split)

    # Reduce the result to a plain Python scalar before it crosses the process
    # boundary: tensors go to CPU first, ``.item()`` is used when available,
    # otherwise ``float`` coerces the value.
    if torch.is_tensor(ppl):
        ppl = ppl.cpu()
    ppl = ppl.item() if hasattr(ppl, "item") else float(ppl)

    result_queue.put(ppl)

    # Drop the heavy references and release cached GPU memory.
    del eval_model, eval_tokenizer, eval_split
    torch.cuda.empty_cache()
    gc.collect()


# Main execution code should be wrapped in this guard
if __name__ == "__main__":
    # End-to-end check for Llama-3.1-8B-Instruct: measure perplexity of the
    # 4-bit base model, QLoRA-finetune it, merge and save, then reload the
    # merged checkpoint in 4-bit, 8-bit (in a subprocess) and 16-bit and
    # compare perplexities.

    # The 8-bit evaluation runs in a child process; "spawn" starts it from a
    # fresh interpreter rather than a fork of this one.
    mp.set_start_method("spawn", force = True)

    # bf16 + flash-attention when the GPU supports bf16, otherwise fp16 + SDPA.
    if torch.cuda.is_bf16_supported():
        compute_dtype = torch.bfloat16
        attn_implementation = "flash_attention_2"
    else:
        compute_dtype = torch.float16
        attn_implementation = "sdpa"

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Llama-3.1-8B-Instruct",
        max_seq_length = 2048,
        dtype = compute_dtype,
        load_in_4bit = True,
        load_in_8bit = False,
        full_finetuning = False,
        attn_implementation = attn_implementation,
    )

    # `tokenizer` is a module global from here on; formatting_prompts_func reads it.
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "llama-3.1",
    )

    from unsloth.chat_templates import standardize_sharegpt

    dataset_train = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "train"
    )
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    print("\n dataset sample [0]")
    print(dataset_train[0])

    # Baseline row of the comparison table.
    add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

    # Attach LoRA adapters to the attention and MLP projections.
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = [
            "k_proj",
            "q_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "down_proj",
            "up_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    from unsloth import is_bfloat16_supported

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset_train,
        dataset_text_field = "text",
        max_seq_length = 2048,
        data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
        dataset_num_proc = 2,
        packing = False,
        args = TrainingArguments(
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            warmup_ratio = 0.1,
            max_steps = 200,
            learning_rate = 2e-4,
            fp16 = not is_bfloat16_supported(),
            bf16 = is_bfloat16_supported(),
            logging_steps = 50,
            optim = "adamw_8bit",
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "outputs",
            report_to = "none",
        ),
    )

    from unsloth.chat_templates import train_on_responses_only

    # The two markers are the Llama-3 chat-template headers for user and
    # assistant turns.
    trainer = train_on_responses_only(
        trainer,
        instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
        response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
    )

    # Return value is discarded; this only checks that the first processed
    # training example can be decoded.
    tokenizer.decode(trainer.train_dataset[0]["input_ids"])

    # run training
    trainer_stats = trainer.train()

    add_to_comparison("Qlora model", ppl_model(model, tokenizer, dataset_ppl))

    # saving and merging the model to local disk
    print("merge and save to local disk")
    model.save_pretrained_merged(
        save_directory = "./unsloth_out/merged_llama_text_model", tokenizer = tokenizer
    )

    # load model from local disk and test
    print("Loading merged model in 4 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_llama_text_model",
        max_seq_length = 2048,
        load_in_4bit = True,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model load 4bit", ppl_model(merged_model, merged_tokenizer, dataset_ppl)
    )

    print("Computing 8-bit model perplexity in subprocess...")
    from queue import Empty

    result_queue = mp.Queue()
    p = mp.Process(target = load_and_compute_8bit_ppl, args = (result_queue, False, True))
    p.start()
    p.join()

    # The child has already exited, so its result (a single float) is either
    # waiting in the queue or will never arrive. A bounded get turns a crashed
    # child into an error instead of an indefinite hang.
    try:
        ppl_8bit = result_queue.get(timeout = 5)
    except Empty:
        raise RuntimeError(
            "8-bit perplexity subprocess exited with code "
            f"{p.exitcode} without reporting a result"
        ) from None
    add_to_comparison("merged model loaded 8bits", ppl_8bit)

    print("Loading merged model in 16 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_llama_text_model",
        max_seq_length = 2048,
        load_in_4bit = False,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model loaded 16bits",
        ppl_model(merged_model, merged_tokenizer, dataset_ppl),
    )

    print_model_comparison()

    # final cleanup
    safe_remove_directory("./outputs")
    safe_remove_directory("./unsloth_compiled_cache")
    safe_remove_directory("./unsloth_out")


================================================
FILE: tests/saving/language_models/test_merged_model_perplexity_qwen_2.5.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


# Alpaca-style prompt template with three positional ``{}`` slots, filled in
# order with (instruction, input, response) by formatting_prompts_func.
# The section headers start at column 0; whitespace inside the literal is part
# of the prompt text and must not be re-indented.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


# Define helper functions outside of main
def formatting_prompts_func(examples):
    """Convert a batch of chat conversations into Alpaca-style records.

    For each conversation the *last* ``"user"`` turn becomes the input and the
    *last* ``"assistant"`` turn becomes the output; a missing role yields an
    empty string. Turns with any other role are ignored. The instruction is
    the same fixed string for every record.

    Args:
        examples: Batch mapping with a ``"messages"`` sequence of conversations,
            each a sequence of ``{"role": ..., "content": ...}`` turns.

    Returns:
        A dict of four equal-length lists keyed ``"instruction"``, ``"input"``,
        ``"output"`` and ``"text"``; ``"text"`` is the module-level
        ``alpaca_prompt`` filled with the other three fields.
    """
    fixed_instruction = "Complete the statement"
    columns = {"instruction": [], "input": [], "output": [], "text": []}

    for conversation in examples["messages"]:
        prompt, reply = "", ""
        for turn in conversation:
            role = turn["role"]
            # Later turns overwrite earlier ones, so only the final turn of
            # each role survives.
            if role == "user":
                prompt = turn["content"]
            elif role == "assistant":
                reply = turn["content"]

        columns["instruction"].append(fixed_instruction)
        columns["input"].append(prompt)
        columns["output"].append(reply)
        columns["text"].append(alpaca_prompt.format(fixed_instruction, prompt, reply))

    return columns


def load_and_compute_8bit_ppl(result_queue, load_in_4bit = False, load_in_8bit = False):
    """Score the merged Qwen checkpoint and hand the perplexity back via a queue.

    Used as a ``multiprocessing.Process`` target; the model, tokenizer and
    evaluation split are all loaded inside the call.

    The evaluation text is built with the module-level
    ``formatting_prompts_func`` (and therefore the module-level
    ``alpaca_prompt``). An earlier version kept a private copy of the template
    inside this function; because that copy lived in an indented triple-quoted
    string, every line after the first carried four extra leading spaces, so
    this row was scored on different text than the other rows in the table.

    Args:
        result_queue: Queue-like object; exactly one Python scalar is ``put`` on it.
        load_in_4bit: Forwarded to ``FastLanguageModel.from_pretrained``.
        load_in_8bit: Forwarded to ``FastLanguageModel.from_pretrained``.
    """
    from unsloth import FastLanguageModel
    from tests.utils.perplexity_eval import ppl_model

    # Load model
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_qwen_text_model",
        max_seq_length = 2048,
        load_in_4bit = load_in_4bit,
        load_in_8bit = load_in_8bit,
    )

    # Load dataset fresh in subprocess
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    # Reuse the module-level formatter so the prompt text is byte-identical to
    # the one used for the in-process evaluations.
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    ppl_value = ppl_model(merged_model, merged_tokenizer, dataset_ppl)

    # Reduce the result to a plain Python scalar before it crosses the process
    # boundary.
    if torch.is_tensor(ppl_value):
        ppl_value = ppl_value.cpu().item()  # Move to CPU and convert to Python scalar
    elif hasattr(ppl_value, "item"):
        ppl_value = ppl_value.item()  # Convert numpy or other array types
    else:
        ppl_value = float(ppl_value)  # Ensure it's a float

    # Return only the perplexity value
    result_queue.put(ppl_value)


# Main execution code should be wrapped in this guard
if __name__ == "__main__":
    # End-to-end check for Qwen2.5-7B-Instruct: measure perplexity of the
    # 4-bit base model, QLoRA-finetune it on Alpaca-formatted text, merge and
    # save, then reload the merged checkpoint in 4-bit, 8-bit (in a
    # subprocess) and 16-bit and compare perplexities.

    # The 8-bit evaluation runs in a child process; "spawn" starts it from a
    # fresh interpreter rather than a fork of this one.
    mp.set_start_method("spawn", force = True)

    # bf16 + flash-attention when the GPU supports bf16, otherwise fp16 + SDPA.
    if torch.cuda.is_bf16_supported():
        compute_dtype = torch.bfloat16
        attn_implementation = "flash_attention_2"
    else:
        compute_dtype = torch.float16
        attn_implementation = "sdpa"

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Qwen2.5-7B-Instruct",
        max_seq_length = 2048,
        dtype = compute_dtype,
        load_in_4bit = True,
        load_in_8bit = False,
        full_finetuning = False,
        attn_implementation = attn_implementation,
    )

    dataset_train = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "train"
    )
    dataset_ppl = load_dataset(
        "allenai/openassistant-guanaco-reformatted", split = "eval"
    )

    # No chat template here: conversations are flattened into the Alpaca prompt.
    dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
    dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

    # Baseline row of the comparison table.
    add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

    # Attach LoRA adapters to the attention and MLP projections.
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16,
        target_modules = [
            "k_proj",
            "q_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "down_proj",
            "up_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

    from unsloth import is_bfloat16_supported

    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        train_dataset = dataset_train,
        dataset_text_field = "text",
        max_seq_length = 2048,
        data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
        dataset_num_proc = 2,
        packing = False,
        args = TrainingArguments(
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            warmup_ratio = 0.1,
            max_steps = 200,
            learning_rate = 2e-4,
            fp16 = not is_bfloat16_supported(),
            bf16 = is_bfloat16_supported(),
            logging_steps = 50,
            optim = "adamw_8bit",
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "outputs",
            report_to = "none",
        ),
    )

    # run training
    trainer_stats = trainer.train()

    add_to_comparison("Qlora model", ppl_model(model, tokenizer, dataset_ppl))

    # saving and merging the model to local disk
    print("merge and save to local disk")
    model.save_pretrained_merged(
        save_directory = "./unsloth_out/merged_qwen_text_model", tokenizer = tokenizer
    )

    # load model from local disk and test
    print("Loading merged model in 4 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_qwen_text_model",
        max_seq_length = 2048,
        load_in_4bit = True,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model load 4bit", ppl_model(merged_model, merged_tokenizer, dataset_ppl)
    )

    print("Computing 8-bit model perplexity in subprocess...")
    from queue import Empty

    result_queue = mp.Queue()
    p = mp.Process(target = load_and_compute_8bit_ppl, args = (result_queue, False, True))
    p.start()
    p.join()

    # The child has already exited, so its result (a single float) is either
    # waiting in the queue or will never arrive. A bounded get turns a crashed
    # child into an error instead of an indefinite hang.
    try:
        ppl_8bit = result_queue.get(timeout = 5)
    except Empty:
        raise RuntimeError(
            "8-bit perplexity subprocess exited with code "
            f"{p.exitcode} without reporting a result"
        ) from None
    add_to_comparison("merged model loaded 8bits", ppl_8bit)

    print("Loading merged model in 16 bit for perplexity test")
    merged_model, merged_tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./unsloth_out/merged_qwen_text_model",
        max_seq_length = 2048,
        load_in_4bit = False,
        load_in_8bit = False,
    )

    add_to_comparison(
        "merged model loaded 16bits",
        ppl_model(merged_model, merged_tokenizer, dataset_ppl),
    )

    print_model_comparison()

    # final cleanup
    safe_remove_directory("./outputs")
    safe_remove_directory("./unsloth_compiled_cache")
    safe_remove_directory("./unsloth_out")


================================================
FILE: tests/saving/language_models/test_push_to_hub_merged.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc
import os
from huggingface_hub import HfFileSystem, hf_hub_download

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


# Define helper functions outside of main
def formatting_prompts_func(examples):
    """Render each conversation of a batch through the chat template.

    Uses the module-level ``tokenizer``, which must be bound before the first
    call.

    Args:
        examples: Batch mapping with a ``"messages"`` sequence of conversations.

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation.
    """
    texts = []
    for messages in examples["messages"]:
        # Plain-text rendering; no generation prompt is appended.
        text = tokenizer.apply_chat_template(
            messages, tokenize = False, add_generation_prompt = False
        )
        texts.append(text)
    return {"text": texts}


# --- Load, prepare data and fine-tune ---------------------------------------
# Everything below runs at import time (there is no ``__main__`` guard in this
# script) and binds the module globals the later upload/download stages use.

# bf16 + flash-attention when the GPU supports bf16, otherwise fp16 + SDPA.
if torch.cuda.is_bf16_supported():
    compute_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    compute_dtype = torch.float16
    attn_implementation = "sdpa"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct",
    max_seq_length = 2048,
    dtype = compute_dtype,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
    attn_implementation = attn_implementation,
)

# `tokenizer` is the module global that formatting_prompts_func reads.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

from unsloth.chat_templates import standardize_sharegpt

dataset_train = load_dataset("allenai/openassistant-guanaco-reformatted", split = "train")
dataset_ppl = load_dataset("allenai/openassistant-guanaco-reformatted", split = "eval")

dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

# Records the base-model perplexity; nothing later in this script prints the
# comparison table.
add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

# Attach LoRA adapters to the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "down_proj",
        "up_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

from unsloth import is_bfloat16_supported

# NOTE(review): logging_steps (50) is larger than max_steps (30), so periodic
# step logging likely never fires during this short run - confirm if intended.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "text",
    max_seq_length = 2048,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        max_steps = 30,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 50,
        optim = "adamw_8bit",
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

from unsloth.chat_templates import train_on_responses_only

# The two markers are the Llama-3 chat-template headers for user and assistant
# turns.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

# run training
trainer_stats = trainer.train()


# --- Push the merged model to the Hub and verify it can be loaded back ------

# Hub credentials come from the environment; if a variable is unset the user
# is prompted and the answer is written back to os.environ.
hf_username = os.environ.get("HF_USER", "")
if not hf_username:
    hf_username = input("Please enter your Hugging Face username: ").strip()
    os.environ["HF_USER"] = hf_username

hf_token = os.environ.get("HF_TOKEN", "")
if not hf_token:
    hf_token = input("Please enter your Hugging Face token: ").strip()
    os.environ["HF_TOKEN"] = hf_token


repo_name = f"{hf_username}/merged_llama_text_model"

# Per-stage outcome, rendered in the validation report below.
success = {
    "upload": False,
    "download": False,
}

# Stage 1: Upload model to Hub
try:
    print("\n" + "=" * 80)
    print("=== UPLOADING MODEL TO HUB ===".center(80))
    print("=" * 80 + "\n")
    model.push_to_hub_merged(repo_name, tokenizer = tokenizer, token = hf_token)
    success["upload"] = True
    print("✅ Model uploaded successfully!")
except Exception as e:
    print(f"❌ Failed to upload model: {e}")
    # Chain the original error so the real cause stays in the traceback.
    raise Exception("Model upload failed.") from e

# Stage 2: Test downloading the model
# Remove the local directory named after the user first.
# NOTE(review): presumably this is where the merge step stages its files, and
# removing it keeps the load below from resolving to a local path - confirm.
safe_remove_directory(f"./{hf_username}")

try:
    print("\n" + "=" * 80)
    print("=== TESTING MODEL DOWNLOAD ===".center(80))
    print("=" * 80 + "\n")
    # NOTE(review): no force-download option is passed here, so a cached
    # snapshot may satisfy this load.
    model, tokenizer = FastLanguageModel.from_pretrained(repo_name)
    success["download"] = True
    print("✅ Model downloaded successfully!")
except Exception as e:
    print(f"❌ Download failed: {e}")
    raise Exception("Model download failed.") from e

# Final report
print("\n" + "=" * 80)
print("=== VALIDATION REPORT ===".center(80))
print("=" * 80 + "\n")
for stage, passed in success.items():
    status = "✓" if passed else "✗"
    print(f"{status} {stage.replace('_', ' ').title()}")
print("\n" + "=" * 80)

if all(success.values()):
    print("\n🎉 All stages completed successfully!")
else:
    raise Exception("Validation failed for one or more stages.")

# final cleanup
safe_remove_directory("./outputs")
safe_remove_directory("./unsloth_compiled_cache")


================================================
FILE: tests/saving/language_models/test_push_to_hub_merged_sharded_index_file.py
================================================
from unsloth import FastLanguageModel, FastVisionModel, UnslothVisionDataCollator
from unsloth.chat_templates import get_chat_template
from trl import SFTTrainer, SFTConfig
from transformers import (
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    TrainingArguments,
)
from datasets import load_dataset, Dataset
import torch
from tqdm import tqdm
import pandas as pd
import multiprocessing as mp
from multiprocessing import Process, Queue
import gc
import os
from huggingface_hub import HfFileSystem, hf_hub_download

# ruff: noqa
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.perplexity_eval import (
    ppl_model,
    add_to_comparison,
    print_model_comparison,
)


# Define helper functions outside of main
def formatting_prompts_func(examples):
    """Render a batch of conversations to plain text with the chat template.

    Reads the ``"messages"`` column of ``examples`` and returns a dict with a
    single ``"text"`` column holding one rendered string per conversation.
    Uses the module-level ``tokenizer``.
    """
    rendered = []
    for conversation in examples["messages"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation, tokenize = False, add_generation_prompt = False
            )
        )
    return {"text": rendered}


# Pick the compute dtype and attention backend from the GPU's bf16 support:
# bfloat16 + "flash_attention_2" when supported, float16 + "sdpa" otherwise.
if torch.cuda.is_bf16_supported():
    compute_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    compute_dtype = torch.float16
    attn_implementation = "sdpa"

# Load the base model in 4-bit with the dtype/backend chosen above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.1-8B-Instruct",
    max_seq_length = 2048,
    dtype = compute_dtype,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
    attn_implementation = attn_implementation,
)

# Switch the tokenizer to the "llama-3.1" chat template; formatting_prompts_func
# renders conversations through this tokenizer.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

# NOTE(review): standardize_sharegpt is imported but not used in the visible script.
from unsloth.chat_templates import standardize_sharegpt

# Same dataset, two splits: "train" for fine-tuning, "eval" for perplexity.
dataset_train = load_dataset("allenai/openassistant-guanaco-reformatted", split = "train")
dataset_ppl = load_dataset("allenai/openassistant-guanaco-reformatted", split = "eval")

# Add the rendered "text" column to both splits.
dataset_train = dataset_train.map(formatting_prompts_func, batched = True)
dataset_ppl = dataset_ppl.map(formatting_prompts_func, batched = True)

# Record the perplexity of the 4-bit base model before any training.
add_to_comparison("Base model 4 bits", ppl_model(model, tokenizer, dataset_ppl))

# Wrap the base model with LoRA adapters (rank 16) on the attention and MLP
# projection modules listed below.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = [
        "k_proj",
        "q_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "down_proj",
        "up_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

from unsloth import is_bfloat16_supported

# Short supervised fine-tuning run (30 optimizer steps) on the rendered
# "text" column; mixed precision follows is_bfloat16_supported().
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    dataset_text_field = "text",
    max_seq_length = 2048,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        max_steps = 30,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 50,
        optim = "adamw_8bit",
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

from unsloth.chat_templates import train_on_responses_only

# Re-wrap the trainer with the Llama 3.1 user/assistant header markers so the
# helper can tell instruction text from response text.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

# run training
trainer_stats = trainer.train()


# saving and merging the model to local disk
# Hub credentials come from the environment when set; otherwise the user is
# prompted and the answer is written back to os.environ.
if os.environ.get("HF_USER", ""):
    hf_username = os.environ["HF_USER"]
else:
    hf_username = input("Please enter your Hugging Face username: ").strip()
    os.environ["HF_USER"] = hf_username

if os.environ.get("HF_TOKEN", ""):
    hf_token = os.environ["HF_TOKEN"]
else:
    hf_token = input("Please enter your Hugging Face token: ").strip()
    os.environ["HF_TOKEN"] = hf_token


repo_name = hf_username + "/merged_llama_text_model"
# One flag per validation stage, flipped to True as each stage passes.
success = dict.fromkeys(("upload", "safetensors_check", "download"), False)

# Stage 1: Upload model to Hub
# Any failure is printed and re-raised, which aborts the script.
try:
    print("\n" + "=" * 80)
    print("=== UPLOADING MODEL TO HUB ===".center(80))
    print("=" * 80 + "\n")
    model.push_to_hub_merged(repo_name, tokenizer = tokenizer, token = hf_token)
    success["upload"] = True
    print("✅ Model uploaded successfully!")
except Exception as e:
    print(f"❌ Failed to upload model: {e}")
    raise Exception("Model upload failed.")

# Stage 2: Verify safetensors.index.json exists
# Lists the top level of the repo and looks for a file whose name ends with
# "model.safetensors.index.json".
try:
    print("\n" + "=" * 80)
    print("=== VERIFYING REPO CONTENTS ===".center(80))
    print("=" * 80 + "\n")
    fs = HfFileSystem(token = hf_token)
    file_list = fs.ls(repo_name, detail = True)
    safetensors_found = any(
        file["name"].endswith("model.safetensors.index.json") for file in file_list
    )
    if safetensors_found:
        success["safetensors_check"] = True
        print("✅ model.safetensors.index.json found in repo!")
    else:
        # Raised inside the try on purpose: the handler below prints this
        # message and then re-raises a generic verification failure.
        raise Exception("model.safetensors.index.json not found in repo.")
except Exception as e:
    print(f"❌ Verification failed: {e}")
    raise Exception("Repo verification failed.")

# Stage 3: Test downloading the model (even if cached)
# Remove the local directory named after the *configured* Hub user (previously
# a hard-coded personal username, so other users' directories were never
# cleared) -- presumably so the repo id below is not resolved against a local
# folder (confirm).
safe_remove_directory(f"./{hf_username}")

try:
    print("\n" + "=" * 80)
    print("=== TESTING MODEL DOWNLOAD ===".center(80))
    print("=" * 80 + "\n")
    # Force download even if cached
    model, tokenizer = FastLanguageModel.from_pretrained(repo_name)
    success["download"] = True
    print("✅ Model downloaded successfully!")
except Exception as e:
    print(f"❌ Download failed: {e}")
    # Chain the original error so the real cause stays in the traceback.
    raise Exception("Model download failed.") from e

# Final report: one line per stage, then fail if any stage flag is still False.
divider = "=" * 80
print("\n" + divider)
print("=== VALIDATION REPORT ===".center(80))
print(divider + "\n")
for stage_name, stage_ok in success.items():
    marker = "✓" if stage_ok else "✗"
    label = stage_name.replace("_", " ").title()
    print(f"{marker} {label}")
print("\n" + divider)

if not all(success.values()):
    raise Exception("Validation failed for one or more stages.")
print("\n🎉 All stages completed successfully!")

# final cleanup
for leftover_dir in ("./outputs", "./unsloth_compiled_cache"):
    safe_remove_directory(leftover_dir)


================================================
FILE: tests/saving/language_models/test_save_merged_grpo_model.py
================================================
# -*- coding: utf-8 -*-
"""test_Llama3_1_(3B)_GRPO_LoRA (1).ipynb

### Unsloth

"""

from unsloth import FastLanguageModel
import torch
import sys
from pathlib import Path
import multiprocessing as mp
import gc
from multiprocessing import Queue

REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.aime_eval import evaluate_model_aime, compare_aime_results


# Module-level settings read by training_run (model loading, LoRA rank, trainer limits).
max_seq_length = 2048  # Can increase for longer reasoning traces
lora_rank = 64  # Larger rank = smarter, but slower


def evaluate_merged_model(result_queue, load_in_4bit = False, load_in_8bit = False):
    """Load ``./final_merged_model``, run the AIME evaluation and report it.

    Written to run as a child-process target: the evaluation result is handed
    back to the parent through ``result_queue``.

    Args:
        result_queue: Queue-like object; receives the value returned by
            ``evaluate_model_aime`` via ``put``.
        load_in_4bit: Load the merged checkpoint in 4-bit.
        load_in_8bit: Load the merged checkpoint in 8-bit. When both flags
            are false the run is labelled as a 16-bit load.
    """
    from unsloth import FastLanguageModel
    from tests.utils.aime_eval import evaluate_model_aime

    max_seq_length = 2048  # Can increase for longer reasoning traces
    lora_rank = 64  # Larger rank = smarter, but slower

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "./final_merged_model",
        max_seq_length = max_seq_length,
        # Honour the caller's precision flags so the label printed below
        # matches how the checkpoint was actually loaded.
        load_in_4bit = load_in_4bit,
        load_in_8bit = load_in_8bit,
        fast_inference = True,  # Enable vLLM fast inference
        max_lora_rank = lora_rank,
        gpu_memory_utilization = 0.8,  # Reduce if out of memory
    )

    print(f"\n{'='*60}")
    if load_in_4bit:
        print("🔍 EVALUATION Merged model: 4 bits load")
        model_type = "merged_model_4bits"
    elif load_in_8bit:
        print("🔍 EVALUATION Merged model: 8 bits load")
        model_type = "merged_model_8bits"
    else:
        print("🔍 EVALUATION Merged model: 16 bits load")
        model_type = "merged_model_16bits"
    print(f"{'='*60}")

    # Keep the evaluation result: it is what the parent process reads back.
    results = evaluate_model_aime(
        model = model,
        tokenizer = tokenizer,
        model_type = model_type,
        temperature = 0.3,
        n_sampling = 8,
        max_tokens = 32768,
        top_p = 0.95,
        seed = 0,
    )

    result_queue.put(results)

    # Drop the model references and release cached GPU memory before exiting.
    del model
    del tokenizer
    torch.cuda.empty_cache()
    gc.collect()


# Main execution code should be wrapped in this guard
def training_run(result_queue):
    """Run the whole train-and-evaluate pipeline inside one process.

    Loads the base model, evaluates it with ``evaluate_model_aime``, fine-tunes
    on LIMO with SFT and then on GSM8K with GRPO, evaluates again, saves a
    merged checkpoint to ``final_merged_model`` and reports evaluation output
    to the parent through ``result_queue``.

    Args:
        result_queue: Queue-like object used to hand results back via ``put``.
    """
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "meta-llama/Llama-3.2-3B-Instruct",
        max_seq_length = max_seq_length,
        load_in_4bit = False,  # False for LoRA 16bit
        fast_inference = True,  # Enable vLLM fast inference
        max_lora_rank = lora_rank,
        gpu_memory_utilization = 0.8,  # Reduce if out of memory
    )

    """### Helper Functions
    <a name="Data"></a>

#### Helper functions - Data Prep
    """

    import re
    import json

    # Tag strings for the expected response layout. solution_start is also
    # used further down in this function to build the match_numbers regex.
    reasoning_start = "<reasoning>"
    reasoning_end = "</reasoning>"
    solution_start = "<answer>"
    solution_end = "</answer>"

    def extract_hash_answer(text):
        """Return the stripped segment that follows the first "####" marker.

        The segment ends at the next "####" if there is one. Returns None when
        the text contains no marker at all.
        """
        pieces = text.split("####")
        if len(pieces) < 2:
            return None
        return pieces[1].strip()

    def prepare_gsm8k_dataset(dataset):
        """Map GSM8K rows to a system+user chat prompt plus the parsed answer.

        Each mapped row gets a ``"prompt"`` (system instructions followed by
        the row's question) and an ``"answer"`` taken from the row's answer
        text via ``extract_hash_answer``.
        """
        think_open, think_close = "<reasoning>", "</reasoning>"
        answer_open, answer_close = "<answer>", "</answer>"

        instructions = "".join(
            [
                "You are given a problem. Think about the problem and reason step by step. ",
                f"Place your thinking process between {think_open} and {think_close}. ",
                f"Then, provide your final numerical solution between {answer_open}{answer_close}",
            ]
        )

        def to_chat_row(row):
            system_turn = {"role": "system", "content": instructions}
            user_turn = {"role": "user", "content": row["question"]}
            return {
                "prompt": [system_turn, user_turn],
                "answer": extract_hash_answer(row["answer"]),
            }

        return dataset.map(to_chat_row)

    def prepare_limo_dataset(dataset):
        """Turn LIMO rows into system/user/assistant conversations for SFT.

        Returns None when no dataset is supplied. Otherwise every mapped row
        gets a ``"prompt"`` list whose assistant turn wraps the row's solution
        and answer in <reasoning>/<answer> tags.
        """
        if dataset is None:
            return None

        system_prompt = """You are a helpful reasoning assistant. When given a problem, think through it step by step and provide your answer in the following format:

    <reasoning>
    [Your detailed step-by-step reasoning and solution process]
    </reasoning>
    <answer>
    [Your final numerical answer]
    </answer>"""

        def to_conversation(row):
            # The target completion follows the layout requested in the system prompt.
            completion = (
                f"<reasoning>\n{row['solution']}\n</reasoning>\n"
                f"<answer>\n{row['answer']}\n</answer>"
            )
            turns = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": row["question"]},
                {"role": "assistant", "content": completion},
            ]
            return {"prompt": turns}

        return dataset.map(to_conversation)

    print("\n✅ Dataset preparation functions defined!")

    """#### Helper functions - Evaluation"""

    def get_max_prompt_length(dataset, tokenizer):
        """Tokenize every prompt and return (max, average) token counts.

        The minimum is printed alongside the other two but not returned.
        """
        print("Analyzing prompt lengths...")

        def tokenize_batch(batch):
            return {
                "tokens": tokenizer.apply_chat_template(
                    batch["prompt"], add_generation_prompt = True, tokenize = True
                )
            }

        def count_tokens(row):
            return {"length": len(row["tokens"])}

        tokenized = dataset.map(tokenize_batch, batched = True)
        lengths = tokenized.map(count_tokens)["length"]

        longest = max(lengths)
        mean_length = sum(lengths) / len(lengths)
        shortest = min(lengths)

        print(
            f"Prompt lengths - Min: {shortest}, Max: {longest}, Avg: {mean_length:.1f}"
        )
        return longest, mean_length

    def extract_unsloth_answer(text, start_tag = "<SOLUTION>", end_tag = "</SOLUTION>"):
        """Return the content of the last start_tag...end_tag pair in the text.

        Percent signs, dollar signs and commas are removed and the result is
        stripped. Returns "" when no tagged section is found.
        """
        tagged_section = re.compile(
            re.escape(start_tag) + r"(.*?)" + re.escape(end_tag), re.DOTALL
        )
        found = tagged_section.findall(text)
        if not found:
            return ""
        # Only the final tagged section counts as the answer.
        return re.sub(r"[%$,]", "", found[-1]).strip()

    def find_number(search_string):
        """Return the last number-like token in the string, commas removed.

        Returns "" when the string contains no number.
        """
        hits = re.findall(
            r"-?[\d,]*\.?\d+",
            search_string,
            re.MULTILINE | re.DOTALL | re.IGNORECASE,
        )
        if not hits:
            return ""
        return hits[-1].replace(",", "").strip()

    def remove_symbols(x: str) -> str:
        """Strip whitespace and drop every comma, percent sign and dollar sign.

        Falsy input (empty string, None) yields "".
        """
        if x:
            return x.translate(str.maketrans("", "", ",%$")).strip()
        return ""

    def get_num_tokens(text, tokenizer_instance):
        """Return the length of the first ``input_ids`` row for ``text``.

        Falsy text is reported as 0 without calling the tokenizer.
        """
        if text:
            token_ids = tokenizer_instance(text, return_tensors = "pt")["input_ids"]
            return len(token_ids[0])
        return 0

    def check_format_compliance(text, format_type = "unsloth"):
        """Report whether the stripped text matches the "unsloth" layout.

        The layout is a <start_reasoning>...<end_reasoning> section followed
        by a <SOLUTION>...</SOLUTION> section, each with non-empty content.
        Any other ``format_type`` yields False.
        """
        if format_type != "unsloth":
            return False

        think_open = re.escape("<start_reasoning>")
        think_close = re.escape("<end_reasoning>")
        answer_open = re.escape("<SOLUTION>")
        answer_close = re.escape("</SOLUTION>")

        layout = (
            rf"^[\s]*{think_open}.+?{think_close}.*?"
            rf"{answer_open}.+?{answer_close}[\s]*$"
        )
        return re.match(layout, text.strip(), re.DOTALL) is not None

    def normalize_answer(answer):
        """Return a canonical string form of an answer for comparison.

        Falsy input gives "". Otherwise symbols are removed via
        ``remove_symbols``; text that parses as a number is rendered as an
        integer string when integral and as ``str(float)`` otherwise, and
        non-numeric text is returned as cleaned.
        """
        if not answer:
            return ""

        cleaned = remove_symbols(str(answer))

        try:
            number = float(cleaned)
        except (ValueError, TypeError):
            return cleaned
        return str(int(number)) if number.is_integer() else str(number)

    def evaluate_answer_correctness(extracted_answer, ground_truth):
        """Compare an extracted answer with the ground truth.

        Returns a 3-tuple ``(exact, plausible, confidence)``. Answers are first
        normalized with ``normalize_answer``; equal normalized strings are an
        exact match. Numeric answers are then graded by relative error (or by
        absolute size when the ground truth is zero); non-numeric answers fall
        back to a case-insensitive comparison.
        """
        no_match = (False, False, 0.0)
        full_match = (True, True, 1.0)

        if not (extracted_answer and ground_truth):
            return no_match

        candidate = normalize_answer(extracted_answer)
        reference = normalize_answer(ground_truth)
        if candidate == reference:
            return full_match

        try:
            candidate_value = float(candidate)
            reference_value = float(reference)
        except (ValueError, TypeError):
            # Non-numeric answers: only a case-insensitive match counts.
            if candidate.lower() == reference.lower():
                return full_match
            return no_match

        if reference_value == 0:
            # Relative error is undefined here, so grade by absolute size.
            if candidate_value == 0:
                return full_match
            if abs(candidate_value) < 0.01:
                return (False, True, 0.7)
            return no_match

        relative_error = abs(candidate_value - reference_value) / abs(reference_value)
        tiers = (
            (0.01, (True, True, 0.9)),
            (0.05, (False, True, 0.7)),
            (0.10, (False, True, 0.5)),
        )
        for ceiling, verdict in tiers:
            if relative_error < ceiling:
                return verdict
        return no_match

    """#### Reward Functions for GRPO"""

    def match_format_exactly(completions, **kwargs):
        """Reward 3.0 for completions that follow the full tag layout, else 0.0.

        The layout is a <reasoning>...</reasoning> section followed by an
        <answer>...</answer> section, allowing surrounding whitespace.
        """
        think_open = re.escape("<reasoning>")
        think_close = re.escape("</reasoning>")
        answer_open = re.escape("<answer>")
        answer_close = re.escape("</answer>")

        layout = re.compile(
            rf"^[\s]*{think_open}.+?{think_close}.*?"
            rf"{answer_open}.+?{answer_close}[\s]*$",
            re.DOTALL,
        )

        # Only the first message of each completion is inspected.
        texts = [completion[0]["content"] for completion in completions]
        return [3.0 if layout.match(body) else 0.0 for body in texts]

    def match_format_approximately(completions, **kwargs):
        """Score each completion by how many format tags appear exactly once.

        Each of the four tags (<reasoning>, </reasoning>, <answer>, </answer>)
        adds 0.5 when it occurs exactly once in the first message of the
        completion and subtracts 1.0 otherwise, so a score lies between -4.0
        and 2.0.

        Args:
            completions: Sequence of completions; each is a list whose first
                item is a dict with a ``"content"`` string.
            **kwargs: Ignored; accepted so the function fits the reward
                function call signature.

        Returns:
            One score per completion, in order.
        """
        # The opening answer tag was previously misspelled ("<answerr>"), so
        # it never matched and every well-formed completion lost 1.5 points.
        expected_tags = ("<reasoning>", "</reasoning>", "<answer>", "</answer>")

        scores = []
        for completion in completions:
            response = completion[0]["content"]
            score = 0
            for tag in expected_tags:
                score += 0.5 if response.count(tag) == 1 else -1.0
            scores.append(score)
        return scores

    def check_answer_correctness(prompts, completions, answer, **kwargs):
        """Reward function for answer correctness.

        The text inside the first <answer>...</answer> pair of each completion
        is compared with the matching entry of ``answer``:

        * no extracted answer: 0
        * identical strings: 3.0
        * identical after stripping whitespace: 1.5
        * numeric ratio within [0.9, 1.1]: 1.0; within [0.8, 1.2]: 0.5
        * anything else, including values that are not numbers: -1.5

        Args:
            prompts: Unused; part of the reward function call signature.
            completions: Sequence of completions; each is a list whose first
                item is a dict with a ``"content"`` string.
            answer: Ground-truth answer strings, aligned with ``completions``.
            **kwargs: Ignored.

        Returns:
            One score per completion, in order.
        """

        def extract_solution_answer(text):
            # Content of the first <answer> block with %, $ and , removed.
            pattern = r"<answer>(.*?)</answer>"
            match = re.search(pattern, text, re.DOTALL)
            if match:
                return re.sub(r"[%$,]", "", match.group(1)).strip()
            return ""

        responses = [completion[0]["content"] for completion in completions]
        extracted_responses = [extract_solution_answer(r) for r in responses]

        scores = []
        for guess, true_answer in zip(extracted_responses, answer):
            score = 0
            if not guess:
                scores.append(0)
                continue

            if guess == true_answer:
                score += 3.0
            elif guess.strip() == true_answer.strip():
                score += 1.5
            else:
                try:
                    ratio = float(guess) / float(true_answer)
                    if 0.9 <= ratio <= 1.1:
                        score += 1.0
                    elif 0.8 <= ratio <= 1.2:
                        score += 0.5
                    else:
                        score -= 1.5
                except (ValueError, ZeroDivisionError):
                    # Non-numeric text or a zero ground truth counts as wrong.
                    # A bare except here would also swallow KeyboardInterrupt
                    # and SystemExit during training.
                    score -= 1.5
            scores.append(score)
        return scores

    print("✅ Reward functions defined!")

    """#### Main Evaluation Function"""

    import gc

    """#### Comparison and Memory Management"""

    def compare_model_results(all_results):
        """Print a comparison table for several result dicts and save it as JSON.

        Every result needs ``model_type``, ``correct_format_pct``,
        ``exact_match_pct``, ``plausible_match_pct`` and ``avg_confidence``.
        With more than one result, each later entry is compared against the
        first. The summary and the entry with the highest ``exact_match_pct``
        are written to ``model_comparison_comprehensive.json``.
        """
        wide_rule = "=" * 80
        print(f"\n{wide_rule}")
        print("COMPREHENSIVE MODEL COMPARISON")
        print(f"{wide_rule}")

        # Main table
        print(
            f"{'Model':<15} {'Format %':<10} {'Exact %':<10} {'Plausible %':<12} {'Confidence':<12}"
        )
        print("-" * 80)

        for row in all_results:
            cells = [
                f"{row['model_type']:<15}",
                f"{row['correct_format_pct']:<10.1f}",
                f"{row['exact_match_pct']:<10.1f}",
                f"{row['plausible_match_pct']:<12.1f}",
                f"{row['avg_confidence']:<12.3f}",
            ]
            print(" ".join(cells))

        # Improvement analysis: every later entry against the first one.
        if len(all_results) > 1:
            narrow_rule = "=" * 50
            print(f"\n{narrow_rule}")
            print("IMPROVEMENT ANALYSIS")
            print(f"{narrow_rule}")

            tracked = (
                ("  Format compliance: ", "correct_format_pct"),
                ("  Exact matches:     ", "exact_match_pct"),
                ("  Plausible matches: ", "plausible_match_pct"),
            )
            baseline = all_results[0]
            for candidate in all_results[1:]:
                print(f"\n{candidate['model_type']} vs {baseline['model_type']}:")
                gains = [candidate[metric] - baseline[metric] for _, metric in tracked]
                for (label, _), gain in zip(tracked, gains):
                    print(f"{label}{gain:+.1f}%")

        # Save comparison
        best = max(all_results, key = lambda entry: entry["exact_match_pct"])
        comparison_data = {"summary": all_results, "best_model": best}

        with open("model_comparison_comprehensive.json", "w") as f:
            json.dump(comparison_data, f, indent = 4)

        print(
            f"\nBest performing model: {best['model_type']} "
            f"({best['exact_match_pct']:.1f}% exact matches)"
        )

    def cleanup_memory():
        """Run ten rounds of CUDA cache clearing plus garbage collection.

        When CUDA is available, the allocated and reserved GPU memory are
        printed afterwards in GB.
        """
        print("🧹 Cleaning up GPU memory...")
        remaining_rounds = 10
        while remaining_rounds > 0:
            torch.cuda.empty_cache()
            gc.collect()
            remaining_rounds -= 1

        if not torch.cuda.is_available():
            return

        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        print(
            f"GPU memory - Allocated: {allocated:.2f} GB, Reserved: {reserved:.2f} GB"
        )

    """#### Data Loading and Preparation"""

    from datasets import load_dataset

    # Load GSM8K
    gsm8k_dataset = load_dataset("openai/gsm8k", "main", split = "train")

    # Load LIMO (adjust this based on your access method)
    limo_train = load_dataset("GAIR/LIMO", split = "train")

    # Prepare datasets: GSM8K rows become chat prompts with a parsed answer,
    # LIMO rows become full system/user/assistant conversations.
    gsm8k_train = prepare_gsm8k_dataset(gsm8k_dataset)
    limo_train = prepare_limo_dataset(limo_train)

    print(f"  GSM8K train: {len(gsm8k_train)}")
    print(f"  LIMO train:  {len(limo_train) if limo_train else 0}")

    # Store results
    all_results = []

    # Baseline: evaluate the freshly loaded base model (model_type "base")
    # before any fine-tuning.
    # NOTE(review): this result is kept in `results` only; it is not appended
    # to all_results here.
    results = evaluate_model_aime(
        model = model,
        tokenizer = tokenizer,
        model_type = "base",
        temperature = 0.3,
        n_sampling = 8,
        max_tokens = 32768,
        top_p = 0.95,
        seed = 0,
    )

    from unsloth.chat_templates import get_chat_template

    # Switch the tokenizer to the "llama-3.1" chat template before rendering
    # the SFT conversations.
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "llama-3.1",
    )

    def formatting_prompts_func(examples):
        """Render each conversation in the batch's "prompt" column to text."""
        convos = examples["prompt"]
        texts = [
            tokenizer.apply_chat_template(
                convo, tokenize = False, add_generation_prompt = False
            )
            for convo in convos
        ]
        return {
            "text": texts,
        }

    # Add the rendered "text" column that the SFT trainer reads.
    limo_train = limo_train.map(
        formatting_prompts_func,
        batched = True,
    )

    from trl import SFTTrainer
    from transformers import DataCollatorForSeq2Seq, TrainingArguments
    from unsloth import is_bfloat16_supported

    print(f"\n{'*'*60}")
    print("🎯 STAGE 1: Qlora Fine-Tuning on LIMO")
    print(f"{'*'*60}")

    # Attach LoRA adapters (rank and alpha both lora_rank) to the attention
    # and MLP projections.
    # NOTE(review): the banner says "Qlora", but the base model above is
    # loaded with load_in_4bit = False -- confirm the intended wording.
    model = FastLanguageModel.get_peft_model(
        model,
        r = lora_rank,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        target_modules = [
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],  # Remove QKVO if out of memory
        lora_alpha = lora_rank,
        use_gradient_checkpointing = "unsloth",  # Enable long context finetuning
        random_state = 3407,
    )

    # NOTE(review): limo_train.map(...) has already been called above, so
    # limo_train cannot be None here and the else branch looks unreachable.
    if limo_train is not None:
        # One epoch of supervised fine-tuning on the rendered LIMO text.
        trainer = SFTTrainer(
            model = model,
            tokenizer = tokenizer,
            train_dataset = limo_train,
            dataset_text_field = "text",
            max_seq_length = max_seq_length,
            data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
            dataset_num_proc = 2,
            packing = False,  # Can make training 5x faster for short sequences.
            args = TrainingArguments(
                per_device_train_batch_size = 2,
                gradient_accumulation_steps = 4,
                warmup_steps = 5,
                num_train_epochs = 1,  # Set this for 1 full training run.
                # max_steps = 60,
                learning_rate = 2e-4,
                fp16 = not is_bfloat16_supported(),
                bf16 = is_bfloat16_supported(),
                logging_steps = 1,
                optim = "adamw_8bit",
                weight_decay = 0.01,
                lr_scheduler_type = "linear",
                seed = 3407,
                output_dir = "outputs",
                report_to = "none",  # Use this for WandB etc
            ),
        )

        from unsloth.chat_templates import train_on_responses_only

        # Re-wrap the trainer with the Llama 3.1 user/assistant header markers
        # so the helper can tell instruction text from response text.
        trainer = train_on_responses_only(
            trainer,
            instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
            response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
        )

        # Train
        print(f"🚂 Starting SFT training on {len(limo_train)} examples...")
        trainer.train()

        # Save checkpoint
        model.save_pretrained("qlora_checkpoint")
        tokenizer.save_pretrained("qlora_checkpoint")
        print("💾 Qlora checkpoint saved!")

        # Cleanup
        del trainer
        cleanup_memory()

        print("✅ Qlora training completed!")
    else:
        print("⚠️ Skipping Qlora training - no LIMO dataset available")

    # Cleanup
    cleanup_memory()

    # Module-level counters that throttle the debug printing in check_numbers.
    global PRINTED_TIMES
    PRINTED_TIMES = 0
    global PRINT_EVERY_STEPS
    PRINT_EVERY_STEPS = 5

    # First run of digits, dots and commas that follows the opening answer tag.
    match_numbers = re.compile(
        solution_start + r".*?([\d\.\,]{1,})", flags = re.MULTILINE | re.DOTALL
    )

    def check_numbers(prompts, completions, answer, **kwargs):
        """Reward function comparing the first number after the answer tag.

        Scores 1.5 when the extracted number equals the ground truth as a
        float, -0.5 when it differs, and 0 when nothing could be extracted or
        either value cannot be parsed as a number. Every PRINT_EVERY_STEPS
        calls, the first question/answer/response of the batch is printed.

        Args:
            prompts: Batch of prompts; the last message of the first prompt is
                used for the debug print.
            completions: Sequence of completions; each is a list whose first
                item is a dict with a ``"content"`` string.
            answer: Ground-truth answers, aligned with ``completions``.
            **kwargs: Ignored.

        Returns:
            One score per completion, in order.
        """
        question = prompts[0][-1]["content"]
        responses = [completion[0]["content"] for completion in completions]

        extracted_responses = [
            guess.group(1) if (guess := match_numbers.search(r)) is not None else None
            for r in responses
        ]

        scores = []
        # Print only every few steps
        global PRINTED_TIMES
        global PRINT_EVERY_STEPS
        if PRINTED_TIMES % PRINT_EVERY_STEPS == 0:
            print(
                "*" * 20,
                f"Question:\n{question}",
                f"\nAnswer:\n{answer[0]}",
                f"\nResponse:\n{responses[0]}",
                f"\nExtracted:\n{extracted_responses[0]}",
            )
        PRINTED_TIMES += 1

        for guess, true_answer in zip(extracted_responses, answer):
            if guess is None:
                scores.append(0)
                continue
            # Convert to numbers
            try:
                true_answer = float(true_answer.strip())
                # Remove commas like in 123,456
                guess = float(guess.strip().replace(",", ""))
                scores.append(1.5 if guess == true_answer else -0.5)
            except (ValueError, AttributeError, TypeError):
                # Unparseable text (e.g. "1.2.3") or a missing/non-string
                # ground truth scores 0. A bare except here would also swallow
                # KeyboardInterrupt and SystemExit during training.
                scores.append(0)
                continue
        return scores

    print(f"\n{'*'*60}")
    print("🎯 STAGE 2: GRPO Fine-Tuning on GSM8K")
    print(f"{'*'*60}")

    # Get max prompt length
    max_prompt_length, _ = get_max_prompt_length(gsm8k_train, tokenizer)
    max_prompt_length = min(max_prompt_length + 10, 512)  # Add buffer, cap at 512

    print(f"Using max_prompt_length: {max_prompt_length}")

    from trl import GRPOConfig, GRPOTrainer

    # The completion budget is whatever remains of max_seq_length after the
    # prompt budget computed above.
    training_args = GRPOConfig(
        learning_rate = 5e-6,
        weight_decay = 0.1,
        warmup_ratio = 0.1,
        lr_scheduler_type = "cosine",
        optim = "adamw_torch_fused",
        logging_steps = 1,
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4,  # Increase to 4 for smoother training
        num_generations = 8,  # Decrease if out of memory
        max_prompt_length = max_prompt_length,
        max_completion_length = max_seq_length - max_prompt_length,
        # num_train_epochs = 1, # Set to 1 for a full training run
        # max_steps = 250,
        max_steps = 1000,
        save_steps = 250,
        max_grad_norm = 0.1,
        report_to = "none",  # Can use Weights & Biases
        output_dir = "outputs",
    )

    # Four reward functions are passed to the trainer: two format checks and
    # two answer checks.
    trainer = GRPOTrainer(
        model = model,
        processing_class = tokenizer,
        reward_funcs = [
            match_format_exactly,
            match_format_approximately,
            check_answer_correctness,
            check_numbers,
        ],
        args = training_args,
        train_dataset = gsm8k_train,
    )

    # Train
    print(f"🚂 Starting GRPO training on {len(gsm8k_train)} examples...")
    trainer.train()

    # Save checkpoint
    model.save_pretrained("grpo_checkpoint")
    tokenizer.save_pretrained("grpo_checkpoint")
    print("💾 GRPO checkpoint saved!")

    # Cleanup
    del trainer
    del training_args
    cleanup_memory()

    print("✅ GRPO training completed!")

    print(f"\n{'='*60}")
    print("🔍 EVALUATION 3: Final GRPO Model")
    print(f"{'='*60}")

    # Same evaluation settings as the baseline run, labelled "grpo".
    grpo_results = evaluate_model_aime(
        model = model,
        tokenizer = tokenizer,
        model_type = "grpo",
        temperature = 0.3,
        n_sampling = 8,
        max_tokens = 32768,
        top_p = 0.95,
        seed = 0,
    )

    all_results.append(grpo_results)
    print("✅ Final model evaluation complete!")

    print(f"\n{'='*60}")
    print("💾 SAVING FINAL MODEL")
    print(f"{'='*60}")

    # Save as merged model. Best effort: a failure is reported but does not
    # abort the run.
    try:
        model.save_pretrained_merged(
            "final_merged_model", tokenizer, save_method = "merged_16bit"
        )
        print("✅ Merged model saved to: final_merged_model/")
    except Exception as e:
        print(f"⚠️ Could not save merged model: {e}")
        print("Final model saved as LoRA adapter only")

    print("💾 Model saving complete!")

    safe_remove_directory("./unsloth_compiled_cache")

    # Report a list (baseline first, then everything collected in
    # all_results). The parent appends the merged-model results to this
    # payload, which fails on a single dict, and the GRPO result was
    # previously never sent back at all.
    result_queue.put([results, *all_results])

    # Clean up
    del model
    del tokenizer
    torch.cuda.empty_cache()
    gc.collect()

    # # Merged model load 16 bits model AIME eval
    # result_queue = mp.Queue()
    # p = mp.Process(target=evaluate_merged_model, args=(result_queue, False, False))
    # p.start()
    # p.join()
    #
    # merged_16bits = result_queue.get()
    # all_results.append(merged_16bits)
    #
    # # Clean up
    # del merged_model
    # del merged_tokenizer
    # del dataset_ppl
    # torch.cuda.empty_cache()
    # gc.collect()
    #
    # safe_remove_directory("./unsloth_compiled_cache")
    #
    # # Merged model load 8 bits model AIME eval
    #
    # result_queue = mp.Queue()
    # p = mp.Process(target=evaluate_merged_model, args=(result_queue, False, True))
    # p.start()
    # p.join()
    #
    # merged_16bits = result_queue.get()
    # all_results.append(merged_16bits)

    # Merged model load 4 bits AIME eval
    # result_queue = mp.Queue()
    # p = mp.Process(target=evaluate_merged_model, args=(result_queue, True, False))
    # p.start()
    # p.join()
    #
    # merged_16bits = result_queue.get()
    # all_results.append(merged_16bits)


if __name__ == "__main__":
    from queue import Empty

    mp.set_start_method("spawn", force = True)

    def _run_in_subprocess(target, *extra_args):
        """Run ``target(queue, *extra_args)`` in a child process and return what it queued.

        The queue is read before ``join()``: joining a child that still has
        buffered queue data can deadlock. Polling with a timeout also turns a
        child that died without reporting into an error instead of an endless
        wait.
        """
        queue = mp.Queue()
        child = mp.Process(target = target, args = (queue, *extra_args))
        child.start()
        while True:
            try:
                payload = queue.get(timeout = 5)
                break
            except Empty:
                if child.is_alive():
                    continue
                try:
                    # The child may have queued its result right before exiting.
                    payload = queue.get(timeout = 5)
                    break
                except Empty:
                    raise RuntimeError(
                        f"{target.__name__} exited with code {child.exitcode} "
                        "without reporting a result"
                    ) from None
        child.join()
        return payload

    # run main finetuning and grpo loop
    training_payload = _run_in_subprocess(training_run)
    # The training process may report one result dict or a list of them;
    # normalise to a list so the merged-model results can be appended.
    if isinstance(training_payload, (list, tuple)):
        all_results = list(training_payload)
    else:
        all_results = [training_payload]

    # Evaluate the merged checkpoint once per load mode, each in a fresh
    # process: 16-bit, then 8-bit, then 4-bit.
    for load_in_4bit, load_in_8bit in ((False, False), (False, True), (True, False)):
        all_results.append(
            _run_in_subprocess(evaluate_merged_model, load_in_4bit, load_in_8bit)
        )
        safe_remove_directory("./unsloth_compiled_cache")

    # AIME-specific comparison function

    print(f"\n{'='*80}")
    print("🏆 FINAL TRAINING PIPELINE RESULTS")
    print(f"{'='*80}")

    # Use the AIME-specific comparison
    compare_aime_results(all_results)


================================================
FILE: tests/saving/non_peft/test_mistral_non_peft.py
================================================
from unsloth import FastLanguageModel
from transformers import AutoModelForCausalLM
from peft import PeftModel
from pathlib import Path
import sys
import warnings

REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory


print(f"\n{'='*80}")
print("🔍 PHASE 1: Loading Base Model")
print(f"{'='*80}")

# Load the plain base model. No LoRA adapters are attached anywhere in this
# script, which is the situation the merge warning in phase 2 is about.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-v0.3",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
)


print("✅ Base model loaded successfully!")

# Attempting a merged save on the adapter-less model


print(f"\n{'='*80}")
print("🔍 PHASE 2: Attempting save_pretrained_merged (Should Warn)")
print(f"{'='*80}")

with warnings.catch_warnings(record = True) as w:
    warnings.simplefilter("always")
    model.save_pretrained_merged("test_output", tokenizer)

    # Verify warning. The "always" filter records every warning emitted during
    # the call, so search all of them rather than assuming ours came first.
    assert len(w) >= 1, "Expected warning but none raised"
    expected_msg = "Model is not a PeftModel (no Lora adapters detected). Skipping Merge. Please use save_pretrained() or push_to_hub() instead!"
    recorded_msgs = [str(item.message) for item in w]
    assert any(
        expected_msg in msg for msg in recorded_msgs
    ), f"Unexpected warning: {recorded_msgs}"

print("✅ Correct warning detected for non-PeftModel merge attempt!")


print(f"\n{'='*80}")
print("🔍 PHASE 3: Using save_pretrained (Should Succeed)")
print(f"{'='*80}")


try:
    with warnings.catch_warnings():
        warnings.simplefilter("error")  # Treat warnings as errors here
        model.save_pretrained("test_output")
except Exception as e:
    # Raise explicitly instead of `assert False`: asserts are stripped under
    # `python -O`, which would silently swallow the failure, and chaining
    # keeps the original traceback.
    raise AssertionError(f"Phase 3 failed: {e}") from e
else:
    print("✅ Standard save_pretrained completed successfully!")

# Remove the artefacts written by the two save attempts.
safe_remove_directory("./test_output")
safe_remove_directory("./unsloth_compiled_cache")


================================================
FILE: tests/saving/non_peft/test_whisper_non_peft.py
================================================
from unsloth import FastLanguageModel, FastModel
from transformers import AutoModelForCausalLM, WhisperForConditionalGeneration
from peft import PeftModel
from pathlib import Path
import sys
import warnings

REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory


print(f"\n{'='*80}")
print("🔍 PHASE 1: Loading Base Model")
print(f"{'='*80}")

# Load the plain base model. No LoRA adapters are attached anywhere in this
# script, which is the situation the merge warning in phase 2 is about.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/whisper-large-v3",
    dtype = None,  # Leave as None for auto detection
    load_in_4bit = False,  # Set to True to do 4bit quantization which reduces memory
    auto_model = WhisperForConditionalGeneration,
    whisper_language = "English",
    whisper_task = "transcribe",
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

print("✅ Base model loaded successfully!")

# Attempting a merged save on the adapter-less model


print(f"\n{'='*80}")
print("🔍 PHASE 2: Attempting save_pretrained_merged (Should Warn)")
print(f"{'='*80}")

with warnings.catch_warnings(record = True) as w:
    warnings.simplefilter("always")
    model.save_pretrained_merged("test_output", tokenizer)

    # Verify warning. The "always" filter records every warning emitted during
    # the call, so search all of them rather than assuming ours came first.
    assert len(w) >= 1, "Expected warning but none raised"
    expected_msg = "Model is not a PeftModel (no Lora adapters detected). Skipping Merge. Please use save_pretrained() or push_to_hub() instead!"
    recorded_msgs = [str(item.message) for item in w]
    assert any(
        expected_msg in msg for msg in recorded_msgs
    ), f"Unexpected warning: {recorded_msgs}"

print("✅ Correct warning detected for non-PeftModel merge attempt!")


print(f"\n{'='*80}")
print("🔍 PHASE 3: Using save_pretrained (Should Succeed)")
print(f"{'='*80}")


try:
    with warnings.catch_warnings():
        warnings.simplefilter("error")  # Treat warnings as errors here
        model.save_pretrained("test_output")
except Exception as e:
    # Raise explicitly instead of `assert False`: asserts are stripped under
    # `python -O`, which would silently swallow the failure, and chaining
    # keeps the original traceback.
    raise AssertionError(f"Phase 3 failed: {e}") from e
else:
    print("✅ Standard save_pretrained completed successfully!")

# Remove the artefacts written by the two save attempts.
safe_remove_directory("./test_output")
safe_remove_directory("./unsloth_compiled_cache")


================================================
FILE: tests/saving/test_unsloth_save.py
================================================
import json
import os
import shutil
import tempfile
import pytest
import importlib

from unsloth import FastLanguageModel, FastModel

# Checkpoints used to parametrise the bitsandbytes save tests.
model_to_test = [
    # --- text ---
    "unsloth/tinyllama",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/Qwen2.5-0.5B-Instruct",
    "unsloth/Qwen2.5-0.5B-Instruct-bnb-4bit",
    "unsloth/Phi-4-mini-instruct",
    "unsloth/Phi-4-mini-instruct-bnb-4bit",
    "unsloth/Qwen2.5-0.5B",
    # --- vision ---
    "unsloth/gemma-3-4b-it",
    "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
    "unsloth/Qwen2.5-VL-3B-Instruct-bnb-4bit",
]

# Checkpoints used to parametrise the TorchAO tests. The -bnb-4bit variants
# are left out because they are already quantized.
torchao_models = [
    "unsloth/tinyllama",
    "unsloth/Qwen2.5-0.5B-Instruct",
    # Currently disabled:
    # "unsloth/Phi-4-mini-instruct",
    # "unsloth/Qwen2.5-0.5B",
]


# Shared across tests: save method -> {model name -> total weight bytes}.
save_file_sizes = {
    "merged_16bit": {},
    "merged_4bit": {},
    "torchao": {},
}

# Tokenizer artefacts every save directory is expected to contain.
tokenizer_files = ["tokenizer_config.json", "special_tokens_map.json"]


@pytest.fixture(scope = "session", params = model_to_test)
def loaded_model_tokenizer(request):
    """Load the parametrised checkpoint with 4-bit loading and attach LoRA.

    Returns:
        A ``(model, tokenizer)`` tuple where ``model`` is the result of
        ``FastModel.get_peft_model``.
    """
    print("Loading model and tokenizer...")

    base_model, loaded_tokenizer = FastModel.from_pretrained(
        request.param,  # small checkpoint chosen by the fixture params
        load_in_4bit = True,
        dtype = None,
        max_seq_length = 128,
    )

    # LoRA settings: rank 16 on the attention projections only.
    lora_settings = dict(
        r = 16,
        lora_alpha = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
        use_gradient_checkpointing = "unsloth",
    )
    peft_model = FastModel.get_peft_model(base_model, **lora_settings)

    return peft_model, loaded_tokenizer


@pytest.fixture(scope = "session", params = torchao_models)
def fp16_model_tokenizer(request):
    """Load the parametrised checkpoint without 4-bit loading and attach LoRA.

    Used by the TorchAO tests, which quantize the model themselves.

    Returns:
        A ``(model, tokenizer)`` tuple where ``model`` is the result of
        ``FastModel.get_peft_model``.
    """
    checkpoint = request.param
    print(f"Loading model in FP16 for TorchAO: {checkpoint}")

    base_model, loaded_tokenizer = FastModel.from_pretrained(
        checkpoint,
        load_in_4bit = False,  # no bitsandbytes quantization here
        dtype = None,
        max_seq_length = 128,
    )

    # LoRA settings: rank 16 on the attention projections only.
    lora_settings = dict(
        r = 16,
        lora_alpha = 16,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
        use_gradient_checkpointing = "unsloth",
    )
    peft_model = FastModel.get_peft_model(base_model, **lora_settings)

    return peft_model, loaded_tokenizer


@pytest.fixture(scope = "session")
def model(loaded_model_tokenizer):
    """Expose only the model half of the ``loaded_model_tokenizer`` pair."""
    peft_model, _ = loaded_model_tokenizer
    return peft_model


@pytest.fixture(scope = "session")
def tokenizer(loaded_model_tokenizer):
    """Expose only the tokenizer half of the ``loaded_model_tokenizer`` pair."""
    _, loaded_tokenizer = loaded_model_tokenizer
    return loaded_tokenizer


@pytest.fixture
def temp_save_dir():
    """Yield the path of a fresh temporary directory and remove it afterwards.

    Yields:
        str: path returned by ``tempfile.mkdtemp()``.
    """
    # Named tmp_dir rather than `dir` so the builtin is not shadowed.
    tmp_dir = tempfile.mkdtemp()
    print(f"Temporary directory created at: {tmp_dir}")
    yield tmp_dir
    # Report the deletion only once it has actually happened, so the log is
    # not misleading if rmtree raises.
    shutil.rmtree(tmp_dir)
    print(f"Temporary directory deleted: {tmp_dir}")


def delete_quantization_config(model):
    """Replace the model's config with a copy lacking ``quantization_config``.

    The config is round-tripped through ``to_dict()`` / ``from_dict()`` on the
    config's own class, and the resulting object is assigned to ``model`` and
    to every object reachable by repeatedly following the ``.model``
    attribute.

    Args:
        model: object with a ``config`` attribute that provides ``to_dict()``
            and whose class provides ``from_dict()``.

    Returns:
        None. ``model`` (and its nested ``.model`` chain) is modified in place.
    """
    # Since merged, edit quantization_config
    config_dict = model.config.to_dict()
    config_dict.pop("quantization_config", None)
    new_config = type(model.config).from_dict(config_dict)

    # Every wrapper level shares the same new config object.
    inner = model
    while hasattr(inner, "model"):
        inner = inner.model
        inner.config = new_config
    model.config = new_config


def test_save_merged_16bit(model, tokenizer, temp_save_dir: str):
    """Save the LoRA model with ``merged_16bit`` and validate the output.

    Checks that the save directory holds a config, at least one weight file
    and the tokenizer files, that the saved config has no
    ``quantization_config`` entry, and that the directory can be loaded again.
    The total weight size is recorded in ``save_file_sizes["merged_16bit"]``.
    """
    save_path = os.path.join(
        temp_save_dir,
        "unsloth_merged_16bit",
        model.config._name_or_path.replace("/", "_"),
    )

    model.save_pretrained_merged(
        save_path, tokenizer = tokenizer, save_method = "merged_16bit"
    )

    # Check model files
    config_path = os.path.join(save_path, "config.json")
    assert os.path.isdir(save_path), f"Directory {save_path} does not exist."
    assert os.path.isfile(config_path), "config.json not found."

    weight_files = [
        f for f in os.listdir(save_path) if f.endswith((".bin", ".safetensors"))
    ]
    assert len(weight_files) > 0, "No weight files found in the save directory."

    # Check tokenizer files
    for file in tokenizer_files:
        assert os.path.isfile(
            os.path.join(save_path, file)
        ), f"{file} not found in the save directory."

    # A 16bit merge must not carry a quantization config.
    with open(config_path, "r") as f:
        config = json.load(f)

    # The message now describes the actual failure: the key is present when it
    # should be absent (the old text said "not found").
    assert (
        "quantization_config" not in config
    ), "Quantization config unexpectedly found in the merged 16bit model config."

    # Store the size of the model files
    total_size = sum(os.path.getsize(os.path.join(save_path, f)) for f in weight_files)
    save_file_sizes["merged_16bit"][model.config._name_or_path] = total_size
    print(f"Total size of merged_16bit files: {total_size} bytes")

    # Test loading the model from the saved path.
    # NOTE(review): the 4bit test reloads through FastModel while this one uses
    # FastLanguageModel, although the fixture params include vision
    # checkpoints. Confirm this loader is the intended one.
    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(
        save_path,
        max_seq_length = 128,
        dtype = None,
        load_in_4bit = True,
    )


def test_save_merged_4bit(model, tokenizer, temp_save_dir: str):
    """Save the LoRA model with ``merged_4bit_forced`` and validate the output.

    Checks that the save directory holds a config, at least one weight file
    and the tokenizer files, that the weights are smaller than the 16bit size
    recorded earlier for the same model, that the saved config has a
    ``quantization_config`` entry, and that the directory can be loaded again.
    The total weight size is recorded in ``save_file_sizes["merged_4bit"]``.
    """
    save_path = os.path.join(
        temp_save_dir,
        "unsloth_merged_4bit",
        model.config._name_or_path.replace("/", "_"),
    )

    model.save_pretrained_merged(
        save_path, tokenizer = tokenizer, save_method = "merged_4bit_forced"
    )

    # Check model files
    config_path = os.path.join(save_path, "config.json")
    assert os.path.isdir(save_path), f"Directory {save_path} does not exist."
    assert os.path.isfile(config_path), "config.json not found."

    weight_files = [
        f for f in os.listdir(save_path) if f.endswith((".bin", ".safetensors"))
    ]
    assert len(weight_files) > 0, "No weight files found in the save directory."

    # Check tokenizer files
    for file in tokenizer_files:
        assert os.path.isfile(
            os.path.join(save_path, file)
        ), f"{file} not found in the save directory."

    # Store the size of the model files
    total_size = sum(os.path.getsize(os.path.join(save_path, f)) for f in weight_files)
    save_file_sizes["merged_4bit"][model.config._name_or_path] = total_size

    print(f"Total size of merged_4bit files: {total_size} bytes")

    # The 16bit reference size is written by an earlier test in this module.
    # Fail with an explicit message instead of a bare KeyError when it is
    # missing (e.g. this test was selected on its own).
    size_16bit = save_file_sizes["merged_16bit"].get(model.config._name_or_path)
    assert size_16bit is not None, (
        "No merged 16bit size recorded for this model; "
        "the merged 16bit test must run and record its size first."
    )
    assert (
        total_size < size_16bit
    ), "Merged 4bit files are larger than merged 16bit files."

    # Check config to see if it is 4bit
    with open(config_path, "r") as f:
        config = json.load(f)

    assert (
        "quantization_config" in config
    ), "Quantization config not found in the model config."

    # Test loading the model from the saved path
    loaded_model, loaded_tokenizer = FastModel.from_pretrained(
        save_path,
        max_seq_length = 128,
        dtype = None,
        load_in_4bit = True,
    )


@pytest.mark.skipif(
    importlib.util.find_spec("torchao") is None,
    reason = "require torchao to be installed",
)
def test_save_torchao(fp16_model_tokenizer, temp_save_dir: str):
    """Save with ``save_pretrained_torchao`` and validate the quantized output.

    The quantized directory (``<save_path>-torchao``) must contain a config
    with a ``quantization_config`` entry, weight files smaller than those in
    ``save_path``, and the tokenizer files, and must be loadable again.
    """

    def weight_names(directory):
        # Names of the serialized weight files directly inside `directory`.
        return [
            name
            for name in os.listdir(directory)
            if name.endswith((".bin", ".safetensors"))
        ]

    def bytes_on_disk(directory, names):
        # Combined size of the given files inside `directory`.
        return sum(os.path.getsize(os.path.join(directory, name)) for name in names)

    model, tokenizer = fp16_model_tokenizer
    save_path = os.path.join(
        temp_save_dir, "unsloth_torchao", model.config._name_or_path.replace("/", "_")
    )

    from torchao.quantization import Int8DynamicActivationInt8WeightConfig

    model.save_pretrained_torchao(
        save_path,
        tokenizer = tokenizer,
        torchao_config = Int8DynamicActivationInt8WeightConfig(),
        push_to_hub = False,
    )

    # Size of the weights found in the un-suffixed save directory; recorded as
    # the 16bit reference for this model.
    reference_size = bytes_on_disk(save_path, weight_names(save_path))
    save_file_sizes["merged_16bit"][model.config._name_or_path] = reference_size

    torchao_save_path = save_path + "-torchao"
    config_path = os.path.join(torchao_save_path, "config.json")

    # The quantized directory and its config must exist.
    assert os.path.isdir(
        torchao_save_path
    ), f"Directory {torchao_save_path} does not exist."
    assert os.path.isfile(config_path), "config.json not found."

    quantized_weights = weight_names(torchao_save_path)
    assert len(quantized_weights) > 0, "No weight files found in the save directory."

    # Tokenizer artefacts must sit next to the weights.
    for tokenizer_file in tokenizer_files:
        assert os.path.isfile(
            os.path.join(torchao_save_path, tokenizer_file)
        ), f"{tokenizer_file} not found in the save directory."

    # Record the quantized size and compare it with the reference.
    quantized_size = bytes_on_disk(torchao_save_path, quantized_weights)
    save_file_sizes["torchao"][model.config._name_or_path] = quantized_size

    assert (
        quantized_size < save_file_sizes["merged_16bit"][model.config._name_or_path]
    ), "torchao files are larger than merged 16bit files."

    # The saved config has to advertise the quantization.
    with open(config_path, "r") as config_file:
        saved_config = json.load(config_file)

    assert (
        "quantization_config" in saved_config
    ), "Quantization config not found in the model config."

    # Reload from disk. `load_in_4bit` stays False because the model is
    # already torchao-quantized and can't be quantized again with bitsandbytes.
    import torch.serialization

    with torch.serialization.safe_globals([getattr]):
        loaded_model, loaded_tokenizer = FastModel.from_pretrained(
            torchao_save_path,
            load_in_4bit = False,
            dtype = None,
            max_seq_length = 128,
        )


@pytest.mark.skipif(
    importlib.util.find_spec("torchao") is None,
    reason = "require torchao to be installed",
)
def test_save_and_inference_torchao(fp16_model_tokenizer, temp_save_dir: str):
    """Save with TorchAO, reload the quantized model and run one generation.

    The only assertion is that the ``-torchao`` directory exists; the prompt,
    the full output and the response are printed for inspection.
    """
    model, tokenizer = fp16_model_tokenizer
    model_name = model.config._name_or_path

    print(f"Testing TorchAO save and inference for: {model_name}")

    save_path = os.path.join(
        temp_save_dir, "torchao_models", model_name.replace("/", "_")
    )

    from torchao.quantization import Int8DynamicActivationInt8WeightConfig

    # Quantize and write the model to disk.
    model.save_pretrained_torchao(
        save_path,
        tokenizer = tokenizer,
        torchao_config = Int8DynamicActivationInt8WeightConfig(),
        push_to_hub = False,
    )

    torchao_save_path = save_path + "-torchao"
    assert os.path.isdir(
        torchao_save_path
    ), f"TorchAO directory {torchao_save_path} does not exist."

    # Reload the quantized checkpoint inside the safe-globals context.
    import torch.serialization

    with torch.serialization.safe_globals([getattr]):
        loaded_model, loaded_tokenizer = FastModel.from_pretrained(
            torchao_save_path,
            load_in_4bit = False,
            dtype = None,
            max_seq_length = 128,
        )

    FastModel.for_inference(loaded_model)

    chat = [
        {
            "role": "user",
            "content": "Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,",
        },
    ]
    prompt_ids = loaded_tokenizer.apply_chat_template(
        chat,
        tokenize = True,
        add_generation_prompt = True,  # required for generation
        return_tensors = "pt",
    )
    inputs = prompt_ids.to("cuda")

    # Generation uses the reloaded model and tokenizer, not the fixture's.
    fallback_pad_id = loaded_tokenizer.pad_token_id or loaded_tokenizer.eos_token_id
    outputs = loaded_model.generate(
        input_ids = inputs,
        max_new_tokens = 64,
        use_cache = False,  # avoid cache issues
        do_sample = True,
        temperature = 1.5,
        min_p = 0.1,
        pad_token_id = fallback_pad_id,
    )

    # Decode the prompt and the full sequence, then strip the prompt prefix to
    # isolate the response text.
    input_text = loaded_tokenizer.decode(inputs[0], skip_special_tokens = True)
    generated_text = loaded_tokenizer.decode(outputs[0], skip_special_tokens = True)
    response_part = generated_text[len(input_text) :].strip()

    print(f"Input: {input_text}")
    print(f"Full output: {generated_text}")
    print(f"Response only: {response_part}")


================================================
FILE: tests/saving/text_to_speech_models/test_csm.py
================================================
from unsloth import FastLanguageModel, FastModel
from transformers import CsmForConditionalGeneration
import torch

# ruff: noqa
import sys
from pathlib import Path
from peft import PeftModel
import warnings
import requests

REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.os_utils import require_package, require_python_package

require_package("ffmpeg", "ffmpeg")
require_python_package("soundfile")

import soundfile as sf

print(
    "\n" + "=" * 80,
    "🔍 SECTION 1: Loading Model and LoRA Adapters",
    "=" * 80,
    sep = "\n",
)


# Base CSM checkpoint, loaded without 4-bit quantization.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/csm-1b",
    auto_model = CsmForConditionalGeneration,
    max_seq_length = 2048,
    dtype = None,  # None lets the loader auto-detect
    load_in_4bit = False,
)


# Remember the class name before the model is wrapped with LoRA; section 3
# compares against it.
base_model_class = model.__class__.__name__


lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
model = FastModel.get_peft_model(
    model,
    r = 32,
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    target_modules = lora_target_modules,
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

print("✅ Model and LoRA adapters loaded successfully!")


print(
    "\n" + "=" * 80,
    "🔍 SECTION 2: Checking Model Class Type",
    "=" * 80,
    sep = "\n",
)

assert isinstance(model, PeftModel), "Model should be an instance of PeftModel"
print("✅ Model is an instance of PeftModel!")


print(
    "\n" + "=" * 80,
    "🔍 SECTION 3: Checking Config Model Class Type",
    "=" * 80,
    sep = "\n",
)


def find_lora_base_model(model_to_inspect):
    """Unwrap ``.base_model`` and then ``.model`` once each, when present.

    Objects lacking either attribute are passed through unchanged at that
    step.
    """
    unwrapped = getattr(model_to_inspect, "base_model", model_to_inspect)
    return getattr(unwrapped, "model", unwrapped)


# Section 3 check: the object under the PEFT wrapper must still be an instance
# of the class that was loaded in section 1.
config_model = find_lora_base_model(model) if isinstance(model, PeftModel) else model

assert (
    config_model.__class__.__name__ == base_model_class
), f"Expected config_model class to be {base_model_class}"
print("✅ config_model returns correct Base Model class:", str(base_model_class))


print(f"\n{'='*80}")
print("🔍 SECTION 4: Saving and Merging Model")
print(f"{'='*80}")

with warnings.catch_warnings():
    warnings.simplefilter("error")  # Treat warnings as errors
    try:
        model.save_pretrained_merged("csm", tokenizer)
    except Exception as e:
        # Raise explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, and chaining keeps the original traceback.
        raise AssertionError(
            f"Model saving/merging failed with exception: {e}"
        ) from e
    print("✅ Model saved and merged successfully without warnings!")

print(f"\n{'='*80}")
print("🔍 SECTION 5: Loading Model for Inference")
print(f"{'='*80}")


# Reload from the merged directory written in section 4.
model, processor = FastModel.from_pretrained(
    model_name = "./csm",
    max_seq_length = 2048,  # Choose any for long context!
    dtype = None,  # Leave as None for auto-detection
    auto_model = CsmForConditionalGeneration,
    load_in_4bit = False,  # Select True for 4bit - reduces memory usage
)

from transformers import AutoProcessor

# The processor used for inference comes from the hub checkpoint and replaces
# the one returned by from_pretrained above.
processor = AutoProcessor.from_pretrained("unsloth/csm-1b")

print("✅ Model loaded for inference successfully!")


print(f"\n{'='*80}")
print("🔍 SECTION 6: Running Inference")
print(f"{'='*80}")


from transformers import pipeline
import torch

output_audio_path = "csm_audio.wav"
try:
    text = (
        "We just finished fine tuning a text to speech model... and it's pretty good!"
    )
    speaker_id = 0
    inputs = processor(f"[{speaker_id}]{text}", add_special_tokens = True).to("cuda")
    audio_values = model.generate(
        **inputs,
        max_new_tokens = 125,  # 125 tokens is 10 seconds of audio, for longer speech increase this
        # play with these parameters to get the best results
        depth_decoder_temperature = 0.6,
        depth_decoder_top_k = 0,
        depth_decoder_top_p = 0.9,
        temperature = 0.8,
        top_k = 50,
        top_p = 1.0,
        #########################################################
        output_audio = True,
    )
    audio = audio_values[0].to(torch.float32).cpu().numpy()
    # Write to the path that is reported below; previously the audio went to a
    # different hard-coded file name than the one printed.
    sf.write(output_audio_path, audio, 24000)
    print(f"✅ Audio generated and saved to {output_audio_path}!")
except Exception as e:
    raise AssertionError(f"Inference failed with exception: {e}") from e


# NOTE: the generated audio is not transcribed or compared against a reference
# text; this script only checks that generation and saving run without error.

print("✅ All sections passed successfully!")


safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./csm")


================================================
FILE: tests/saving/text_to_speech_models/test_lasa.py
================================================
from unsloth import FastLanguageModel, FastModel
from transformers import CsmForConditionalGeneration
import torch

# ruff: noqa
import sys
from pathlib import Path
from peft import PeftModel
import warnings
import requests


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))


from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.os_utils import require_package, require_python_package

require_package("ffmpeg", "ffmpeg")
require_python_package("soundfile")
require_python_package("xcodec2")

import soundfile as sf
from xcodec2.modeling_xcodec2 import XCodec2Model

XCODEC2_MODEL_NAME = "HKUST-Audio/xcodec2"
SAMPLE_RATE = 16000
DEVICE = "cuda"

try:
    codec_model = XCodec2Model.from_pretrained(XCODEC2_MODEL_NAME)

except Exception as e:
    # `raise "<str>"` is itself a TypeError (exceptions must derive from
    # BaseException) and would mask the real failure, so wrap the message in
    # an exception type and chain the original cause.
    raise RuntimeError(f"ERROR loading XCodec2 model: {e}.") from e

# The codec is kept on the CPU.
codec_model.to("cpu")

print(
    "\n" + "=" * 80,
    "🔍 SECTION 1: Loading Model and LoRA Adapters",
    "=" * 80,
    sep = "\n",
)

max_seq_length = 2048
# Base Llasa checkpoint, loaded without 4-bit quantization.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llasa-1B",
    load_in_4bit = False,
    dtype = None,  # None lets the loader auto-detect
    max_seq_length = max_seq_length,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Remember the class name before the model is wrapped with LoRA; section 3
# compares against it.
base_model_class = model.__class__.__name__


lora_target_modules = ["q_proj", "v_proj"]
model = FastLanguageModel.get_peft_model(
    model,
    r = 128,
    lora_alpha = 128,
    lora_dropout = 0,
    bias = "none",
    target_modules = lora_target_modules,
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

print("✅ Model and LoRA adapters loaded successfully!")


print(
    "\n" + "=" * 80,
    "🔍 SECTION 2: Checking Model Class Type",
    "=" * 80,
    sep = "\n",
)

assert isinstance(model, PeftModel), "Model should be an instance of PeftModel"
print("✅ Model is an instance of PeftModel!")


print(
    "\n" + "=" * 80,
    "🔍 SECTION 3: Checking Config Model Class Type",
    "=" * 80,
    sep = "\n",
)


def find_lora_base_model(model_to_inspect):
    """Return the object found after following ``.base_model`` then ``.model``.

    Each attribute is followed at most once and only if it exists.
    """
    candidate = model_to_inspect
    for wrapper_attr in ("base_model", "model"):
        candidate = getattr(candidate, wrapper_attr, candidate)
    return candidate


# Section 3 check: the object under the PEFT wrapper must still be an instance
# of the class that was loaded in section 1.
config_model = find_lora_base_model(model) if isinstance(model, PeftModel) else model

assert (
    config_model.__class__.__name__ == base_model_class
), f"Expected config_model class to be {base_model_class}"
print("✅ config_model returns correct Base Model class:", str(base_model_class))


print(f"\n{'='*80}")
print("🔍 SECTION 4: Saving and Merging Model")
print(f"{'='*80}")

with warnings.catch_warnings():
    warnings.simplefilter("error")  # Treat warnings as errors
    try:
        model.save_pretrained_merged("lasa", tokenizer)
    except Exception as e:
        # Raise explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, and chaining keeps the original traceback.
        raise AssertionError(
            f"Model saving/merging failed with exception: {e}"
        ) from e
    print("✅ Model saved and merged successfully without warnings!")

print(f"\n{'='*80}")
print("🔍 SECTION 5: Loading Model for Inference")
print(f"{'='*80}")


# Reload from the merged directory written in section 4.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./lasa",
    max_seq_length = max_seq_length,
    dtype = None,  # Select None for auto detection
    load_in_4bit = False,  # Choose True for 4bit which reduces memory
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# from transformers import AutoProcessor
# processor = AutoProcessor.from_pretrained("unsloth/csm-1b")

print("✅ Model loaded for inference successfully!")


print(f"\n{'='*80}")
print("🔍 SECTION 6: Running Inference")
print(f"{'='*80}")


from transformers import pipeline
import torch

# Destination of the generated waveform and the sentence to synthesise.
output_audio_path = "lasa_audio.wav"
input_text = "Hey there my name is Elise, <giggles> and I'm a speech generation model that can sound like a person."

FastLanguageModel.for_inference(model)


def ids_to_speech_tokens(speech_ids):
    """Format each id as a ``<|s_{id}|>`` token string, preserving order."""
    return [f"<|s_{speech_id}|>" for speech_id in speech_ids]


def extract_speech_ids(speech_tokens_str):
    """Parse ``<|s_N|>`` token strings back into a list of integers.

    Strings that do not have the ``<|s_`` prefix and ``|>`` suffix are
    reported on stdout and skipped.
    """
    prefix, suffix = "<|s_", "|>"
    parsed_ids = []
    for token in speech_tokens_str:
        if not (token.startswith(prefix) and token.endswith(suffix)):
            print(f"Unexpected token: {token}")
            continue
        # Everything between the prefix and the suffix is the numeric id.
        parsed_ids.append(int(token[len(prefix) : -len(suffix)]))
    return parsed_ids


# TTS start!
with torch.inference_mode():
    with torch.amp.autocast("cuda", dtype = model.dtype):
        formatted_text = (
            f"<|TEXT_UNDERSTANDING_START|>{input_text}<|TEXT_UNDERSTANDING_END|>"
        )

        # Tokenize the text. The assistant turn is left open so that
        # generation continues right after the speech-start marker.
        chat = [
            {"role": "user", "content": "Convert the text to speech:" + formatted_text},
            {"role": "assistant", "content": "<|SPEECH_GENERATION_START|>"},
        ]

        input_ids = tokenizer.apply_chat_template(
            chat, tokenize = True, return_tensors = "pt", continue_final_message = True
        )
        input_ids = input_ids.to("cuda")

        speech_end_id = tokenizer.convert_tokens_to_ids("<|SPEECH_GENERATION_END|>")

        # Generate the speech autoregressively
        outputs = model.generate(
            input_ids,
            max_length = 2048,  # We trained our model with a max length of 2048
            eos_token_id = speech_end_id,
            do_sample = True,
            # top_p is a cumulative probability, so 1.0 is its maximum. The
            # previous value of 1.2 was out of range.
            # NOTE(review): 1.0 is expected to sample the same way, since
            # nucleus filtering only applies below 1.0; confirm against the
            # installed transformers version.
            top_p = 1.0,
            temperature = 1.2,  #  Controls randomness in output
        )
    # Extract the speech tokens: drop the prompt and the final token.
    generated_ids = outputs[0][input_ids.shape[1] : -1]

    speech_tokens = tokenizer.batch_decode(generated_ids, skip_special_tokens = True)

    # Convert  token <|s_23456|> to int 23456
    speech_tokens = extract_speech_ids(speech_tokens)

    speech_tokens = torch.tensor(speech_tokens).cpu().unsqueeze(0).unsqueeze(0)

    # Decode the speech tokens to speech waveform
    gen_wav = codec_model.decode_code(speech_tokens)
try:
    # Use the module-level SAMPLE_RATE (16000) instead of repeating the literal.
    sf.write(output_audio_path, gen_wav[0, 0, :].cpu().numpy(), SAMPLE_RATE)
except Exception as e:
    # Raise explicitly instead of `assert False`: asserts are stripped under
    # `python -O`, and chaining keeps the original traceback.
    raise AssertionError(f"Inference failed with exception: {e}") from e


# NOTE: the generated audio is not transcribed or compared against a reference
# text; this script only checks that generation and saving run without error.

print("✅ All sections passed successfully!")


safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./lasa")


================================================
FILE: tests/saving/text_to_speech_models/test_orpheus.py
================================================
from unsloth import FastLanguageModel, FastModel
from transformers import CsmForConditionalGeneration
import torch

# ruff: noqa
import sys
from pathlib import Path
from peft import PeftModel
import warnings
import requests

REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.os_utils import require_package, require_python_package

require_package("ffmpeg", "ffmpeg")
require_python_package("soundfile")
require_python_package("snac")

import soundfile as sf
from snac import SNAC

# SNAC codec, loaded and moved to the GPU.
snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").to("cuda")
print(
    "\n" + "=" * 80,
    "🔍 SECTION 1: Loading Model and LoRA Adapters",
    "=" * 80,
    sep = "\n",
)


# Base Orpheus checkpoint, loaded without 4-bit quantization.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/orpheus-3b-0.1-ft",
    load_in_4bit = False,
    dtype = None,  # None lets the loader auto-detect
    max_seq_length = 2048,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Remember the class name before the model is wrapped with LoRA; section 3
# compares against it.
base_model_class = model.__class__.__name__


lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r = 64,
    lora_alpha = 64,
    lora_dropout = 0,
    bias = "none",
    target_modules = lora_target_modules,
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)
print("✅ Model and LoRA adapters loaded successfully!")


print(
    "\n" + "=" * 80,
    "🔍 SECTION 2: Checking Model Class Type",
    "=" * 80,
    sep = "\n",
)

assert isinstance(model, PeftModel), "Model should be an instance of PeftModel"
print("✅ Model is an instance of PeftModel!")


print(
    "\n" + "=" * 80,
    "🔍 SECTION 3: Checking Config Model Class Type",
    "=" * 80,
    sep = "\n",
)


def find_lora_base_model(model_to_inspect):
    """Peel wrapper layers off a model and return what is underneath.

    Follows ``.base_model`` (if present) and then ``.model`` (if present on
    the result), each at most once. An object with neither attribute is
    returned unchanged.
    """
    unwrapped = model_to_inspect
    # The order matters: ``.model`` is looked up on the result of ``.base_model``.
    for attr_name in ("base_model", "model"):
        if hasattr(unwrapped, attr_name):
            unwrapped = getattr(unwrapped, attr_name)
    return unwrapped


# Unwrap the PEFT model and verify that the class underneath is still the
# class recorded right after loading in Section 1.
config_model = find_lora_base_model(model) if isinstance(model, PeftModel) else model

assert (
    config_model.__class__.__name__ == base_model_class
), f"Expected config_model class to be {base_model_class}"
print("✅ config_model returns correct Base Model class:", str(base_model_class))


print(f"\n{'='*80}")
print("🔍 SECTION 4: Saving and Merging Model")
print(f"{'='*80}")

# Merge the LoRA weights into the base model and write the result to
# ./orpheus. Warnings raised while saving are promoted to exceptions so that
# they fail the test as well.
with warnings.catch_warnings():
    warnings.simplefilter("error")  # Treat warnings as errors
    try:
        model.save_pretrained_merged("orpheus", tokenizer)
    except Exception as e:
        # Raise explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, which would silently swallow the failure here.
        raise AssertionError(
            f"Model saving/merging failed with exception: {e}"
        ) from e
    else:
        print("✅ Model saved and merged successfully without warnings!")

print(f"\n{'='*80}")
print("🔍 SECTION 5: Loading Model for Inference")
print(f"{'='*80}")


# Reload the merged checkpoint written to ./orpheus in Section 4 so that the
# inference below exercises the saved artifact. Loading the Hub base model
# here would leave the merged output untested.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./orpheus",
    max_seq_length = 2048,  # Choose any for long context!
    dtype = None,  # Select None for auto detection
    load_in_4bit = False,  # Select True for 4bit which reduces memory usage
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

print("✅ Model loaded for inference successfully!")


print(f"\n{'='*80}")
print("🔍 SECTION 6: Running Inference")
print(f"{'='*80}")


FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Move the SNAC codec to the CPU; decoding in redistribute_codes runs there.
snac_model.to("cpu")
prompts = [
    "Hey there my name is Elise, <giggles> and I'm a speech generation model that can sound like a person.",
]

chosen_voice = None  # None for single-speaker

# Prefix each prompt with "<voice>: " only when a voice has been chosen.
prompts_ = [(f"{chosen_voice}: " + p) if chosen_voice else p for p in prompts]

all_input_ids = []

for prompt in prompts_:
    input_ids = tokenizer(prompt, return_tensors = "pt").input_ids
    all_input_ids.append(input_ids)

start_token = torch.tensor([[128259]], dtype = torch.int64)  # Start of human
end_tokens = torch.tensor(
    [[128009, 128260]], dtype = torch.int64
)  # End of text, End of human

# Wrap every tokenized prompt in the start/end marker tokens.
all_modified_input_ids = []
for input_ids in all_input_ids:
    modified_input_ids = torch.cat(
        [start_token, input_ids, end_tokens], dim = 1
    )  # SOH SOT Text EOT EOH
    all_modified_input_ids.append(modified_input_ids)

# Left-pad every sequence with token 128263 up to the longest one and build
# the matching attention mask (0 over padding, 1 over real tokens).
all_padded_tensors = []
all_attention_masks = []
max_length = max(
    [modified_input_ids.shape[1] for modified_input_ids in all_modified_input_ids]
)
for modified_input_ids in all_modified_input_ids:
    padding = max_length - modified_input_ids.shape[1]
    padded_tensor = torch.cat(
        [torch.full((1, padding), 128263, dtype = torch.int64), modified_input_ids], dim = 1
    )
    attention_mask = torch.cat(
        [
            torch.zeros((1, padding), dtype = torch.int64),
            torch.ones((1, modified_input_ids.shape[1]), dtype = torch.int64),
        ],
        dim = 1,
    )
    all_padded_tensors.append(padded_tensor)
    all_attention_masks.append(attention_mask)

# Stack the per-prompt rows into a single batch.
all_padded_tensors = torch.cat(all_padded_tensors, dim = 0)
all_attention_masks = torch.cat(all_attention_masks, dim = 0)

input_ids = all_padded_tensors.to("cuda")
attention_mask = all_attention_masks.to("cuda")
# Sample up to 1200 new tokens; generation stops at token 128258.
generated_ids = model.generate(
    input_ids = input_ids,
    attention_mask = attention_mask,
    max_new_tokens = 1200,
    do_sample = True,
    temperature = 0.6,
    top_p = 0.95,
    repetition_penalty = 1.1,
    num_return_sequences = 1,
    eos_token_id = 128258,
    use_cache = True,
)
# NOTE(review): 128257 presumably marks the start of the audio codes - confirm
# against the Orpheus tokenizer. 128258 is the EOS id passed to generate().
token_to_find = 128257
token_to_remove = 128258

token_indices = (generated_ids == token_to_find).nonzero(as_tuple = True)

# Keep only what follows the last occurrence of `token_to_find`. The column
# index is shared by all rows of the batch.
if len(token_indices[1]) > 0:
    last_occurrence_idx = token_indices[1][-1].item()
    cropped_tensor = generated_ids[:, last_occurrence_idx + 1 :]
else:
    cropped_tensor = generated_ids

# Drop the EOS token from every row. Rows may end up with different lengths,
# hence a list of 1-D tensors rather than one 2-D tensor.
processed_rows = [row[row != token_to_remove] for row in cropped_tensor]

code_lists = []

for row in processed_rows:
    # Trim to a whole number of 7-code frames and subtract the fixed offset.
    # `.tolist()` yields plain Python ints in a single transfer instead of a
    # list of 0-dim tensors.
    new_length = (row.size(0) // 7) * 7
    code_lists.append((row[:new_length] - 128266).tolist())


def redistribute_codes(code_list):
    """Split a flat code list into three layers and decode them to audio.

    ``code_list`` is read in frames of 7 codes. Per frame, position 0 goes to
    layer 1, positions 1 and 4 go to layer 2, and positions 2, 3, 5 and 6 go
    to layer 3; position ``k`` has ``k * 4096`` subtracted first. Trailing
    codes that do not fill a complete frame are ignored.

    Returns whatever the module-level ``snac_model.decode`` returns for the
    three layer tensors (each with a leading batch dimension of 1).
    """
    layer_1 = []
    layer_2 = []
    layer_3 = []
    # Only complete frames can be decoded. The previous bound
    # `(len(code_list) + 1) // 7` overshot by one frame whenever 6 codes were
    # left over, which raised IndexError on `code_list[7 * i + 6]`.
    for i in range(len(code_list) // 7):
        layer_1.append(code_list[7 * i])
        layer_2.append(code_list[7 * i + 1] - 4096)
        layer_3.append(code_list[7 * i + 2] - (2 * 4096))
        layer_3.append(code_list[7 * i + 3] - (3 * 4096))
        layer_2.append(code_list[7 * i + 4] - (4 * 4096))
        layer_3.append(code_list[7 * i + 5] - (5 * 4096))
        layer_3.append(code_list[7 * i + 6] - (6 * 4096))
    codes = [
        torch.tensor(layer_1).unsqueeze(0),
        torch.tensor(layer_2).unsqueeze(0),
        torch.tensor(layer_3).unsqueeze(0),
    ]

    # The codes stay on the CPU; the script moves snac_model to the CPU before
    # calling this function.
    audio_hat = snac_model.decode(codes)
    return audio_hat


# Decode every generated code list into a waveform.
my_samples = []
for code_list in code_lists:
    samples = redistribute_codes(code_list)
    my_samples.append(samples)
output_path = "orpheus_audio.wav"
try:
    # NOTE: every sample is written to the same path, so with more than one
    # prompt only the last waveform would remain on disk.
    for samples in my_samples:
        audio_data = samples.detach().squeeze().cpu().numpy()
        # `sf` is already imported at the top of this script.
        sf.write(output_path, audio_data, 24000)  # Explicitly pass sample rate
        print(f"✅ Audio saved to {output_path}!")
except Exception as e:
    # Raise explicitly instead of `assert False`: asserts are stripped under
    # `python -O`, which would silently swallow the failure here.
    raise AssertionError(f"Inference failed with exception: {e}") from e

# Verify the file exists
import os

assert os.path.exists(output_path), f"Audio file not found at {output_path}"
print("✅ Audio file exists on disk!")
del my_samples, samples
# NOTE(review): the audio is only checked for existence; its content is not
# transcribed or compared against the prompt.

print("✅ All sections passed successfully!")


# Remove the directories created by this script.
safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./orpheus")


================================================
FILE: tests/saving/text_to_speech_models/test_whisper.py
================================================
from unsloth import FastLanguageModel, FastModel
from transformers import WhisperForConditionalGeneration, WhisperProcessor
import torch

# ruff: noqa
import sys
from pathlib import Path
from peft import PeftModel
import warnings
import requests


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))


from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.os_utils import require_package, require_python_package

require_package("ffmpeg", "ffmpeg")
require_python_package("soundfile")

import soundfile as sf

print(f"\n{'='*80}")
print("🔍 SECTION 1: Loading Model and LoRA Adapters")
print(f"{'='*80}")


# Load Whisper large-v3 configured for English transcription.
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/whisper-large-v3",
    dtype = None,  # Leave as None for auto detection
    load_in_4bit = False,  # Set to True to do 4bit quantization which reduces memory
    auto_model = WhisperForConditionalGeneration,
    whisper_language = "English",
    whisper_task = "transcribe",
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)


# Class name of the model before PEFT wrapping; Section 3 compares against it.
base_model_class = model.__class__.__name__
# Workaround for https://github.com/huggingface/transformers/issues/37172:
# copy forced_decoder_ids onto generation_config.input_ids, then clear it.
model.generation_config.input_ids = model.generation_config.forced_decoder_ids
model.generation_config.forced_decoder_ids = None


# Attach LoRA adapters to the q_proj and v_proj modules.
model = FastModel.get_peft_model(
    model,
    r = 64,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "v_proj"],
    lora_alpha = 64,
    lora_dropout = 0,  # Supports any, but = 0 is optimized
    bias = "none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None,  # And LoftQ
    task_type = None,  # ** MUST set this for Whisper **
)

print("✅ Model and LoRA adapters loaded successfully!")


print(f"\n{'='*80}")
print("🔍 SECTION 2: Checking Model Class Type")
print(f"{'='*80}")

# After get_peft_model the object must be a PEFT wrapper, not the bare model.
assert isinstance(model, PeftModel), "Model should be an instance of PeftModel"
print("✅ Model is an instance of PeftModel!")


print(f"\n{'='*80}")
print("🔍 SECTION 3: Checking Config Model Class Type")
print(f"{'='*80}")


def find_lora_base_model(model_to_inspect):
    """Return the object found beneath a model's wrapper attributes.

    Steps into ``.base_model`` when the input has it, then into ``.model``
    when that result has it. Objects lacking both attributes come back as-is.
    """
    has_wrapper = hasattr(model_to_inspect, "base_model")
    outer = model_to_inspect.base_model if has_wrapper else model_to_inspect
    return outer.model if hasattr(outer, "model") else outer


# Unwrap the PEFT model and verify that the class underneath is still the
# class recorded right after loading in Section 1.
config_model = find_lora_base_model(model) if isinstance(model, PeftModel) else model

assert (
    config_model.__class__.__name__ == base_model_class
), f"Expected config_model class to be {base_model_class}"
print("✅ config_model returns correct Base Model class:", str(base_model_class))


print(f"\n{'='*80}")
print("🔍 SECTION 4: Saving and Merging Model")
print(f"{'='*80}")

# Merge the LoRA weights into the base model and write the result to
# ./whisper. Warnings raised while saving are promoted to exceptions so that
# they fail the test as well.
with warnings.catch_warnings():
    warnings.simplefilter("error")  # Treat warnings as errors
    try:
        model.save_pretrained_merged("whisper", tokenizer)
    except Exception as e:
        # Raise explicitly instead of `assert False`: asserts are stripped
        # under `python -O`, which would silently swallow the failure here.
        raise AssertionError(
            f"Model saving/merging failed with exception: {e}"
        ) from e
    else:
        print("✅ Model saved and merged successfully without warnings!")

print(f"\n{'='*80}")
print("🔍 SECTION 5: Loading Model for Inference")
print(f"{'='*80}")


# Reload from the merged checkpoint directory written in Section 4.
model, tokenizer = FastModel.from_pretrained(
    model_name = "./whisper",
    dtype = None,  # Leave as None for auto detection
    load_in_4bit = False,  # Set to True to do 4bit quantization which reduces memory
    auto_model = WhisperForConditionalGeneration,
    whisper_language = "English",
    whisper_task = "transcribe",
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Alternative: load the merged checkpoint with plain transformers classes.
# model = WhisperForConditionalGeneration.from_pretrained("./whisper")
# processor = WhisperProcessor.from_pretrained("./whisper")

print("✅ Model loaded for inference successfully!")

print(f"\n{'='*80}")
print("🔍 SECTION 6: Downloading Sample Audio File")
print(f"{'='*80}")

audio_url = "https://upload.wikimedia.org/wikipedia/commons/5/5b/Speech_12dB_s16.flac"
audio_file = "Speech_12dB_s16.flac"

try:
    # A browser-like User-Agent is sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # requests applies no timeout by default; without one a stalled
    # connection would hang the test forever.
    response = requests.get(audio_url, headers = headers, timeout = 60)
    response.raise_for_status()
    with open(audio_file, "wb") as f:
        f.write(response.content)
    print("✅ Audio file downloaded successfully!")
except Exception as e:
    # Raise explicitly instead of `assert False`: asserts are stripped under
    # `python -O`, which would silently swallow the failure here.
    raise AssertionError(f"Failed to download audio file: {e}") from e

print(f"\n{'='*80}")
print("🔍 SECTION 7: Running Inference")
print(f"{'='*80}")


from transformers import pipeline
import torch

FastModel.for_inference(model)
model.eval()
# Build an ASR pipeline around the reloaded model. `tokenizer` is the object
# returned by from_pretrained; its .tokenizer and .feature_extractor
# attributes are passed on individually. No device is given.
whisper = pipeline(
    "automatic-speech-recognition",
    model = model,
    tokenizer = tokenizer.tokenizer,
    feature_extractor = tokenizer.feature_extractor,
    processor = tokenizer,
    return_language = True,
    torch_dtype = torch.float16,  # Remove the device parameter
)
# Transcribe the sample file downloaded in Section 6.
audio_file = "Speech_12dB_s16.flac"
transcribed_text = whisper(audio_file)
# Alternative without the pipeline helper:
# audio, sr = sf.read(audio_file)
# input_features = processor(audio, return_tensors="pt").input_features
# transcribed_text = model.generate(input_features=input_features)
print(f"📝 Transcribed Text: {transcribed_text['text']}")

# The transcription must contain each of the following phrases. Matching is
# case-insensitive.
expected_phrases = [
    "birch canoe slid on the smooth planks",
    "sheet to the dark blue background",
    "easy to tell the depth of a well",
    "Four hours of steady work faced us",
]

transcribed_lower = transcribed_text["text"].lower()
# Collect the misses so that a failure names exactly what was not found.
missing_phrases = [
    phrase for phrase in expected_phrases if phrase.lower() not in transcribed_lower
]
all_phrases_found = not missing_phrases

assert all_phrases_found, (
    f"Expected phrases not found in transcription: {transcribed_text['text']}"
    f" (missing: {missing_phrases})"
)
print("✅ Transcription contains all expected phrases!")


# Remove everything this script wrote to the working directory, including the
# downloaded sample audio file.
safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./whisper")
Path(audio_file).unlink(missing_ok = True)


================================================
FILE: tests/saving/vision_models/test_index_file_sharded_model.py
================================================
## Import required libraries

from unsloth import FastVisionModel, is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator

import torch
import os
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig
from huggingface_hub import HfFileSystem
import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory


## Dataset Preparation

print("\n📊 Loading and preparing dataset...")
dataset = load_dataset("lbourdois/OCR-liboaccn-OPUS-MIT-5M-clean", "en", split = "train")
# To select the first 2000 examples
train_dataset = dataset.select(range(2000))

# To select the next 200 examples for evaluation
eval_dataset = dataset.select(range(2000, 2200))

print(f"✅ Dataset loaded successfully!")
print(f"   📈 Training samples: {len(train_dataset)}")
print(f"   📊 Evaluation samples: {len(eval_dataset)}")


# Convert dataset to OAI messages
def format_data(sample, system_prompt = None):
    """Convert one dataset row into an OpenAI-style ``messages`` dict.

    Args:
        sample: Mapping with the keys ``"question"``, ``"image"`` and
            ``"answer"``.
        system_prompt: Text of the system turn. When ``None`` (the default)
            the module-level ``system_message`` is used, so existing
            one-argument calls behave as before.

    Returns:
        ``{"messages": [...]}`` with a system turn, a user turn (question
        text followed by the image) and an assistant turn (the answer).
    """
    if system_prompt is None:
        # Resolved at call time: `system_message` is assigned further down
        # in the script, after this function is defined.
        system_prompt = system_message
    return {
        "messages": [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": sample["question"],
                    },
                    {
                        "type": "image",
                        "image": sample["image"],
                    },
                ],
            },
            {
                "role": "assistant",
                "content": [{"type": "text", "text": sample["answer"]}],
            },
        ],
    }


print("\n🔄 Formatting dataset for vision training...")
# System prompt read by format_data through the module namespace.
system_message = "You are an expert french ocr system."
# Convert dataset to OAI messages.
# A list comprehension is used to keep the PIL.Image type; .map would convert
# the images to bytes.
train_dataset = [format_data(sample) for sample in train_dataset]
eval_dataset = [format_data(sample) for sample in eval_dataset]
print("✅ Dataset formatting completed!")

"""## Finetuning Setup and Run"""


print("\n" + "=" * 80)
print("=== MODEL LOADING AND SETUP ===".center(80))
print("=" * 80 + "\n")
# Load Base Model
print("🤖 Loading base vision model...")
try:
    model, tokenizer = FastVisionModel.from_pretrained(
        model_name = "unsloth/Qwen2-VL-7B-Instruct",
        max_seq_length = 2048,  # Choose any for long context!
        load_in_4bit = True,  # 4 bit quantization to reduce memory
        load_in_8bit = False,  # [NEW!] A bit more accurate, uses 2x memory
        full_finetuning = False,  # [NEW!] We have full finetuning now!
    )
except Exception as e:
    # Report which stage failed, then re-raise the original exception.
    print(f"❌ Failed to load base model: {e}")
    raise

print("\n🔧 Setting up LoRA configuration...")
## LoRA Finetuning
try:
    model = FastVisionModel.get_peft_model(
        model,
        finetune_vision_layers = True,  # Turn off for just text!
        finetune_language_layers = True,  # Should leave on!
        finetune_attention_modules = True,  # Attention good for GRPO
        finetune_mlp_modules = True,  # Should leave on always!
        r = 16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        lora_alpha = 32,
        lora_dropout = 0,  # Supports any, but = 0 is optimized
        bias = "none",  # Supports any, but = "none" is optimized
        use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
        random_state = 3407,
        use_rslora = False,  # We support rank stabilized LoRA
        loftq_config = None,  # And LoftQ
    )
    print("✅ LoRA configuration applied successfully!")
    # The values printed below are hard-coded copies of the arguments above.
    print(f"   🎯 LoRA rank (r): 16")
    print(f"   📊 LoRA alpha: 32")
    print(f"   🔍 Vision layers: Enabled")
    print(f"   💬 Language layers: Enabled")
except Exception as e:
    print(f"❌ Failed to apply LoRA configuration: {e}")
    raise

print("\n" + "=" * 80)
print("=== TRAINING SETUP ===".center(80))
print("=" * 80 + "\n")


print("🏋️ Preparing trainer...")
FastVisionModel.for_training(model)  # Enable for training!

# Only train_dataset is handed to the trainer; eval_dataset is not used here.
try:
    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        data_collator = UnslothVisionDataCollator(model, tokenizer),
        train_dataset = train_dataset,
        args = SFTConfig(
            # per_device_train_batch_size = 4,
            # gradient_accumulation_steps = 8,
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            gradient_checkpointing = True,
            gradient_checkpointing_kwargs = {
                "use_reentrant": False
            },  # use non-reentrant checkpointing
            max_grad_norm = 0.3,  # max gradient norm based on QLoRA paper
            warmup_ratio = 0.03,
            # num_train_epochs = 2, # Set this instead of max_steps for full training runs
            max_steps = 10,
            learning_rate = 2e-4,
            fp16 = not is_bf16_supported(),
            bf16 = is_bf16_supported(),
            logging_steps = 5,
            save_strategy = "epoch",
            optim = "adamw_torch_fused",
            weight_decay = 0.01,
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "checkpoints",
            report_to = "none",  # For Weights and Biases
            # You MUST put the below items for vision finetuning:
            remove_unused_columns = False,
            dataset_text_field = "",
            dataset_kwargs = {"skip_prepare_dataset": True},
            dataset_num_proc = 4,
            max_seq_length = 2048,
        ),
    )
    print("✅ Trainer setup completed!")
    # The values printed below are hard-coded copies of the arguments above.
    print(f"   📦 Batch size: 2")
    print(f"   🔄 Gradient accumulation steps: 4")
    print(f"   📈 Max training steps: 10")
    print(f"   🎯 Learning rate: 2e-4")
    print(f"   💾 Precision: {'BF16' if is_bf16_supported() else 'FP16'}")
except Exception as e:
    print(f"❌ Failed to setup trainer: {e}")
    raise

print("\n" + "=" * 80)
print("=== STARTING TRAINING ===".center(80))
print("=" * 80 + "\n")
# run training
try:
    print("🚀 Starting training process...")
    trainer_stats = trainer.train()
except Exception as e:
    print(f"❌ Training failed: {e}")
    raise

print("\n" + "=" * 80)
print("=== SAVING MODEL ===".center(80))
print("=" * 80 + "\n")

print("💾 Saving adapter model and tokenizer locally...")
try:
    # Only the directory is passed: the second positional parameter of
    # PeftModel.save_pretrained is `safe_serialization`, not a tokenizer.
    # The tokenizer is written by its own save_pretrained call below.
    model.save_pretrained("unsloth-qwen2-7vl-french-ocr-adapter")
    tokenizer.save_pretrained("unsloth-qwen2-7vl-french-ocr-adapter")
    print("✅ Model saved locally!")
except Exception as e:
    print(f"❌ Failed to save model locally: {e}")
    raise


from getpass import getpass

# Hugging Face credentials come from the environment when available;
# otherwise they are asked for interactively and stored back into os.environ.
hf_username = os.environ.get("HF_USER", "")
if not hf_username:
    hf_username = input("Please enter your Hugging Face username: ").strip()
    if not hf_username:
        # A blank name would produce the invalid repo id "/qwen2-7b-ocr-merged".
        raise ValueError("A Hugging Face username is required.")
    os.environ["HF_USER"] = hf_username

hf_token = os.environ.get("HF_TOKEN", "")
if not hf_token:
    # getpass keeps the secret token from being echoed to the terminal.
    hf_token = getpass("Please enter your Hugging Face token: ").strip()
    os.environ["HF_TOKEN"] = hf_token

repo_name = f"{hf_username}/qwen2-7b-ocr-merged"
# Per-stage pass/fail flags, printed in the final validation report.
success = {
    "upload": False,
    "safetensors_check": False,
    "download": False,
}
# Stage 1: Upload model to Hub
try:
    print("\n" + "=" * 80)
    print("=== UPLOADING MODEL TO HUB ===".center(80))
    print("=" * 80 + "\n")
    print(f"🚀 Uploading to repository: {repo_name}")
    model.push_to_hub_merged(repo_name, tokenizer = tokenizer, token = hf_token)
    success["upload"] = True
    print("✅ Model uploaded successfully!")
except Exception as e:
    print(f"❌ Failed to upload model: {e}")
    # `from e` records the original error as the explicit cause.
    raise Exception("Model upload failed.") from e

# Stage 2: Verify safetensors.index.json exists
try:
    print("\n" + "=" * 80)
    print("=== VERIFYING REPO CONTENTS ===".center(80))
    print("=" * 80 + "\n")
    fs = HfFileSystem(token = hf_token)
    file_list = fs.ls(repo_name, detail = True)
    # The check passes when an index file is listed at the top level of the repo.
    safetensors_found = any(
        file["name"].endswith("model.safetensors.index.json") for file in file_list
    )
    if safetensors_found:
        success["safetensors_check"] = True
        print("✅ model.safetensors.index.json found in repo!")
    else:
        raise Exception("model.safetensors.index.json not found in repo.")
except Exception as e:
    print(f"❌ Verification failed: {e}")
    raise Exception("Repo verification failed.") from e

# Remove the local ./<hf_username> directory so that the load below cannot be
# satisfied by a local path that matches the repo id.
safe_remove_directory(f"./{hf_username}")

# Stage 3: Load the uploaded model back by its repo id
try:
    print("\n" + "=" * 80)
    print("=== TESTING MODEL DOWNLOAD ===".center(80))
    print("=" * 80 + "\n")
    print("📥 Testing model download...")
    test_model, test_tokenizer = FastVisionModel.from_pretrained(repo_name)
    success["download"] = True
    print("✅ Model downloaded successfully!")

    # Clean up test model
    del test_model, test_tokenizer
    torch.cuda.empty_cache()
except Exception as e:
    print(f"❌ Download failed: {e}")
    raise Exception("Model download failed.") from e

# Final report: print one line per stage with its pass/fail marker.
print("\n" + "=" * 80)
print("=== VALIDATION REPORT ===".center(80))
print("=" * 80 + "\n")
for stage, passed in success.items():
    status = "✅" if passed else "❌"
    print(f"{status} {stage.replace('_', ' ').title()}")
print("\n" + "=" * 80)

# Every stage above raises on failure, so the else branch is only a safeguard.
if all(success.values()):
    print("\n🎉 All stages completed successfully!")
    print(f"🌐 Your model is available at: https://huggingface.co/{repo_name}")
else:
    raise Exception("Validation failed for one or more stages.")


# Final cleanup
print("\n🧹 Cleaning up temporary files...")
safe_remove_directory("./checkpoints")
safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./unsloth-qwen2-7vl-french-ocr-adapter")

print("\n🎯 Pipeline completed successfully!")
print("=" * 80)


================================================
FILE: tests/saving/vision_models/test_push_to_hub_merged.py
================================================
## Import required libraries

from unsloth import FastVisionModel, is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator

import torch
import os
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig

import sys
from pathlib import Path


REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))


from tests.utils.cleanup_utils import safe_remove_directory


## Dataset Preparation

print("\n📊 Loading and preparing dataset...")
dataset = load_dataset("lbourdois/OCR-liboaccn-OPUS-MIT-5M-clean", "en", split = "train")
# To select the first 2000 examples
train_dataset = dataset.select(range(2000))

# To select the next 200 examples for evaluation
eval_dataset = dataset.select(range(2000, 2200))

print(f"✅ Dataset loaded successfully!")
print(f"   📈 Training samples: {len(train_dataset)}")
print(f"   📊 Evaluation samples: {len(eval_dataset)}")


# Convert dataset to OAI messages
def format_data(sample, system_prompt = None):
    """Convert one dataset row into an OpenAI-style ``messages`` dict.

    Args:
        sample: Mapping with the keys ``"question"``, ``"image"`` and
            ``"answer"``.
        system_prompt: Text of the system turn. When ``None`` (the default)
            the module-level ``system_message`` is used, so existing
            one-argument calls behave as before.

    Returns:
        ``{"messages": [...]}`` with a system turn, a user turn (question
        text followed by the image) and an assistant turn (the answer).
    """
    if system_prompt is None:
        # Resolved at call time: `system_message` is assigned further down
        # in the script, after this function is defined.
        system_prompt = system_message
    return {
        "messages": [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": sample["question"],
                    },
                    {
                        "type": "image",
                        "image": sample["image"],
                    },
                ],
            },
            {
                "role": "assistant",
                "content": [{"type": "text", "text": sample["answer"]}],
            },
        ],
    }


print("\n🔄 Formatting dataset for vision training...")
# System prompt read by format_data through the module namespace.
system_message = "You are an expert french ocr system."
# Convert dataset to OAI messages.
# A list comprehension is used to keep the PIL.Image type; .map would convert
# the images to bytes.
train_dataset = [format_data(sample) for sample in train_dataset]
eval_dataset = [format_data(sample) for sample in eval_dataset]
print("✅ Dataset formatting completed!")

"""## Finetuning Setup and Run"""


print("\n" + "=" * 80)
print("=== MODEL LOADING AND SETUP ===".center(80))
print("=" * 80 + "\n")
# Load Base Model
print("🤖 Loading base vision model...")
try:
    model, tokenizer = FastVisionModel.from_pretrained(
        # Larger alternative:
        # model_name = "unsloth/Qwen2-VL-7B-Instruct",
        model_name = "unsloth/Qwen2-VL-2B-Instruct",
        max_seq_length = 2048,  # Choose any for long context!
        load_in_4bit = True,  # 4 bit quantization to reduce memory
        load_in_8bit = False,  # [NEW!] A bit more accurate, uses 2x memory
        full_finetuning = False,  # [NEW!] We have full finetuning now!
    )
except Exception as e:
    # Report which stage failed, then re-raise the original exception.
    print(f"❌ Failed to load base model: {e}")
    raise

print("\n🔧 Setting up LoRA configuration...")
## LoRA Finetuning
try:
    model = FastVisionModel.get_peft_model(
        model,
        finetune_vision_layers = True,  # Turn off for just text!
        finetune_language_layers = True,  # Should leave on!
        finetune_attention_modules = True,  # Attention good for GRPO
        finetune_mlp_modules = True,  # Should leave on always!
        r = 16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        lora_alpha = 32,
        lora_dropout = 0,  # Supports any, but = 0 is optimized
        bias = "none",  # Supports any, but = "none" is optimized
        use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
        random_state = 3407,
        use_rslora = False,  # We support rank stabilized LoRA
        loftq_config = None,  # And LoftQ
    )
    print("✅ LoRA configuration applied successfully!")
    # The values printed below are hard-coded copies of the arguments above.
    print(f"   🎯 LoRA rank (r): 16")
    print(f"   📊 LoRA alpha: 32")
    print(f"   🔍 Vision layers: Enabled")
    print(f"   💬 Language layers: Enabled")
except Exception as e:
    print(f"❌ Failed to apply LoRA configuration: {e}")
    raise

print("\n" + "=" * 80)
print("=== TRAINING SETUP ===".center(80))
print("=" * 80 + "\n")


print("🏋️ Preparing trainer...")
FastVisionModel.for_training(model)  # Enable for training!

# Only train_dataset is handed to the trainer; eval_dataset is not used here.
try:
    trainer = SFTTrainer(
        model = model,
        tokenizer = tokenizer,
        data_collator = UnslothVisionDataCollator(model, tokenizer),
        train_dataset = train_dataset,
        args = SFTConfig(
            # per_device_train_batch_size = 4,
            # gradient_accumulation_steps = 8,
            per_device_train_batch_size = 2,
            gradient_accumulation_steps = 4,
            gradient_checkpointing = True,
            gradient_checkpointing_kwargs = {
                "use_reentrant": False
            },  # use non-reentrant checkpointing
            max_grad_norm = 0.3,  # max gradient norm based on QLoRA paper
            warmup_ratio = 0.03,
            # num_train_epochs = 2, # Set this instead of max_steps for full training runs
            max_steps = 10,
            learning_rate = 2e-4,
            fp16 = not is_bf16_supported(),
            bf16 = is_bf16_supported(),
            logging_steps = 5,
            save_strategy = "epoch",
            optim = "adamw_torch_fused",
            weight_decay = 0.01,
            lr_scheduler_type = "linear",
            seed = 3407,
            output_dir = "checkpoints",
            report_to = "none",  # For Weights and Biases
            # You MUST put the below items for vision finetuning:
            remove_unused_columns = False,
            dataset_text_field = "",
            dataset_kwargs = {"skip_prepare_dataset": True},
            dataset_num_proc = 4,
            max_seq_length = 2048,
        ),
    )
    print("✅ Trainer setup completed!")
    # The values printed below are hard-coded copies of the arguments above.
    print(f"   📦 Batch size: 2")
    print(f"   🔄 Gradient accumulation steps: 4")
    print(f"   📈 Max training steps: 10")
    print(f"   🎯 Learning rate: 2e-4")
    print(f"   💾 Precision: {'BF16' if is_bf16_supported() else 'FP16'}")
except Exception as e:
    print(f"❌ Failed to setup trainer: {e}")
    raise

print("\n" + "=" * 80)
print("=== STARTING TRAINING ===".center(80))
print("=" * 80 + "\n")
# run training
try:
    print("🚀 Starting training process...")
    trainer_stats = trainer.train()
except Exception as e:
    print(f"❌ Training failed: {e}")
    raise

print("\n" + "=" * 80)
print("=== SAVING MODEL ===".center(80))
print("=" * 80 + "\n")

print("💾 Saving adapter model and tokenizer locally...")
try:
    # Only the directory is passed: the second positional parameter of
    # PeftModel.save_pretrained is `safe_serialization`, not a tokenizer.
    # The tokenizer is written by its own save_pretrained call below.
    model.save_pretrained("unsloth-qwen2-7vl-french-ocr-adapter")
    tokenizer.save_pretrained("unsloth-qwen2-7vl-french-ocr-adapter")
    print("✅ Model saved locally!")
except Exception as e:
    print(f"❌ Failed to save model locally: {e}")
    raise


from getpass import getpass

# Hugging Face credentials come from the environment when available;
# otherwise they are asked for interactively and stored back into os.environ.
hf_username = os.environ.get("HF_USER", "")
if not hf_username:
    hf_username = input("Please enter your Hugging Face username: ").strip()
    if not hf_username:
        # A blank name would produce the invalid repo id "/qwen2-ocr-merged"
        # and make the final cleanup target "./".
        raise ValueError("A Hugging Face username is required.")
    os.environ["HF_USER"] = hf_username

hf_token = os.environ.get("HF_TOKEN", "")
if not hf_token:
    # getpass keeps the secret token from being echoed to the terminal.
    hf_token = getpass("Please enter your Hugging Face token: ").strip()
    os.environ["HF_TOKEN"] = hf_token

repo_name = f"{hf_username}/qwen2-ocr-merged"
# Per-stage pass/fail flags, printed in the final validation report.
success = {
    "upload": False,
    "download": False,
}
# Stage 1: Upload model to Hub
try:
    print("\n" + "=" * 80)
    print("=== UPLOADING MODEL TO HUB ===".center(80))
    print("=" * 80 + "\n")
    print(f"🚀 Uploading to repository: {repo_name}")
    model.push_to_hub_merged(repo_name, tokenizer = tokenizer, token = hf_token)
    success["upload"] = True
    print("✅ Model uploaded successfully!")
except Exception as e:
    print(f"❌ Failed to upload model: {e}")
    # `from e` records the original error as the explicit cause.
    raise Exception("Model upload failed.") from e


# Remove the local ./<hf_username> directory before loading so that the repo
# id cannot be resolved to a local path and the Hub copy is what gets tested.
# NOTE(review): assumes push_to_hub_merged leaves merged files under
# ./<repo_name>, as the final cleanup of ./<hf_username> suggests - confirm.
safe_remove_directory(f"./{hf_username}")

# Stage 2: Load the uploaded model back by its repo id
try:
    print("\n" + "=" * 80)
    print("=== TESTING MODEL DOWNLOAD ===".center(80))
    print("=" * 80 + "\n")
    print("📥 Testing model download...")
    test_model, test_tokenizer = FastVisionModel.from_pretrained(repo_name)
    success["download"] = True
    print("✅ Model downloaded successfully!")

    # Clean up test model
    del test_model, test_tokenizer
    torch.cuda.empty_cache()
except Exception as e:
    print(f"❌ Download failed: {e}")
    raise Exception("Model download failed.") from e

# Final report: print one line per stage with its pass/fail marker.
print("\n" + "=" * 80)
print("=== VALIDATION REPORT ===".center(80))
print("=" * 80 + "\n")
for stage, passed in success.items():
    status = "✅" if passed else "❌"
    print(f"{status} {stage.replace('_', ' ').title()}")
print("\n" + "=" * 80)

# Every stage above raises on failure, so the else branch is only a safeguard.
if all(success.values()):
    print("\n🎉 All stages completed successfully!")
    print(f"🌐 Your model is available at: https://huggingface.co/{repo_name}")
else:
    raise Exception("Validation failed for one or more stages.")


# Final cleanup
print("\n🧹 Cleaning up temporary files...")
safe_remove_directory("./checkpoints")
safe_remove_directory("./unsloth_compiled_cache")
safe_remove_directory("./unsloth-qwen2-7vl-french-ocr-adapter")
safe_remove_directory(f"./{hf_username}")

print("\n🎯 Pipeline completed successfully!")
print("=" * 80)


================================================
FILE: tests/saving/vision_models/test_save_merge_qwen2.5vl32B_model_ocr_benchmark.py
================================================
# -*- coding: utf-8 -*-

from unsloth import FastVisionModel

import torch
from qwen_vl_utils import process_vision_info
import os
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig

import sys
from pathlib import Path


# Put the repository root on sys.path so the `tests.*` imports below resolve
# when this file is run directly; parents[3] climbs out of
# tests/saving/vision_models/ to the repository root.
REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.ocr_eval import OCRModelEvaluator


## Dataset Preparation
from datasets import load_dataset

# Load the train split of the "en" configuration of the OCR dataset
dataset = load_dataset("lbourdois/OCR-liboaccn-OPUS-MIT-5M-clean", "en", split = "train")
# Rows 0-1999 are used for fine-tuning
train_dataset = dataset.select(range(2000))

# Rows 2000-2199 (the next 200, disjoint from training) are held out for evaluation
eval_dataset = dataset.select(range(2000, 2200))


# Convert dataset to OAI messages
def format_data(sample):
    """Wrap one dataset row in a chat-style ``messages`` structure.

    Reads the ``question``, ``image`` and ``answer`` fields of ``sample`` and
    the module-level ``system_message`` (looked up when the function is called).

    Returns:
        A dict with a single ``"messages"`` key holding the system, user and
        assistant turns, in that order.
    """
    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": system_message}],
    }
    # The user turn carries the text prompt first, then the image.
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": sample["question"]},
            {"type": "image", "image": sample["image"]},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [{"type": "text", "text": sample["answer"]}],
    }
    return {"messages": [system_turn, user_turn, assistant_turn]}


# System prompt read by format_data() for every converted sample.
# NOTE(review): the prompt says "french" while the dataset configuration
# loaded above is "en" — confirm which is intended.
system_message = "You are an expert french ocr system."
# Convert both splits to OAI-style messages.
# A list comprehension is used on purpose to keep the PIL.Image type;
# Dataset.map() would convert the images to bytes.
train_dataset = [format_data(sample) for sample in train_dataset]
eval_dataset = [format_data(sample) for sample in eval_dataset]

## Setup OCR main evaluation function and helpers
import os
import torch
from tqdm import tqdm
import pandas as pd
from jiwer import wer, cer
from qwen_vl_utils import process_vision_info

# Evaluator used below to score each model variant (evaluate_model), collect
# the scores (add_to_comparison) and print the final table.
ocr_evaluator = OCRModelEvaluator()
# NOTE(review): not referenced again in the visible part of this script.
model_comparison_results = {}

## Finetuning Setup and Run
# Load the pre-quantized 4-bit base model and its tokenizer

model, tokenizer = FastVisionModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-VL-32B-Instruct-bnb-4bit",
    max_seq_length = 2048,  # Choose any for long context!
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    load_in_8bit = False,  # [NEW!] A bit more accurate, uses 2x memory
    full_finetuning = False,  # [NEW!] We have full finetuning now!
)

# Baseline: score the untouched base model on the eval split before any
# fine-tuning, and record it under model_name for the final comparison
model_name = "Unsloth Base model"
FastVisionModel.for_inference(model)
avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model, tokenizer, eval_dataset, output_dir = "unsloth_base_model_results"
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

## LoRA Finetuning
# Wrap the base model with LoRA adapters on vision and language layers
model = FastVisionModel.get_peft_model(
    model,
    finetune_vision_layers = True,  # Turn off for just text!
    finetune_language_layers = True,  # Should leave on!
    finetune_attention_modules = True,  # Attention good for GRPO
    finetune_mlp_modules = True,  # Should leave on always!
    r = 16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    # target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
    # "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0,  # Supports any, but = 0 is optimized
    bias = "none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None,  # And LoftQ
)

from unsloth import is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator

FastVisionModel.for_training(model)  # Enable for training!
model.config.use_cache = False  # turned back on for the merged-model benchmarks later


# Supervised fine-tuning run: 60 optimizer steps, per-device batch 2 with
# 4 accumulation steps
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer),
    train_dataset = train_dataset,
    args = SFTConfig(
        # per_device_train_batch_size = 4,
        # gradient_accumulation_steps = 8,
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        gradient_checkpointing = True,
        gradient_checkpointing_kwargs = {
            "use_reentrant": False
        },  # non-reentrant checkpointing (use_reentrant is False)
        max_grad_norm = 0.3,  # max gradient norm based on QLoRA paper
        warmup_ratio = 0.03,
        # num_train_epochs = 2, # Set this instead of max_steps for full training runs
        max_steps = 60,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support
        fp16 = not is_bf16_supported(),
        bf16 = is_bf16_supported(),
        logging_steps = 5,
        save_strategy = "epoch",
        optim = "adamw_torch_fused",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "unsloth-qwen2.5-vl-32b-french-ocr-checkpoints",
        report_to = "none",  # For Weights and Biases
        # You MUST put the below items for vision finetuning:
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        dataset_num_proc = 4,
        max_seq_length = 2048,
    ),
)

# Run training
trainer_stats = trainer.train()

# Save the trained adapter and the tokenizer to the same directory
# (removed again in the final cleanup)
model.save_pretrained("unsloth-qwen2.5-vl-32b-french-ocr-adapter", tokenizer)
tokenizer.save_pretrained("unsloth-qwen2.5-vl-32b-french-ocr-adapter")

## Measure Adapter Performance

# Score the model with the LoRA adapter still attached (not yet merged)
model_name = "Unsloth lora adapter model"
FastVisionModel.for_inference(model)
avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model, tokenizer, eval_dataset, output_dir = "unsloth_lora_model_results"
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

## Merge Model


def find_lora_base_model(model_to_inspect):
    """Peel up to two wrapper layers off ``model_to_inspect``.

    Follows ``.base_model`` when present, then ``.model`` on the result when
    present, and returns the object reached. An object with neither attribute
    is returned unchanged.
    """
    unwrapped = getattr(model_to_inspect, "base_model", model_to_inspect)
    return getattr(unwrapped, "model", unwrapped)


# Print the class name of the unwrapped model (informational only)
base = find_lora_base_model(model)

print((base.__class__.__name__))

# Merge the adapter into the base weights and save; no save method is passed,
# so the default (16-bit, per the directory name) is used
model.save_pretrained_merged(
    save_directory = "qwen2.5-ocr-merged-finetune-merge-16bit", tokenizer = tokenizer
)


## Benchmark merged model performance
# The same merged checkpoint is reloaded three times (no quantization flags,
# 4-bit, 8-bit) and scored on the eval split each time.

### 16 bits merged model

model, tokenizer = FastVisionModel.from_pretrained(
    "./qwen2.5-ocr-merged-finetune-merge-16bit", load_in_4bit = False, load_in_8bit = False
)

# benchmark 16bit loaded, 16bits merged model performance
model_name = "Unsloth 16bits-merged model load-16bits"
model.config.use_cache = True

avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    output_dir = "unsloth_16bits_merged_model_load_16bits_results",
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# load 16bits-merged model in 4 bits
model, tokenizer = FastVisionModel.from_pretrained(
    "./qwen2.5-ocr-merged-finetune-merge-16bit", load_in_4bit = True, load_in_8bit = False
)

# benchmark 4bit loaded, 16bits merged model performance
model_name = "Unsloth 16bits-merged model load-4bits"
model.config.use_cache = True

avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    output_dir = "unsloth_16bits_merged_model_load_4bits_results",
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# load 16bits-merged model in 8 bits
model, tokenizer = FastVisionModel.from_pretrained(
    "./qwen2.5-ocr-merged-finetune-merge-16bit", load_in_4bit = False, load_in_8bit = True
)

# benchmark 8bit loaded, 16bits merged model performance
# NOTE(review): unlike the two loads above, use_cache is not set to True
# here — confirm whether that is intentional.
model_name = "Unsloth 16bits-merged model load-8bits"
avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    output_dir = "unsloth_16bits_merged_model_load_8bits_results",
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# """### 4 bits merged model"""
#
# # load 4bits-merged model in 4 bits
# model, tokenizer = FastVisionModel.from_pretrained("./qwen2-ocr-merged-finetune-merge-4bit",load_in_4bit=True, load_in_8bit=False)
#
# # benchmark 4bit loaded, 4bits merged model performance
# model_name = "Unsloth 4bits-merged model load-4bits"
#
# avg_wer, avg_cer = ocr_evaluator.evaluate_model(model, tokenizer, eval_dataset, output_dir="unsloth_4bits_merged_model_load_4bits_results")
# ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)
#
# # load model in 8 bits
# model, tokenizer = FastVisionModel.from_pretrained("./qwen2-ocr-merged-finetune-merge-4bit",load_in_4bit=False, load_in_8bit=True)
#
# # benchmark 8bit loaded, 4bits merged model performance
# model_name = "Unsloth 4bits-merged model load-8bits"
#
# avg_wer, avg_cer = ocr_evaluator.evaluate_model(model, tokenizer, eval_dataset, output_dir="unsloth_4bits_merged_model_load_8bits_results")
# ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# Print the comparison of every variant recorded via add_to_comparison()
ocr_evaluator.print_model_comparison()


# Remove the adapter, checkpoint, compile-cache and merged-model directories
print("\n🧹 Cleaning up temporary files...")
for artifact_dir in (
    "./unsloth-qwen2.5-vl-32b-french-ocr-adapter",
    "./unsloth-qwen2.5-vl-32b-french-ocr-checkpoints",
    "./unsloth_compiled_cache",
    "./qwen2.5-ocr-merged-finetune-merge-16bit",
):
    safe_remove_directory(artifact_dir)

print("\n🎯 Pipeline completed successfully!")
print("=" * 80)


================================================
FILE: tests/saving/vision_models/test_save_merge_vision_model_ocr_benchmark.py
================================================
# -*- coding: utf-8 -*-

from unsloth import FastVisionModel

import torch
from qwen_vl_utils import process_vision_info
import os
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig

import sys
from pathlib import Path


# Put the repository root on sys.path so the `tests.*` imports below resolve
# when this file is run directly; parents[3] climbs out of
# tests/saving/vision_models/ to the repository root.
REPO_ROOT = Path(__file__).parents[3]
sys.path.insert(0, str(REPO_ROOT))

from tests.utils.cleanup_utils import safe_remove_directory
from tests.utils.ocr_eval import OCRModelEvaluator


## Dataset Preparation
from datasets import load_dataset

# Load the train split of the "en" configuration of the OCR dataset
dataset = load_dataset("lbourdois/OCR-liboaccn-OPUS-MIT-5M-clean", "en", split = "train")
# Rows 0-1999 are used for fine-tuning
train_dataset = dataset.select(range(2000))

# Rows 2000-2199 (the next 200, disjoint from training) are held out for evaluation
eval_dataset = dataset.select(range(2000, 2200))


# Convert dataset to OAI messages
def format_data(sample):
    """Convert one dataset row into a three-turn chat conversation.

    The system turn uses the module-level ``system_message`` (resolved at call
    time); the user turn carries ``sample["question"]`` followed by
    ``sample["image"]``; the assistant turn carries ``sample["answer"]``.

    Returns:
        ``{"messages": [system, user, assistant]}``.
    """

    def text_part(value):
        # A single text content item in the message schema.
        return {"type": "text", "text": value}

    conversation = [
        {"role": "system", "content": [text_part(system_message)]},
        {
            "role": "user",
            "content": [
                text_part(sample["question"]),
                {"type": "image", "image": sample["image"]},
            ],
        },
        {"role": "assistant", "content": [text_part(sample["answer"])]},
    ]
    return {"messages": conversation}


# System prompt read by format_data() for every converted sample.
# NOTE(review): the prompt says "french" while the dataset configuration
# loaded above is "en" — confirm which is intended.
system_message = "You are an expert french ocr system."
# Convert both splits to OAI-style messages.
# A list comprehension is used on purpose to keep the PIL.Image type;
# Dataset.map() would convert the images to bytes.
train_dataset = [format_data(sample) for sample in train_dataset]
eval_dataset = [format_data(sample) for sample in eval_dataset]

## Setup OCR main evaluation function and helpers
import os
import torch
from tqdm import tqdm
import pandas as pd
from jiwer import wer, cer
from qwen_vl_utils import process_vision_info

# Evaluator used below to score each model variant (evaluate_model), collect
# the scores (add_to_comparison) and print the final table.
ocr_evaluator = OCRModelEvaluator()
# NOTE(review): not referenced again in the visible part of this script.
model_comparison_results = {}

## Finetuning Setup and Run
# Load the base model and its tokenizer with 4-bit quantization

model, tokenizer = FastVisionModel.from_pretrained(
    model_name = "unsloth/Qwen2-VL-7B-Instruct",
    max_seq_length = 2048,  # Choose any for long context!
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    load_in_8bit = False,  # [NEW!] A bit more accurate, uses 2x memory
    full_finetuning = False,  # [NEW!] We have full finetuning now!
)

# Baseline: score the untouched base model on the eval split before any
# fine-tuning, and record it under model_name for the final comparison
model_name = "Unsloth Base model"
FastVisionModel.for_inference(model)
avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model, tokenizer, eval_dataset, output_dir = "unsloth_base_model_results"
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

## LoRA Finetuning
# Wrap the base model with LoRA adapters on vision and language layers
model = FastVisionModel.get_peft_model(
    model,
    finetune_vision_layers = True,  # Turn off for just text!
    finetune_language_layers = True,  # Should leave on!
    finetune_attention_modules = True,  # Attention good for GRPO
    finetune_mlp_modules = True,  # Should leave on always!
    r = 16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    # target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
    # "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0,  # Supports any, but = 0 is optimized
    bias = "none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None,  # And LoftQ
)

from unsloth import is_bf16_supported
from unsloth.trainer import UnslothVisionDataCollator

FastVisionModel.for_training(model)  # Enable for training!
model.config.use_cache = False  # turned back on for the merged-model benchmarks later


# Supervised fine-tuning run: 60 optimizer steps, per-device batch 2 with
# 4 accumulation steps
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer),
    train_dataset = train_dataset,
    args = SFTConfig(
        # per_device_train_batch_size = 4,
        # gradient_accumulation_steps = 8,
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        gradient_checkpointing = True,
        gradient_checkpointing_kwargs = {
            "use_reentrant": False
        },  # non-reentrant checkpointing (use_reentrant is False)
        max_grad_norm = 0.3,  # max gradient norm based on QLoRA paper
        warmup_ratio = 0.03,
        # num_train_epochs = 2, # Set this instead of max_steps for full training runs
        max_steps = 60,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support
        fp16 = not is_bf16_supported(),
        bf16 = is_bf16_supported(),
        logging_steps = 5,
        save_strategy = "epoch",
        optim = "adamw_torch_fused",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "unsloth-qwen2-7vl-french-ocr-checkpoints",
        report_to = "none",  # For Weights and Biases
        # You MUST put the below items for vision finetuning:
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        dataset_num_proc = 4,
        max_seq_length = 2048,
    ),
)

# Run training
trainer_stats = trainer.train()

# Save the trained adapter and the tokenizer to the same directory
# (removed again in the final cleanup)
model.save_pretrained("unsloth-qwen2-7vl-french-ocr-adapter", tokenizer)
tokenizer.save_pretrained("unsloth-qwen2-7vl-french-ocr-adapter")

## Measure Adapter Performance

# Score the model with the LoRA adapter still attached (not yet merged)
model_name = "Unsloth lora adapter model"
FastVisionModel.for_inference(model)
avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model, tokenizer, eval_dataset, output_dir = "unsloth_lora_model_results"
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

## Merge Model


def find_lora_base_model(model_to_inspect):
    """Return the object reached by following ``base_model`` and then ``model``.

    Each attribute is followed only if the current object has it, so an object
    with neither attribute comes back unchanged.
    """
    current = model_to_inspect
    for wrapper_attr in ("base_model", "model"):
        if hasattr(current, wrapper_attr):
            current = getattr(current, wrapper_attr)
    return current


# Print the class name of the unwrapped model (informational only)
base = find_lora_base_model(model)

print((base.__class__.__name__))

# Merge the adapter into the base weights and save; no save method is passed,
# so the default (16-bit, per the directory name) is used
model.save_pretrained_merged(
    save_directory = "qwen2-ocr-merged-finetune-merge-16bit", tokenizer = tokenizer
)


## Benchmark merged model performance
# The same merged checkpoint is reloaded three times (no quantization flags,
# 4-bit, 8-bit) and scored on the eval split each time.

### 16 bits merged model

model, tokenizer = FastVisionModel.from_pretrained(
    "./qwen2-ocr-merged-finetune-merge-16bit", load_in_4bit = False, load_in_8bit = False
)

# benchmark 16bit loaded, 16bits merged model performance
model_name = "Unsloth 16bits-merged model load-16bits"
model.config.use_cache = True

avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    output_dir = "unsloth_16bits_merged_model_load_16bits_results",
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# load 16bits-merged model in 4 bits
model, tokenizer = FastVisionModel.from_pretrained(
    "./qwen2-ocr-merged-finetune-merge-16bit", load_in_4bit = True, load_in_8bit = False
)

# benchmark 4bit loaded, 16bits merged model performance
model_name = "Unsloth 16bits-merged model load-4bits"
model.config.use_cache = True

avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    output_dir = "unsloth_16bits_merged_model_load_4bits_results",
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# load 16bits-merged model in 8 bits
model, tokenizer = FastVisionModel.from_pretrained(
    "./qwen2-ocr-merged-finetune-merge-16bit", load_in_4bit = False, load_in_8bit = True
)

# benchmark 8bit loaded, 16bits merged model performance
# NOTE(review): unlike the two loads above, use_cache is not set to True
# here — confirm whether that is intentional.
model_name = "Unsloth 16bits-merged model load-8bits"
avg_wer, avg_cer = ocr_evaluator.evaluate_model(
    model,
    tokenizer,
    eval_dataset,
    output_dir = "unsloth_16bits_merged_model_load_8bits_results",
)
ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# """### 4 bits merged model"""
#
# # load 4bits-merged model in 4 bits
# model, tokenizer = FastVisionModel.from_pretrained("./qwen2-ocr-merged-finetune-merge-4bit",load_in_4bit=True, load_in_8bit=False)
#
# # benchmark 4bit loaded, 4bits merged model performance
# model_name = "Unsloth 4bits-merged model load-4bits"
#
# avg_wer, avg_cer = ocr_evaluator.evaluate_model(model, tokenizer, eval_dataset, output_dir="unsloth_4bits_merged_model_load_4bits_results")
# ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)
#
# # load model in 8 bits
# model, tokenizer = FastVisionModel.from_pretrained("./qwen2-ocr-merged-finetune-merge-4bit",load_in_4bit=False, load_in_8bit=True)
#
# # benchmark 8bit loaded, 4bits merged model performance
# model_name = "Unsloth 4bits-merged model load-8bits"
#
# avg_wer, avg_cer = ocr_evaluator.evaluate_model(model, tokenizer, eval_dataset, output_dir="unsloth_4bits_merged_model_load_8bits_results")
# ocr_evaluator.add_to_comparison(model_name, avg_wer, avg_cer)

# Print the comparison of every variant recorded via add_to_comparison()
ocr_evaluator.print_model_comparison()


# Delete the directories this run produced: adapter, checkpoints,
# compile cache and the merged model
print("\n🧹 Cleaning up temporary files...")
generated_dirs = [
    "./unsloth-qwen2-7vl-french-ocr-adapter",
    "./unsloth-qwen2-7vl-french-ocr-checkpoints",
    "./unsloth_compiled_cache",
    "./qwen2-ocr-merged-finetune-merge-16bit",
]
for generated_dir in generated_dirs:
    safe_remove_directory(generated_dir)

print("\n🎯 Pipeline completed successfully!")
print("=" * 80)


================================================
FILE: tests/test_get_model_name.py
================================================
import unittest
from unittest.mock import patch
from unsloth.models.loader_utils import get_model_name
from unsloth.models import loader_utils
from unsloth.models.mapper import FLOAT_TO_INT_MAPPER, MAP_TO_UNSLOTH_16bit


def _no_remote_mapper():
    return {}, {}, {}


class TestGetModelName(unittest.TestCase):
    """Checks ``get_model_name`` resolution and the static mapper tables."""

    def _assert_mapping(self, model_name, load_in_4bit, expected, should_change):
        """Resolve ``model_name`` and compare the result case-insensitively.

        The resolved name must equal ``expected``; it must differ from the
        input when ``should_change`` is true and equal it otherwise.
        """
        resolved = get_model_name(model_name, load_in_4bit = load_in_4bit).lower()
        requested = model_name.lower()
        self.assertEqual(resolved, expected.lower())
        compare = self.assertNotEqual if should_change else self.assertEqual
        compare(resolved, requested)

    @patch.object(loader_utils, "_get_new_mapper", _no_remote_mapper)
    def test_resolution_matrix(self):
        """Run every (name, load_in_4bit, expected, should_change) case.

        ``_get_new_mapper`` is patched to return empty mappings for the
        duration of the test. Bare-string entries are names expected to
        resolve to themselves with ``load_in_4bit = True``.
        """
        cases = [
            # Core mappings
            ("meta-llama/Llama-2-7b-hf", True, "unsloth/llama-2-7b-bnb-4bit", True),
            ("meta-llama/Llama-2-7b-hf", False, "unsloth/llama-2-7b", True),
            (
                "mistralai/Ministral-8B-Instruct-2410",
                True,
                "mistralai/Ministral-8B-Instruct-2410",
                False,
            ),
            (
                "meta-llama/Llama-3.2-1B-Instruct",
                False,
                "unsloth/Llama-3.2-1B-Instruct",
                True,
            ),
            (
                "meta-llama/Llama-2-7b-chat-hf",
                True,
                "unsloth/llama-2-7b-chat-bnb-4bit",
                True,
            ),
            (
                "meta-llama/Llama-3.3-70B-Instruct",
                True,
                "unsloth/llama-3.3-70b-instruct-unsloth-bnb-4bit",
                True,
            ),
            ("Qwen/Qwen3-8B", True, "unsloth/Qwen3-8B-unsloth-bnb-4bit", True),
            ("Qwen/Qwen3-8B", False, "unsloth/Qwen3-8B", True),
            ("Qwen/Qwen3-8B-FP8", False, "unsloth/Qwen3-8B-FP8", True),
            ("Qwen/Qwen3-8B-FP8", True, "unsloth/Qwen3-8B-unsloth-bnb-4bit", True),
            (
                "mistralai/Ministral-3-3B-Instruct-2512",
                True,
                "unsloth/Ministral-3-3B-Instruct-2512-unsloth-bnb-4bit",
                True,
            ),
            (
                "mistralai/Ministral-3-3B-Instruct-2512",
                False,
                "unsloth/Ministral-3-3B-Instruct-2512",
                True,
            ),
            ("unsloth/Kimi-K2-Instruct", True, "unsloth/Kimi-K2-Instruct-BF16", True),
            ("unsloth/Kimi-K2-Instruct", False, "unsloth/Kimi-K2-Instruct", False),
            # Fallback-to-original behavior
            "nonexistent-user/nonexistent-model-123",
            "google/gemma-3-random-prototype-123",
            "imdatta0/nanoqwen-fp8",
            "imdatta0/nanoqwen-bf16",
            # Backward compatibility for legacy 4bit names
            ("unsloth/llama-2-7b-bnb-4bit", True, "unsloth/llama-2-7b-bnb-4bit", False),
            ("unsloth/llama-2-7b-bnb-4bit", False, "unsloth/llama-2-7b", True),
            ("google/gemma-2-9b", True, "unsloth/gemma-2-9b-bnb-4bit", True),
            # GPT-OSS behavior
            ("openai/gpt-oss-20b", False, "unsloth/gpt-oss-20b", True),
            ("openai/gpt-oss-20b", True, "unsloth/gpt-oss-20b-unsloth-bnb-4bit", True),
            ("unsloth/gpt-oss-20b", True, "unsloth/gpt-oss-20b-unsloth-bnb-4bit", True),
            ("unsloth/gpt-oss-20b-bf16", True, "unsloth/gpt-oss-20b-bf16", False),
            (
                "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
                False,
                "unsloth/gpt-oss-20b",
                True,
            ),
            (
                "unsloth/gpt-oss-20b-bnb-4bit",
                True,
                "unsloth/gpt-oss-20b-bnb-4bit",
                False,
            ),
        ]
        for entry in cases:
            if isinstance(entry, str):
                # Expand the shorthand: unchanged name, checked with 4-bit loading.
                entry = (entry, True, entry, False)
            name, in_4bit, target, changes = entry
            with self.subTest(model_name = name, load_in_4bit = in_4bit):
                self._assert_mapping(name, in_4bit, target, changes)

    def test_static_mapper_contract(self):
        """Pin selected entries of ``FLOAT_TO_INT_MAPPER`` and ``MAP_TO_UNSLOTH_16bit``."""
        expected_int_targets = {
            "qwen/qwen3-8b": "unsloth/qwen3-8b-unsloth-bnb-4bit",
            "qwen/qwen3-8b-fp8": "unsloth/qwen3-8b-unsloth-bnb-4bit",
            "mistralai/ministral-3-3b-instruct-2512": (
                "unsloth/ministral-3-3b-instruct-2512-unsloth-bnb-4bit"
            ),
            "unsloth/kimi-k2-instruct": "unsloth/kimi-k2-instruct-bf16",
        }
        for src, target in expected_int_targets.items():
            with self.subTest(src = src):
                self.assertEqual(FLOAT_TO_INT_MAPPER[src], target)
        self.assertEqual(
            MAP_TO_UNSLOTH_16bit["qwen/qwen3-8b-fp8"], "unsloth/Qwen3-8B-FP8"
        )


# Allow running this file directly as a script through unittest's runner.
if __name__ == "__main__":
    unittest.main()


================================================
FILE: tests/test_model_registry.py
================================================
"""

Test model registration methods
Checks that model registration methods work for respective models as well as all models
The check is performed
- by registering the models
- checking that the instantiated models can be found on huggingface hub by querying for the model id

"""

from dataclasses import dataclass

import pytest
from huggingface_hub import ModelInfo as HfModelInfo

from unsloth.registry import register_models, search_models
from unsloth.registry._deepseek import register_deepseek_models
from unsloth.registry._gemma import register_gemma_models
from unsloth.registry._llama import register_llama_models
from unsloth.registry._mistral import register_mistral_models
from unsloth.registry._phi import register_phi_models
from unsloth.registry._qwen import register_qwen_models
from unsloth.registry.registry import MODEL_REGISTRY, QUANT_TAG_MAP, QuantType
from unsloth.utils.hf_hub import get_model_info

# Model-family labels and their registration functions. The two lists are
# paired positionally (via zip) when TestParams is built, so they must stay
# in the same order.
MODEL_NAMES = [
    "llama",
    "qwen",
    "mistral",
    "phi",
    "gemma",
    "deepseek",
]
MODEL_REGISTRATION_METHODS = [
    register_llama_models,
    register_qwen_models,
    register_mistral_models,
    register_phi_models,
    register_gemma_models,
    register_deepseek_models,
]


# Pairs a model-family label with the function that registers that family.
@dataclass
class ModelTestParam:
    # Family label; also used as the pytest parametrize id.
    name: str
    # Zero-argument registration function, called by test_model_registration.
    # NOTE(review): `callable` is the builtin function, not a type;
    # `typing.Callable` is presumably what was intended.
    register_models: callable


def _test_model_uploaded(model_ids: list[str]):
    """Return the ids in ``model_ids`` for which ``get_model_info`` is falsy.

    An empty list means every id produced a truthy model-info result.
    """
    return [model_id for model_id in model_ids if not get_model_info(model_id)]


# One ModelTestParam per (label, registration function) pair, matched by position
TestParams = list(map(ModelTestParam, MODEL_NAMES, MODEL_REGISTRATION_METHODS))


# Each family's registration function must register only models that can be
# looked up with get_model_info
@pytest.mark.parametrize("model_test_param", TestParams, ids = lambda param: param.name)
def test_model_registration(model_test_param: ModelTestParam):
    """Register one model family into an emptied registry and check every id."""
    # Start from an empty registry so only this family's entries are checked.
    MODEL_REGISTRY.clear()
    model_test_param.register_models()
    not_found = _test_model_uploaded(MODEL_REGISTRY.keys())
    assert (
        not not_found
    ), f"{model_test_param.name} missing following models: {not_found}"


def test_all_model_registration():
    """Register all models at once and check that every registered id is found."""
    register_models()
    not_found = _test_model_uploaded(MODEL_REGISTRY.keys())
    assert not not_found, f"Missing following models: {not_found}"


def test_quant_type():
    """Models found for ``QuantType.UNSLOTH`` carry that type and its path tag."""
    # Test that the quant_type is correctly set for model paths
    # NOTE: for models registered under org="unsloth" with QuantType.NONE aliases QuantType.UNSLOTH
    wanted = QuantType.UNSLOTH
    matches = search_models(quant_types = [wanted])
    assert all(entry.quant_type == wanted for entry in matches)
    # Every match must also have the corresponding tag inside its model path.
    path_tag = QUANT_TAG_MAP[wanted]
    assert all(path_tag in entry.model_path for entry in matches)


================================================
FILE: tests/test_raw_text.py
================================================
#!/usr/bin/env python3
"""
Minimal test for raw text training implementation.
Tests basic functionality without heavy dependencies.
"""

import sys
import os
import tempfile
from pathlib import Path
import importlib.util


# Mock the datasets module since it's not installed
class MockDataset:
    """Minimal stand-in for ``datasets.Dataset`` backed by a dict of columns.

    ``data_dict`` maps column names to equally long lists of values. Only the
    pieces this test file needs are provided: ``column_names``, ``len()``,
    column access by name, row access by integer index, and ``from_dict``.
    """

    def __init__(self, data_dict):
        self.data = data_dict
        self.column_names = list(data_dict.keys())

    def __len__(self):
        """Return the number of rows, taken from the first column.

        A dataset with no columns has zero rows (previously this raised
        ``StopIteration``).
        """
        first_column = next(iter(self.data.values()), ())
        return len(first_column)

    def __getitem__(self, idx):
        """Return a whole column for a ``str`` key or one row dict for an ``int``.

        Raises:
            TypeError: if ``idx`` is neither a ``str`` nor an ``int``.
        """
        if isinstance(idx, str):
            # Allow accessing columns by name like dataset['text']
            return self.data[idx]
        elif isinstance(idx, int):
            # Allow accessing individual rows by index
            return {key: values[idx] for key, values in self.data.items()}
        else:
            raise TypeError(f"Invalid index type: {type(idx)}")

    @classmethod
    def from_dict(cls, data_dict):
        """Alternate constructor mirroring the ``from_dict`` call style."""
        return cls(data_dict)


# Install the mock under the name "datasets" so that an `import datasets`
# executed while loading raw_text.py picks up MockDataset.
# type(sys) is the module type, so this builds an empty module object.
# NOTE(review): this overwrites any existing sys.modules["datasets"] entry for
# the rest of the process — confirm that is acceptable when this file runs
# alongside other tests.
datasets_mock = type(sys)("datasets")
datasets_mock.Dataset = MockDataset
sys.modules["datasets"] = datasets_mock

# Import the raw_text module directly to avoid unsloth/__init__.py dependencies
# (path: <parent of this tests directory>/unsloth/dataprep/raw_text.py)
current_dir = os.path.dirname(__file__)
raw_text_path = os.path.join(
    os.path.dirname(current_dir), "unsloth", "dataprep", "raw_text.py"
)

# Load the file as a standalone module named "raw_text" and execute it
spec = importlib.util.spec_from_file_location("raw_text", raw_text_path)
raw_text_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(raw_text_module)

# Classes under test
RawTextDataLoader = raw_text_module.RawTextDataLoader
TextPreprocessor = raw_text_module.TextPreprocessor


def test_raw_text_loader():
    """Test basic RawTextDataLoader functionality.

    Writes a small temporary text file, loads it in both text and tokenized
    modes with a mock tokenizer, checks constructor validation, and exercises
    ``TextPreprocessor``. The temporary file is always removed.

    Returns:
        True when every check passes (the script entry point uses this as the
        success flag).

    Raises:
        Exception: any failure is printed and then re-raised. Previously the
            failure was swallowed and ``False`` returned, so a test runner
            could never see this test fail.
    """

    # Mock tokenizer for testing
    class MockTokenizer:
        def __init__(self):
            self.eos_token = "</s>"
            self.eos_token_id = 2  # Mock EOS token ID

        def __call__(self, text, return_tensors = None, add_special_tokens = False):
            # One token per whitespace-separated word; ids are just positions.
            words = text.split()
            token_ids = list(range(len(words)))

            if return_tensors == "pt":
                # Mock tensor-like object
                class MockTensor:
                    def __init__(self, data):
                        self.data = data

                    def __getitem__(self, idx):
                        # Any index returns the full id list.
                        return self.data

                    def __len__(self):
                        return len(self.data)

                    def tolist(self):
                        return self.data

                return {"input_ids": [MockTensor(token_ids)]}
            return {"input_ids": token_ids}

        def decode(self, token_ids, skip_special_tokens = False):
            return " ".join([f"word_{i}" for i in token_ids])

    # Create test file
    test_content = "This is a test file for raw text training. " * 10
    with tempfile.NamedTemporaryFile(mode = "w", suffix = ".txt", delete = False) as f:
        f.write(test_content)
        test_file = f.name

    try:
        # Test loader
        tokenizer = MockTokenizer()
        loader = RawTextDataLoader(tokenizer, chunk_size = 5, stride = 2)

        # Test loading with text output (legacy mode)
        text_dataset = loader.load_from_file(test_file, return_tokenized = False)
        assert len(text_dataset) > 0, "Should create at least one chunk"
        assert "text" in text_dataset.column_names, "Dataset should have 'text' column"

        # Test loading with tokenized output (new efficient mode)
        tokenized_dataset = loader.load_from_file(test_file, return_tokenized = True)
        assert len(tokenized_dataset) > 0, "Should create at least one tokenized chunk"
        assert (
            "input_ids" in tokenized_dataset.column_names
        ), "Dataset should have 'input_ids' column"
        assert (
            "attention_mask" in tokenized_dataset.column_names
        ), "Dataset should have 'attention_mask' column"

        # Verify tokenized data structure
        first_sample = tokenized_dataset[0]
        assert isinstance(first_sample["input_ids"], list), "input_ids should be a list"
        assert isinstance(
            first_sample["attention_mask"], list
        ), "attention_mask should be a list"
        assert len(first_sample["input_ids"]) == len(
            first_sample["attention_mask"]
        ), "input_ids and attention_mask should have same length"

        # Verify labels field exists (for causal LM training)
        assert (
            "labels" in tokenized_dataset.column_names
        ), "Dataset should have 'labels' column"
        assert (
            first_sample["labels"] == first_sample["input_ids"]
        ), "labels should match input_ids"

        # Test constructor validation: invalid arguments must raise ValueError.
        # The `else` branch fails the test when no exception was raised.
        try:
            RawTextDataLoader(tokenizer, chunk_size = 0, stride = 2)
        except ValueError as e:
            assert "chunk_size must be positive" in str(e)
        else:
            raise AssertionError("Should raise ValueError for chunk_size=0")

        try:
            RawTextDataLoader(tokenizer, chunk_size = 5, stride = 10)
        except ValueError as e:
            assert "stride" in str(e) and "chunk_size" in str(e)
        else:
            raise AssertionError("Should raise ValueError for stride >= chunk_size")

        # Test preprocessor
        preprocessor = TextPreprocessor()
        clean_text = preprocessor.clean_text("  messy   text  \n\n\n  ")
        assert "messy text" in clean_text, "Should clean text properly"

        # Test validation
        stats = preprocessor.validate_dataset(text_dataset)
        assert stats["total_samples"] > 0, "Should count samples"
        assert "warnings" in stats, "Should include warnings"

        print("✅ All tests passed!")
        return True

    except Exception as e:
        # Report, then propagate so the failure is visible to the caller.
        print(f"❌ Test failed: {e}")
        raise

    finally:
        # Cleanup
        os.unlink(test_file)


# Script entry point: run the test and exit with status 0 when it reports
# success, 1 otherwise.
if __name__ == "__main__":
    success = test_raw_text_loader()
    sys.exit(0 if success else 1)


================================================
FILE: tests/utils/__init__.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from contextlib import contextmanager


@contextmanager
def timer(name):
    """Print how long the enclosed ``with`` body took to run.

    Args:
        name: Label placed at the start of the printed timing line.

    The timing line is printed even when the body raises; the exception
    then keeps propagating to the caller.
    """
    # perf_counter is monotonic, so the measurement cannot jump or go
    # negative if the system clock is adjusted while the body runs.
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        print(f"{name} took {elapsed:.2f} seconds")


@contextmanager
def header_footer_context(title: str, char = "-"):
    """Frame the output of the ``with`` body between a titled header and a footer rule.

    Args:
        title: Text shown in the middle of the header line.
        char: Character repeated to draw the header and footer rules.

    The footer is printed even when the body raises, so the framed section
    is always closed before the exception propagates.
    """
    side_rule = f"{char}" * 50
    print()
    print(side_rule + f" {title} " + side_rule)
    try:
        yield
    finally:
        # 50 + 50 rule characters plus the title and its two padding spaces.
        print(f"{char}" * (100 + len(title) + 2))
        print()


================================================
FILE: tests/utils/aime_eval.md
================================================
# AIME Dataset Evaluator

A Python module for evaluating language models on the AIME (American Invitational Mathematics Examination) dataset. This evaluator automatically downloads and combines multiple AIME test datasets and provides comprehensive mathematical reasoning assessment.


## Basic Usage

```python
from aime_eval import evaluate_model_aime

# Simple AIME evaluation
results = evaluate_model_aime(
    model=your_model,
    tokenizer=your_tokenizer,
    model_type="base_model",
    temperature=0.3,
    n_sampling=8,
    max_tokens=32768
)

print(f"AIME Accuracy: {results['accuracy']:.1f}%")
print(f"Pass@8: {results['pass_at_k']:.1f}%")
```

## Advanced Usage

```python
from aime_eval import evaluate_model_aime, compare_aime_results

# Evaluate multiple model configurations
all_results = []

# Base model
base_results = evaluate_model_aime(
    model=base_model,
    tokenizer=tokenizer,
    model_type="base",
    temperature=0.3,
    n_sampling=8
)
all_results.append(base_results)

# Fine-tuned model
ft_results = evaluate_model_aime(
    model=finetuned_model,
    tokenizer=tokenizer,
    model_type="finetuned",
    temperature=0.3,
    n_sampling=8
)
all_results.append(ft_results)

# Generate comprehensive comparison
compare_aime_results(all_results)
```

## Dataset Format

The evaluator automatically handles AIME dataset format with problems containing:

- **Problem**: Mathematical question text
- **Answer**: Numerical answer (0-999 range for AIME)
- **Solution**: Step-by-step solution (when available)
- **Source**: Original dataset identifier (test2024, test2025-I, test2025-II)

```python
# Automatic dataset download and formatting
{
    "global_id": 0,
    "original_id": "problem_1",
    "source_dataset": "test2024",
    "problem": "Find the number of...",
    "answer": "123",
    "solution": "Step-by-step solution...",
    "prompt": [
        {"role": "system", "content": "You are a mathematical problem solver..."},
        {"role": "user", "content": "Problem: Find the number of..."}
    ]
}
```


## Configuration Examples

### Conservative Evaluation
```python
# Lower temperature for more consistent answers
results = evaluate_model_aime(
    model=model,
    tokenizer=tokenizer,
    model_type="conservative",
    temperature=0.1,
    n_sampling=4,
    top_p=0.9
)
```

### High-Sample Evaluation
```python
# More samples for better Pass@K estimation
results = evaluate_model_aime(
    model=model,
    tokenizer=tokenizer,
    model_type="high_sample",
    temperature=0.5,
    n_sampling=16,
    max_tokens=16384
)
```

### Memory-Optimized
```python
# Reduced parameters for limited resources
results = evaluate_model_aime(
    model=model,
    tokenizer=tokenizer,
    model_type="lite",
    temperature=0.3,
    n_sampling=4,
    max_tokens=8192
)
```

## Examples

### Complete Model Pipeline Evaluation
```python
from aime_eval import evaluate_model_aime, compare_aime_results

def evaluate_training_pipeline(base_model, finetuned_model, merged_model, tokenizer):
    """Evaluate complete training pipeline on AIME"""

    all_results = []

    # Standard evaluation configuration
    eval_config = {
        "temperature": 0.3,
        "n_sampling": 8,
        "max_tokens": 32768,
        "top_p": 0.95,
        "seed": 0
    }

    # Evaluate base model
    print("Evaluating base model...")
    base_results = evaluate_model_aime(
        model=base_model,
        tokenizer=tokenizer,
        model_type="base",
        **eval_config
    )
    all_results.append(base_results)

    # Evaluate fine-tuned model
    print("Evaluating fine-tuned model...")
    ft_results = evaluate_model_aime(
        model=finetuned_model,
        tokenizer=tokenizer,
        model_type="finetuned",
        **eval_config
    )
    all_results.append(ft_results)

    # Evaluate merged model
    print("Evaluating merged model...")
    merged_results = evaluate_model_aime(
        model=merged_model,
        tokenizer=tokenizer,
        model_type="merged",
        **eval_config
    )
    all_results.append(merged_results)

    # Generate comparison report
    compare_aime_results(all_results)

    return all_results
```

### Quantization Impact Analysis
```python
def analyze_quantization_impact(model_paths, tokenizer):
    """Analyze impact of different quantization levels"""

    quantization_configs = {
        "fp16": {"load_in_4bit": False, "load_in_8bit": False},
        "8bit": {"load_in_4bit": False, "load_in_8bit": True},
        "4bit": {"load_in_4bit": True, "load_in_8bit": False}
    }

    all_results = []

    for quant_name, load_config in quantization_configs.items():
        print(f"Evaluating {quant_name} quantization...")

        # Load model with specific quantization
        model = load_model_with_config(model_paths["merged"], **load_config)

        results = evaluate_model_aime(
            model=model,
            tokenizer=tokenizer,
            model_type=f"merged_{quant_name}",
            temperature=0.3,
            n_sampling=8,
            max_tokens=32768
        )
        all_results.append(results)

        # Cleanup
        del model
        torch.cuda.empty_cache()

    compare_aime_results(all_results)
    return all_results
```

## Output Format

### Individual Evaluation Results
```
🧮 AIME EVALUATION - BASE MODEL
Combined Dataset: test2024 + test2025-I + test2025-II
====================================================================

🎯 Overall Performance:
   Total problems:           45
   Correct answers:         12/45 (26.7%)
   Pass@8:                  31.1%

📈 Performance by Dataset:
    test2024:   4/15 (26.7%)
  test2025-I:   5/15 (33.3%)
 test2025-II:   3/15 (20.0%)

🎖️  AIME Performance:     🎯 VERY GOOD (26.7%)
```

### Comparison Report
```
COMPREHENSIVE AIME MODEL COMPARISON
================================================================================
Model           Accuracy %   Pass@K %   Correct  Total
--------------------------------------------------------------------------------
finetuned       31.1         35.6       14       45
base            26.7         31.1       12       45
merged_4bit     24.4         28.9       11       45

IMPROVEMENT ANALYSIS
==================================================
finetuned vs base:
  Accuracy improvement:  +4.4%
  Pass@K improvement:    +4.5%
```

## Performance Tiers

The evaluator provides performance assessment based on AIME difficulty:

- **🏆 EXCEPTIONAL**: ≥50% accuracy
- **✅ EXCELLENT**: ≥30% accuracy
- **🎯 VERY GOOD**: ≥20% accuracy
- **⚠️ GOOD**: ≥10% accuracy
- **📈 FAIR**: ≥5% accuracy
- **❌ NEEDS IMPROVEMENT**: <5% accuracy


================================================
FILE: tests/utils/aime_eval.py
================================================
"""
AIME Dataset Evaluation Module

This module provides functions to evaluate language models on the combined AIME dataset
(test2024 + test2025-I + test2025-II).
"""

import json
import requests
import os
import re
import logging
from typing import List, Dict, Any
from tqdm import tqdm
from vllm import SamplingParams


def download_and_combine_aime_datasets(
    data_dir: str = "./data/aime", timeout: float = 30.0
) -> str:
    """Download all AIME datasets and combine them into a single file

    Args:
        data_dir: Directory that holds (or will hold) the combined
            ``aime.jsonl`` file; created if missing.
        timeout: Seconds to wait for each HTTP request before giving up on
            that dataset.

    Returns:
        Path of the combined JSONL file. An existing file is reused as-is
        without any network access.

    Raises:
        RuntimeError: If no problem could be downloaded from any dataset.

    Each stored problem gains ``source_dataset``, ``original_id`` and
    ``global_id`` fields. A dataset that fails to download is skipped
    (best effort) and reported in a warning, because the resulting file is
    then incomplete but will still be reused by later calls.
    """

    datasets = {
        "test2024": "https://raw.githubusercontent.com/GAIR-NLP/AIME-Preview/main/eval/data/aime/test2024.jsonl",
        "test2025-I": "https://raw.githubusercontent.com/GAIR-NLP/AIME-Preview/main/eval/data/aime/test2025-I.jsonl",
        "test2025-II": "https://raw.githubusercontent.com/GAIR-NLP/AIME-Preview/main/eval/data/aime/test2025-II.jsonl",
    }

    os.makedirs(data_dir, exist_ok = True)
    combined_filepath = os.path.join(data_dir, "aime.jsonl")

    # Check if combined file already exists
    if os.path.exists(combined_filepath):
        print(f"Combined AIME dataset already exists at {combined_filepath}")
        return combined_filepath

    print("Downloading and combining AIME datasets...")

    all_problems = []
    failed_datasets = []
    global_id = 0

    for dataset_name, url in datasets.items():
        print(f"  Downloading {dataset_name}...")

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout = timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"    Error downloading {dataset_name}: {e}")
            failed_datasets.append(dataset_name)
            continue

        # Parse each line and add source information
        for line_num, line in enumerate(response.text.strip().split("\n")):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(
                    f"    Warning: Error parsing line {line_num + 1} in {dataset_name}: {e}"
                )
                continue
            # Add source dataset information and global ID
            data["source_dataset"] = dataset_name
            data["original_id"] = data.get("id", line_num)
            data["global_id"] = global_id
            global_id += 1
            all_problems.append(data)

    if not all_problems:
        raise RuntimeError("No problems were successfully downloaded")

    # Write to a temporary file and rename it into place, so an interrupted
    # write can never leave a truncated file that later calls would reuse.
    temp_filepath = combined_filepath + ".tmp"
    with open(temp_filepath, "w", encoding = "utf-8") as f:
        for problem in all_problems:
            f.write(json.dumps(problem, ensure_ascii = False) + "\n")
    os.replace(temp_filepath, combined_filepath)

    downloaded_count = len(datasets) - len(failed_datasets)
    print(f"✅ Combined {len(all_problems)} problems from {downloaded_count} datasets")
    print(f"   Saved to: {combined_filepath}")

    # Print summary by dataset
    for dataset_name in datasets:
        count = sum(1 for p in all_problems if p["source_dataset"] == dataset_name)
        print(f"   {dataset_name}: {count} problems")

    if failed_datasets:
        print(
            f"   Warning: could not download {', '.join(failed_datasets)}; "
            f"delete {combined_filepath} to retry with the full dataset"
        )

    return combined_filepath


def load_aime_dataset(data_dir: str = "./data/aime") -> List[Dict[str, Any]]:
    """Read the combined AIME file and turn every problem into an evaluation example.

    Args:
        data_dir: Directory passed to ``download_and_combine_aime_datasets``,
            which supplies the path of the combined JSONL file.

    Returns:
        One dict per parsed line, holding the identifiers, problem text, the
        answer as a string, optional solution/url, and a two-message chat
        ``prompt`` (system + user). Lines that are blank or not valid JSON
        are skipped.
    """
    system_message = "You are a mathematical problem solver. Solve the given problem step by step and provide your final answer clearly."

    # Download and combine if needed
    dataset_path = download_and_combine_aime_datasets(data_dir)

    examples = []
    with open(dataset_path, "r", encoding = "utf-8") as handle:
        for index, raw_line in enumerate(handle):
            stripped = raw_line.strip()
            if not stripped:
                continue

            try:
                record = json.loads(stripped)
            except json.JSONDecodeError as e:
                print(f"Error parsing line {index + 1}: {e}")
                continue

            # Fall back to the line position when the file carries no ids.
            examples.append(
                {
                    "global_id": record.get("global_id", index),
                    "original_id": record.get(
                        "original_id", record.get("id", index)
                    ),
                    "source_dataset": record.get("source_dataset", "unknown"),
                    "problem": record["problem"],
                    "answer": str(record["answer"]),  # compared as strings later
                    "solution": record.get("solution", ""),
                    "url": record.get("url", ""),
                    # Chat messages handed to the tokenizer's chat template
                    "prompt": [
                        {"role": "system", "content": system_message},
                        {
                            "role": "user",
                            "content": f"Problem: {record['problem']}\n\nSolve this step by step and provide your final numerical answer.",
                        },
                    ],
                }
            )

    print(f"Loaded {len(examples)} problems from combined AIME dataset")

    # Tally problems per source, in order of first appearance
    per_source = {}
    for entry in examples:
        origin = entry["source_dataset"]
        per_source[origin] = per_source.get(origin, 0) + 1

    for origin, total in per_source.items():
        print(f"  {origin}: {total} problems")

    return examples


def extract_aime_answer(response: str) -> str:
    """Extract numerical answer from AIME response

    Args:
        response: Raw text generated by the model.

    Returns:
        The answer as a decimal string without leading zeros (AIME answers
        are integers from 0-999), or ``""`` when nothing suitable is found
        or ``response`` is empty.

    Explicit answer phrasings are tried first, in priority order, and the
    last occurrence of the first phrasing that matches wins. If none match,
    the last standalone 1-3 digit number anywhere in the text is used.
    """
    if not response:
        return ""

    # The (?!\d) lookahead stops a longer number from being truncated to its
    # first three digits (e.g. "answer is 1234" must not be read as 123).
    patterns = [
        r"(?:the )?(?:final )?answer is (\d{1,3})(?!\d)",
        r"(?:therefore|thus|so),?\s*(?:the )?(?:final )?answer is (\d{1,3})(?!\d)",
        r"\\boxed\{(\d{1,3})\}",
        r"\$\\boxed\{(\d{1,3})\}\$",
        r"(?:answer|result):\s*(\d{1,3})(?!\d)",
        r"(?:^|\n)\s*(\d{1,3})\s*(?:\n|$)",  # Standalone number
    ]

    response_lower = response.lower().strip()

    for pattern in patterns:
        matches = re.findall(pattern, response_lower, re.MULTILINE | re.IGNORECASE)
        if matches:
            # Last match is the most likely final answer; int() drops
            # leading zeros so "007" compares equal to "7".
            return str(int(matches[-1]))

    # If no clear pattern found, fall back to the last standalone 1-3 digit number
    numbers = re.findall(r"\b(\d{1,3})\b", response)
    if numbers:
        return str(int(numbers[-1]))

    return ""


def get_num_tokens(text, tokenizer_instance):
    """Return the length of the first ``input_ids`` row the tokenizer yields for ``text``.

    Empty or falsy text counts as 0 tokens without calling the tokenizer.
    """
    if text:
        encoded = tokenizer_instance(text, return_tensors = "pt")
        first_row = encoded["input_ids"][0]
        return len(first_row)
    return 0


def evaluate_model_aime(
    model,
    tokenizer,
    model_type = "base",
    lora_request = None,
    temperature = 0.3,
    n_sampling = 8,
    max_tokens = 32768,
    top_p = 0.95,
    seed = 0,
):
    """Evaluate model on combined AIME dataset with official configuration

    Args:
        model: Object exposing ``fast_generate(prompts, sampling_params=...,
            lora_request=..., use_tqdm=...)``.
        tokenizer: Tokenizer used for ``apply_chat_template`` and for token
            counting.
        model_type: Label used in the printed report and the output filename.
        lora_request: Passed through unchanged to ``model.fast_generate``.
        temperature: Sampling temperature.
        n_sampling: Number of samples generated per problem.
        max_tokens: Generation length limit per sample.
        top_p: Nucleus sampling parameter.
        seed: Sampling seed.

    Returns:
        A dict of aggregate metrics (``accuracy`` and ``pass_at_k`` are
        percentages), or ``None`` when the dataset cannot be loaded or is
        empty.

    A problem counts as correct when any of its samples extracts the ground
    truth answer. Metrics and per-problem records are also written to
    ``aime_eval_combined_<model_type>_t<temperature>_n<n_sampling>.json`` in
    the current working directory.
    """

    print(f"\n{'='*70}")
    print(f"🧮 AIME EVALUATION - {model_type.upper()} MODEL")
    print(f"Combined Dataset: test2024 + test2025-I + test2025-II")
    print(f"{'='*70}")

    # Load combined AIME dataset
    try:
        eval_dataset = load_aime_dataset()
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None

    if not eval_dataset:
        print("No examples found in dataset")
        return None

    # Initialize tracking variables
    records = {}
    input_tokens = []
    output_tokens = []
    correct_answers = 0

    # Track performance by source dataset
    source_stats = {}
    for example in eval_dataset:
        source = example["source_dataset"]
        if source not in source_stats:
            source_stats[source] = {"total": 0, "correct": 0}
        source_stats[source]["total"] += 1

    # Setup sampling parameters (AIME configuration)
    sampling_params = SamplingParams(
        temperature = temperature,
        top_p = top_p,
        max_tokens = max_tokens,
        n = n_sampling,  # Multiple samples per question
        seed = seed,
    )

    print(f"\n🔧 Configuration:")
    print(f"   Temperature: {temperature}")
    print(f"   Samples per question: {n_sampling}")
    print(f"   Max tokens: {max_tokens}")
    print(f"   Top-p: {top_p}")
    print(f"   Seed: {seed}")

    # Temporarily suppress verbose logging
    original_levels = {}
    loggers_to_suppress = [
        "vllm",
        "vllm.engine",
        "vllm.worker",
        "vllm.model_executor",
        "vllm.executor",
        "ray",
    ]

    for logger_name in loggers_to_suppress:
        logger = logging.getLogger(logger_name)
        original_levels[logger_name] = logger.level
        logger.setLevel(logging.WARNING)

    try:
        print(f"\n🚀 Evaluating {len(eval_dataset)} problems...")

        # Main evaluation loop
        with tqdm(
            total = len(eval_dataset), desc = "Processing AIME problems", unit = "problem"
        ) as pbar:
            for task_id, item in enumerate(eval_dataset):
                try:
                    # Prepare prompt
                    prompt_text = tokenizer.apply_chat_template(
                        item["prompt"], add_generation_prompt = True, tokenize = False
                    )

                    input_tokens.append(get_num_tokens(prompt_text, tokenizer))

                    # Generate multiple responses
                    outputs = model.fast_generate(
                        [prompt_text],
                        sampling_params = sampling_params,
                        lora_request = lora_request,
                        use_tqdm = False,
                    )[0].outputs

                    # Process all generated responses
                    responses = [output.text for output in outputs]
                    extracted_answers = [
                        extract_aime_answer(response) for response in responses
                    ]

                    # Calculate total output tokens
                    total_output_tokens = sum(
                        get_num_tokens(response, tokenizer) for response in responses
                    )
                    output_tokens.append(total_output_tokens)

                    # Check if any answer is correct
                    ground_truth = item["answer"]
                    correct_responses = [
                        ans == ground_truth for ans in extracted_answers
                    ]
                    is_correct = any(correct_responses)

                    if is_correct:
                        correct_answers += 1
                        source_stats[item["source_dataset"]]["correct"] += 1

                    # Store detailed record
                    records[task_id] = {
                        "global_id": item["global_id"],
                        "original_id": item["original_id"],
                        "source_dataset": item["source_dataset"],
                        "problem": item["problem"],
                        "ground_truth": ground_truth,
                        "responses": responses,
                        "extracted_answers": extracted_answers,
                        "correct_responses": correct_responses,
                        "is_correct": is_correct,
                        "input_tokens": input_tokens[-1],
                        "output_tokens": total_output_tokens,
                        "n_correct": sum(correct_responses),
                        "n_total": len(responses),
                        "solution": item.get("solution", ""),
                        "url": item.get("url", ""),
                    }

                    # Update progress
                    current_accuracy = correct_answers / (task_id + 1) * 100
                    pbar.set_postfix(
                        {
                            "accuracy": f"{current_accuracy:.1f}%",
                            "correct": correct_answers,
                            "total": task_id + 1,
                        }
                    )
                    pbar.update(1)

                except Exception as e:
                    # Best effort: record the failure and move on to the next
                    # problem; the failed problem counts as incorrect.
                    print(f"\nError processing problem {task_id}: {str(e)}")
                    records[task_id] = {
                        "global_id": item.get("global_id", task_id),
                        "original_id": item.get("original_id", task_id),
                        "source_dataset": item.get("source_dataset", "unknown"),
                        "problem": item["problem"],
                        "ground_truth": item["answer"],
                        "error": str(e),
                        "is_correct": False,
                    }
                    pbar.update(1)
                    continue

    finally:
        # Restore logging levels
        for logger_name, level in original_levels.items():
            logging.getLogger(logger_name).setLevel(level)

    # Calculate metrics
    total_problems = len(eval_dataset)
    accuracy = correct_answers / total_problems * 100

    # Calculate Pass@k (fraction of problems with at least one correct
    # sample). Problems that errored carry no sample counts and are left out
    # here, whereas `accuracy` above counts them as failures.
    pass_at_k_scores = [
        1.0 if record["n_correct"] > 0 else 0.0
        for record in records.values()
        if "n_correct" in record and "n_total" in record
    ]

    pass_at_k = sum(pass_at_k_scores) / len(pass_at_k_scores) if pass_at_k_scores else 0

    # Calculate per-source accuracies
    source_accuracies = {}
    for source, stats in source_stats.items():
        source_accuracies[source] = (
            (stats["correct"] / stats["total"] * 100) if stats["total"] > 0 else 0
        )

    results = {
        "model_type": model_type,
        "dataset": "aime_combined",
        "total_problems": total_problems,
        "correct_answers": correct_answers,
        "accuracy": accuracy,
        "pass_at_k": pass_at_k * 100,
        "source_stats": source_stats,
        "source_accuracies": source_accuracies,
        "temperature": temperature,
        "n_sampling": n_sampling,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "seed": seed,
        "avg_input_tokens": sum(input_tokens) / len(input_tokens)
        if input_tokens
        else 0,
        "avg_output_tokens": sum(output_tokens) / len(output_tokens)
        if output_tokens
        else 0,
        "max_input_tokens": max(input_tokens) if input_tokens else 0,
        "max_output_tokens": max(output_tokens) if output_tokens else 0,
    }

    # Save results
    filename = f"aime_eval_combined_{model_type}_t{temperature}_n{n_sampling}.json"
    with open(filename, "w", encoding = "utf-8") as f:
        json.dump({"results": results, "records": records}, f, indent = 4)

    # Print comprehensive summary
    print(f"\n{'='*70}")
    print(f"📊 AIME EVALUATION RESULTS - {model_type.upper()}")
    print(f"{'='*70}")

    print(f"\n🎯 Overall Performance:")
    print(f"   Total problems:       {total_problems:>6}")
    print(
        f"   Correct answers:      {correct_answers:>6}/{total_problems} ({accuracy:>5.1f}%)"
    )
    # Print the percentage stored in `results`; the raw `pass_at_k` is a
    # 0-1 fraction and would be mislabelled by the trailing "%".
    print(f"   Pass@{n_sampling}:              {results['pass_at_k']:>10.1f}%")

    print(f"\n📈 Performance by Dataset:")
    for source, stats in source_stats.items():
        source_acc = source_accuracies[source]
        print(
            f"   {source:>12}: {stats['correct']:>3}/{stats['total']:>3} ({source_acc:>5.1f}%)"
        )

    print(f"\n🔧 Configuration:")
    print(f"   Temperature:          {temperature}")
    print(f"   Samples per problem:  {n_sampling}")
    print(f"   Max tokens:           {max_tokens}")
    print(f"   Top-p:                {top_p}")
    print(f"   Seed:                 {seed}")

    print(f"\n📝 Token Statistics:")
    print(f"   Avg input tokens:     {results['avg_input_tokens']:>10.1f}")
    print(f"   Avg output tokens:    {results['avg_output_tokens']:>10.1f}")
    print(f"   Max input tokens:     {results['max_input_tokens']:>10}")
    print(f"   Max output tokens:    {results['max_output_tokens']:>10}")

    # Performance assessment for AIME
    if accuracy >= 50:
        tier = "🏆 EXCEPTIONAL"
    elif accuracy >= 30:
        tier = "✅ EXCELLENT"
    elif accuracy >= 20:
        tier = "🎯 VERY GOOD"
    elif accuracy >= 10:
        tier = "⚠️  GOOD"
    elif accuracy >= 5:
        tier = "📈 FAIR"
    else:
        tier = "❌ NEEDS IMPROVEMENT"

    print(f"\n🎖️  AIME Performance:     {tier} ({accuracy:.1f}%)")
    print(f"\n💾 Detailed results saved to: {filename}")
    print(f"\n{'='*70}")

    return results


# Comparison functions for multiple model results
def compare_aime_results(all_results):
    """Generate comprehensive comparison for AIME evaluation results

    Args:
        all_results: List of result dicts as returned by
            ``evaluate_model_aime``. ``None`` entries (a failed evaluation)
            are ignored. The first remaining entry is treated as the
            baseline for the improvement analysis.

    Prints the comparison tables and writes ``aime_model_comparison.json``
    (summary plus the most accurate model) to the current working
    directory. When there is nothing to compare, a short notice is printed
    and no file is written.
    """
    # evaluate_model_aime returns None when the dataset cannot be loaded;
    # such entries would crash the table formatting below.
    all_results = [result for result in (all_results or []) if result is not None]
    if not all_results:
        print("No AIME results to compare")
        return

    print(f"\n{'='*80}")
    print("COMPREHENSIVE AIME MODEL COMPARISON")
    print(f"{'='*80}")

    # Main comparison table
    print(
        f"{'Model':<15} {'Accuracy %':<12} {'Pass@K %':<10} {'Correct':<8} {'Total':<8}"
    )
    print("-" * 80)

    for result in all_results:
        print(
            f"{result['model_type']:<15} "
            f"{result['accuracy']:<12.1f} "
            f"{result['pass_at_k']:<10.1f} "
            f"{result['correct_answers']:<8} "
            f"{result['total_problems']:<8}"
        )

    # Performance improvement analysis
    if len(all_results) > 1:
        print(f"\n{'='*50}")
        print("IMPROVEMENT ANALYSIS")
        print(f"{'='*50}")

        base_result = all_results[0]  # Assume first is base model

        for result in all_results[1:]:
            print(f"\n{result['model_type']} vs {base_result['model_type']}:")

            accuracy_improvement = result["accuracy"] - base_result["accuracy"]
            pass_k_improvement = result["pass_at_k"] - base_result["pass_at_k"]

            print(f"  Accuracy improvement:  {accuracy_improvement:+.1f}%")
            print(f"  Pass@K improvement:    {pass_k_improvement:+.1f}%")

    # Dataset breakdown
    print(f"\n{'='*50}")
    print("PERFORMANCE BY DATASET")
    print(f"{'='*50}")

    # Column set comes from the first result; other results report 0 for
    # any dataset they did not cover.
    if "source_accuracies" in all_results[0]:
        datasets = list(all_results[0]["source_accuracies"].keys())

        print(f"{'Model':<15}", end = "")
        for dataset in datasets:
            print(f"{dataset:<15}", end = "")
        print()
        print("-" * (15 + 15 * len(datasets)))

        for result in all_results:
            print(f"{result['model_type']:<15}", end = "")
            per_source = result.get("source_accuracies", {})
            for dataset in datasets:
                accuracy = per_source.get(dataset, 0)
                print(f"{accuracy:<15.1f}", end = "")
            print()

    # Save comparison
    comparison_data = {
        "summary": all_results,
        "best_model": max(all_results, key = lambda x: x["accuracy"]),
    }

    with open("aime_model_comparison.json", "w") as f:
        json.dump(comparison_data, f, indent = 4)

    print(
        f"\nBest performing model: {comparison_data['best_model']['model_type']} "
        f"({comparison_data['best_model']['accuracy']:.1f}% accuracy)"
    )


================================================
FILE: tests/utils/cleanup_utils.py
================================================
import gc
import logging
import os
import shutil
import torch
import sys
import warnings


def clear_memory(variables_to_clear = None, verbose = False, clear_all_caches = True):
    """
    Release as much Python and CUDA memory as possible.

    Args:
        variables_to_clear: Names to remove from this module's global
            namespace (the lookup uses this module's ``globals()``); a
            built-in list of common names is used when None
        verbose: Print deleted names, GC counts and GPU memory figures
        clear_all_caches: Also clear LRU caches, reset CUDA memory
            statistics and clear the TorchScript class state if present

    Logger levels are captured on entry and re-applied on exit.
    """

    # Capture logger levels so they can be put back at the end
    level_snapshot = {
        logger_name: candidate.level
        for logger_name, candidate in logging.Logger.manager.loggerDict.items()
        if isinstance(candidate, logging.Logger)
    }
    root_logger_level = logging.getLogger().level

    if variables_to_clear is None:
        variables_to_clear = [
            "inputs",
            "model",
            "base_model",
            "processor",
            "tokenizer",
            "base_processor",
            "base_tokenizer",
            "trainer",
            "peft_model",
            "bnb_config",
        ]

    # 1. LRU caches go first
    if clear_all_caches:
        clear_all_lru_caches(verbose)

    # 2. Drop the requested names from the module namespace
    module_namespace = globals()
    removed_names = []
    for name in variables_to_clear:
        if name not in module_namespace:
            continue
        module_namespace.pop(name)
        removed_names.append(name)

    if verbose and removed_names:
        print(f"Deleted variables: {removed_names}")

    # 3. Several GC passes, so objects freed by one pass can unlock more in the next
    for attempt in range(3):
        freed = gc.collect()
        if verbose and freed > 0:
            print(f"GC pass {attempt+1}: collected {freed} objects")

    # 4. CUDA cleanup
    if torch.cuda.is_available():
        # Allocation before cleanup, only needed for the verbose report
        if verbose:
            allocated_before = torch.cuda.memory_allocated() / 1024**3

        torch.cuda.empty_cache()
        torch.cuda.synchronize()

        if clear_all_caches:
            # Reset memory stats
            torch.cuda.reset_peak_memory_stats()
            torch.cuda.reset_accumulated_memory_stats()

            # Clear the TorchScript class state when this torch build has it
            jit_state = getattr(torch.jit, "_state", None)
            if jit_state is not None and hasattr(jit_state, "_clear_class_state"):
                jit_state._clear_class_state()

            # Second cache flush after the resets
            torch.cuda.empty_cache()

        # Final garbage collection
        gc.collect()

        if verbose:
            allocated_after = torch.cuda.memory_allocated() / 1024**3
            reserved_now = torch.cuda.memory_reserved() / 1024**3
            print(
                f"GPU memory - Before: {allocated_before:.2f} GB, After: {allocated_after:.2f} GB"
            )
            print(f"GPU reserved memory: {reserved_now:.2f} GB")
            if allocated_before > 0:
                print(f"Memory freed: {allocated_before - allocated_after:.2f} GB")

    # Re-apply the captured logging levels
    logging.getLogger().setLevel(root_logger_level)
    for logger_name, saved_level in level_snapshot.items():
        if logger_name in logging.Logger.manager.loggerDict:
            logging.getLogger(logger_name).setLevel(saved_level)


def clear_all_lru_caches(verbose = True):
    """Clear all LRU caches in loaded modules.

    Every attribute of every loaded module (except a few skipped module
    prefixes) that exposes ``cache_clear`` has it called, followed by a
    short list of known cache paths. Failures on individual attributes,
    modules or paths are ignored.

    Args:
        verbose: Print how many caches were cleared (only when at least one
            was).
    """
    cleared_caches = []

    # Modules to skip to avoid warnings
    skip_modules = {
        "torch.distributed",
        "torchaudio",
        "torch._C",
        "torch.distributed.reduce_op",
        "torchaudio.backend",
    }
    skip_prefixes = tuple(skip_modules)

    # Create a static list of modules to avoid RuntimeError
    # (attribute access below can import modules and resize sys.modules)
    modules = list(sys.modules.items())

    # Method 1: Clear caches in all loaded modules
    # The filters only apply inside this context, so the attribute access
    # itself must happen inside it for warnings to be suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        warnings.simplefilter("ignore", UserWarning)
        warnings.simplefilter("ignore", DeprecationWarning)

        for module_name, module in modules:
            if module is None:
                continue

            # Skip problematic modules
            if module_name.startswith(skip_prefixes):
                continue

            try:
                attr_names = dir(module)
            except Exception:
                continue  # Skip problematic modules

            # Look for functions with lru_cache
            for attr_name in attr_names:
                try:
                    attr = getattr(module, attr_name)
                    if hasattr(attr, "cache_clear"):
                        attr.cache_clear()
                        cleared_caches.append(f"{module_name}.{attr_name}")
                except Exception:
                    continue  # Skip problematic attributes

    # Method 2: Clear specific known caches
    known_caches = [
        "transformers.utils.hub.cached_file",
        "transformers.tokenization_utils_base.get_tokenizer",
        "torch._dynamo.utils.counters",
    ]

    for cache_path in known_caches:
        try:
            parts = cache_path.split(".")
            module = sys.modules.get(parts[0])
            if module:
                # Walk the dotted path; stop as soon as a part is missing
                obj = module
                for part in parts[1:]:
                    obj = getattr(obj, part, None)
                    if obj is None:
                        break
                if obj is not None and hasattr(obj, "cache_clear"):
                    obj.cache_clear()
                    cleared_caches.append(cache_path)
        except Exception:
            continue  # Skip problematic caches

    if verbose and cleared_caches:
        print(f"Cleared {len(cleared_caches)} LRU caches")


def clear_specific_lru_cache(func):
    """Clear the cache attached to ``func`` when it exposes ``cache_clear``.

    Parameters:
        func: Any object; typically a function wrapped by ``functools.lru_cache``.

    Returns:
        bool: True if ``func.cache_clear()`` was invoked, False if ``func`` has
        no ``cache_clear`` attribute.
    """
    if not hasattr(func, "cache_clear"):
        return False
    func.cache_clear()
    return True


# Additional utility for monitoring cache sizes
def monitor_cache_sizes():
    """Collect statistics for every cache-like attribute of the loaded modules.

    Every attribute of every module in ``sys.modules`` that exposes a
    ``cache_info()`` method (as ``functools.lru_cache`` wrappers do) is
    queried. Modules or attributes that raise while being inspected are
    skipped, so the scan is best-effort.

    Returns:
        list[dict]: One entry per discovered cache with the keys ``function``
        (``"<module>.<attribute>"``), ``size``, ``hits`` and ``misses``,
        sorted by ``size`` in descending order.
    """
    cache_info = []

    # Iterate over a snapshot: getattr() on a lazily-loading module can import
    # further modules, and mutating sys.modules while iterating it directly
    # raises "dictionary changed size during iteration" outside any try block.
    for module_name, module in list(sys.modules.items()):
        if module is None:
            continue

        try:
            attr_names = dir(module)
        except Exception:
            continue  # Skip modules that cannot be introspected

        for attr_name in attr_names:
            try:
                attr = getattr(module, attr_name)
                if not hasattr(attr, "cache_info"):
                    continue
                info = attr.cache_info()
                cache_info.append(
                    {
                        "function": f"{module_name}.{attr_name}",
                        "size": info.currsize,
                        "hits": info.hits,
                        "misses": info.misses,
                    }
                )
            except Exception:
                # Best-effort scan; unlike a bare ``except:`` this still lets
                # KeyboardInterrupt / SystemExit propagate.
                continue

    return sorted(cache_info, key = lambda x: x["size"], reverse = True)


def safe_remove_directory(path):
    """Recursively delete ``path`` if it is an existing directory.

    Failures are reported with ``print`` instead of being raised.

    Returns:
        bool: True if the directory tree was removed; False if ``path`` is not
        an existing directory or if removal raised an exception.
    """
    try:
        if not os.path.exists(path) or not os.path.isdir(path):
            print(f"Path {path} is not a valid directory")
            return False
        shutil.rmtree(path)
    except Exception as e:
        print(f"Failed to remove directory {path}: {e}")
        return False
    return True


================================================
FILE: tests/utils/data_utils.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from datasets import Dataset

# Fixed question/answer pair shared by the helpers in this module.
QUESTION = "What day was I born?"
ANSWER = "January 1, 2058"
# The same pair expressed as role/content chat messages.
USER_MESSAGE = {"role": "user", "content": QUESTION}
ASSISTANT_MESSAGE = {"role": "assistant", "content": ANSWER}
DTYPE = torch.bfloat16
# A list holding one conversation (user turn followed by assistant turn);
# this is the default ``messages`` value of create_instruction_dataset.
DEFAULT_MESSAGES = [[USER_MESSAGE, ASSISTANT_MESSAGE]]


def create_instruction_dataset(messages: list[dict] = DEFAULT_MESSAGES):
    """Build a ``Dataset`` with a single ``"messages"`` column from ``messages``.

    Parameters:
        messages: Values for the ``"messages"`` column; defaults to
            ``DEFAULT_MESSAGES``.

    Returns:
        The result of ``Dataset.from_dict({"messages": messages})``.
    """
    return Dataset.from_dict({"messages": messages})


def create_dataset(tokenizer, num_examples: int = None, messages: list[dict] = None):
    """Create a text dataset by rendering chat messages with a tokenizer's template.

    Parameters:
        tokenizer: Object providing ``apply_chat_template(messages, tokenize=False)``.
        num_examples: When given, the dataset is repeated (if too short) and
            then truncated so that it holds exactly this many rows.
        messages: Conversations to render. When None, the default of
            ``create_instruction_dataset`` is used.

    Returns:
        A dataset whose ``"messages"`` column has been replaced by a ``"text"``
        column holding the rendered chat template output.
    """
    # Forwarding ``messages=None`` explicitly would override the callee's
    # default and build a dataset from None, so only pass it when provided.
    if messages is None:
        dataset = create_instruction_dataset()
    else:
        dataset = create_instruction_dataset(messages)

    def _apply_chat_template(example):
        chat = tokenizer.apply_chat_template(example["messages"], tokenize = False)
        return {"text": chat}

    dataset = dataset.map(_apply_chat_template, remove_columns = "messages")
    if num_examples is not None:
        if len(dataset) < num_examples:
            # Repeat enough times to reach at least num_examples rows; the
            # select() below trims any surplus.
            num_repeats = num_examples // len(dataset) + 1
            dataset = dataset.repeat(num_repeats)
        dataset = dataset.select(range(num_examples))

    return dataset


def describe_param(
    param: torch.Tensor,
    include_l1: bool = False,
    include_l2: bool = False,
    include_infinity: bool = False,
    as_str: bool = True,
) -> "dict | str":
    """
    Provide a statistical summary of a tensor.

    The tensor is converted to float32 before any statistic is computed.

    Parameters:
        param: torch.Tensor
        include_l1 (bool): Whether to include the L1 norm (sum of absolute values).
        include_l2 (bool): Whether to include the L2 norm (``param.norm()``).
        include_infinity (bool): Whether to include the infinity norm (max absolute value).
        as_str (bool): Whether to return the summary as a formatted string.

    Returns:
        str | dict: The output of ``format_summary`` when ``as_str`` is True
        (the default), otherwise a dictionary with the following statistics:
              - shape: Dimensions of the tensor.
              - mean: Average value.
              - std: Standard deviation.
              - min: Minimum value.
              - max: Maximum value.
              - percentile_25: 25th percentile.
              - percentile_50: 50th percentile (median).
              - percentile_75: 75th percentile.
              Additionally, if enabled:
              - L1_norm: Sum of absolute values.
              - L2_norm: Euclidean norm.
              - infinity_norm: Maximum absolute value.
    """

    param = param.float()

    # Compute all three percentiles with a single quantile call instead of
    # processing the tensor once per percentile.
    quantile_points = torch.tensor(
        [0.25, 0.5, 0.75], dtype = param.dtype, device = param.device
    )
    percentile_25, percentile_50, percentile_75 = param.quantile(
        quantile_points
    ).tolist()

    summary = {
        "shape": param.shape,
        "mean": param.mean().item(),
        "std": param.std().item(),
        "min": param.min().item(),
        "max": param.max().item(),
        "percentile_25": percentile_25,
        "percentile_50": percentile_50,
        "percentile_75": percentile_75,
    }

    if include_l1:
        summary["L1_norm"] = param.abs().sum().item()
    if include_l2:
        summary["L2_norm"] = param.norm().item()
    if include_infinity:
        summary["infinity_norm"] = param.abs().max().item()

    return format_summary(summary) if as_str else summary


def format_summary(stats: dict, precision: int = 6) -> str:
    """
    Render a statistics dictionary as one ``key: value`` line per entry.

    Parameters:
        stats (dict): Mapping of statistic names to values, e.g. the
            dictionary produced by describe_param.
        precision (int): Number of decimal places used for float values.

    Returns:
        str: The lines joined with newlines. Floats use fixed-point notation,
        tuples are shown as ``(a, b)``, lists as ``[a, b]``, and everything
        else via ``str``.
    """

    def _render(value):
        if isinstance(value, float):
            return f"{value:.{precision}f}"
        if isinstance(value, (tuple, list)):
            # Sequences such as the shape: stringify each element, then wrap
            # in the bracket style matching the container type.
            joined = ", ".join(map(str, value))
            return f"({joined})" if isinstance(value, tuple) else f"[{joined}]"
        return str(value)

    return "\n".join(f"{key}: {_render(value)}" for key, value in stats.items())


def get_peft_weights(model):
    """Return the parameters of ``model`` whose name contains "lora_A" or "lora_B".

    Returns:
        dict: Parameter name -> parameter, in ``model.named_parameters()`` order.
    """
    # ruff: noqa
    lora_markers = ("lora_A", "lora_B")
    selected = {}
    for name, param in model.named_parameters():
        if any(marker in name for marker in lora_markers):
            selected[name] = param
    return selected


def describe_peft_weights(model):
    """Yield ``(name, summary)`` for each parameter returned by get_peft_weights.

    ``summary`` is the string form produced by ``describe_param(..., as_str=True)``.
    """
    lora_weights = get_peft_weights(model)
    for name in lora_weights:
        yield name, describe_param(lora_weights[name], as_str = True)


def check_responses(responses: list[str], answer: str, prompt: str = None) -> bool:
    """Report, per response, whether it contains ``answer``.

    A check mark line is printed for each response containing ``answer``. For
    each one that does not, a cross line is printed followed by the response
    text (with ``prompt`` stripped out when ``prompt`` is given).

    Parameters:
        responses: Generated texts to inspect.
        answer: Substring that each response is expected to contain.
        prompt: Optional text removed from a failing response before printing.

    Returns:
        bool: True if every response contains ``answer`` (vacuously True for
        an empty list), otherwise False.
    """
    all_contain_answer = True
    for i, response in enumerate(responses, start = 1):
        if answer in response:
            print(f"\u2713 response {i} contains answer")
            continue

        all_contain_answer = False
        print(f"\u2717 response {i} does not contain answer")
        if prompt is not None:
            response = response.replace(prompt, "")
        print(f" -> response: {response}")

    # The signature always promised a bool; previously the function fell off
    # the end and returned None.
    return all_contain_answer


================================================
FILE: tests/utils/hf_utils.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from contextlib import contextmanager, nullcontext
from typing import Callable, Optional

import bitsandbytes as bnb
import torch
from bitsandbytes.functional import dequantize_4bit
from peft import get_peft_model, prepare_model_for_kbit_training
from peft.tuners.lora import LoraConfig, LoraLayer
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from transformers.trainer_callback import (
    TrainerCallback,
    TrainerControl,
    TrainerState,
    TrainingArguments,
)
from trl import SFTTrainer


class PeftWeightCallback(TrainerCallback):
    """Trainer callback that prints debug lines at selected training events."""

    def on_log(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        logs,
        **kwargs,
    ):
        """Print the trainer state's full ``log_history``."""
        message = f"DEBUG::CALLBACK::on_log::{state.log_history}"
        print(message)

    def on_train_begin(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Assert that a ``model`` keyword was supplied, then print the keyword names."""
        assert kwargs.get("model") is not None
        message = f"DEBUG::CALLBACK::on_train_begin::{kwargs.keys()}"
        print(message)

    def on_step_end(
        self,
        args: TrainingArguments,
        state: TrainerState,
        control: TrainerControl,
        **kwargs,
    ):
        """Print the current ``global_step``."""
        message = f"DEBUG::CALLBACK::on_step_end::{state.global_step}"
        print(message)


@torch.inference_mode()
def generate_responses(
    model,
    tokenizer,
    prompt,
    max_new_tokens: int = 100,
    temperature: float = 0.8,
    do_sample: bool = True,
    num_generations: int = 1,
    skip_special_tokens: bool = True,
    dtype: torch.dtype = None,
):
    """Generate ``num_generations`` completions for one prompt in a single batch.

    Parameters:
        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
        tokenizer: Callable tokenizer that also provides ``batch_decode``.
        prompt: Text passed to the tokenizer.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        do_sample: Forwarded to ``model.generate``.
        num_generations: Number of copies of the prompt placed in the batch.
        skip_special_tokens: Forwarded to ``tokenizer.batch_decode``.
        dtype: When given, generation runs under CUDA autocast with this dtype.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences.
    """
    # The prompt is identical for every generation, so tokenize it once and
    # replicate the tensors along the batch dimension instead of re-running
    # the tokenizer ``num_generations`` times.
    encoded = tokenizer(prompt, return_tensors = "pt")
    batched_inputs = {
        key: torch.cat([encoded[key]] * num_generations, dim = 0).to(model.device)
        for key in encoded.keys()
    }

    if dtype is not None:
        inference_context = torch.autocast(device_type = "cuda", dtype = dtype)
    else:
        inference_context = nullcontext()

    with inference_context:
        outputs = model.generate(
            **batched_inputs,
            max_new_tokens = max_new_tokens,
            do_sample = do_sample,
            temperature = temperature,
        )

    responses = tokenizer.batch_decode(outputs, skip_special_tokens = skip_special_tokens)
    return responses


def sample_responses(
    model,
    tokenizer,
    prompt,
    temperature: float = 0.8,
    num_generations: int = 1,
    max_new_tokens: int = 100,
    skip_special_tokens: bool = True,
    dtype: torch.dtype = None,
):
    """Forward all arguments to generate_responses and return its result.

    ``do_sample`` is not passed, so generate_responses uses its own default.
    """
    generation_options = {
        "temperature": temperature,
        "num_generations": num_generations,
        "max_new_tokens": max_new_tokens,
        "skip_special_tokens": skip_special_tokens,
        "dtype": dtype,
    }
    return generate_responses(model, tokenizer, prompt, **generation_options)


def setup_tokenizer(model_name, fixup_funcs: Optional[list[Callable]] = None):
    """Load a tokenizer and pass it through optional fix-up functions.

    Parameters:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        fixup_funcs: Callables applied in order; each receives the tokenizer
            and must return the tokenizer to use from then on. None (the
            default) or an empty list applies nothing.

    Returns:
        The tokenizer after all fix-up functions have been applied.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # ``None`` replaces the former mutable ``[]`` default, which was a single
    # list object shared by every call.
    for fixup_func in fixup_funcs or ():
        tokenizer = fixup_func(tokenizer)
    return tokenizer


def setup_model(
    model_name,
    quantize: bool = True,
    dtype = torch.bfloat16,
    peft_config = None,
    autocast_adapter: bool = True,
):
    """Load a causal LM on ``cuda:0`` with SDPA attention, optionally 4-bit and PEFT-wrapped.

    Parameters:
        model_name: Identifier passed to ``AutoModelForCausalLM.from_pretrained``.
        quantize: When True, load with an NF4 double-quantized 4-bit
            ``BitsAndBytesConfig`` and call ``prepare_model_for_kbit_training``.
        dtype: Used as ``torch_dtype`` and as the 4-bit compute dtype.
        peft_config: When not None, the model is wrapped with ``get_peft_model``.
        autocast_adapter: Forwarded as ``autocast_adapter_dtype``.

    Returns:
        The loaded (and possibly prepared / PEFT-wrapped) model.
    """
    bnb_config = None
    if quantize:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_use_double_quant = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_compute_dtype = dtype,
        )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map = "cuda:0",
        attn_implementation = "sdpa",
        quantization_config = bnb_config,
        torch_dtype = dtype,
    )
    if quantize:
        model = prepare_model_for_kbit_training(model)

    if peft_config is None:
        return model
    return get_peft_model(model, peft_config, autocast_adapter_dtype = autocast_adapter)


def get_peft_config(
    lora_rank,
    lora_alpha = None,
    lora_dropout = 0.0,
    bias = "none",
    target_modules = "all-linear",
):
    """Build a ``LoraConfig`` for the ``CAUSAL_LM`` task type.

    Parameters:
        lora_rank: Passed as ``r``.
        lora_alpha: Falls back to ``2 * lora_rank`` when falsy (None or 0).
        lora_dropout, bias, target_modules: Forwarded unchanged.
    """
    if not lora_alpha:
        lora_alpha = 2 * lora_rank
    return LoraConfig(
        lora_alpha = lora_alpha,
        lora_dropout = lora_dropout,
        r = lora_rank,
        bias = bias,
        target_modules = target_modules,
        task_type = "CAUSAL_LM",
    )


def setup_trainer(
    model,
    tokenizer,
    dataset,
    train_args,
    peft_config = None,
    formatting_func = None,
    collator = None,
):
    """Construct an ``SFTTrainer`` from the given pieces.

    ``tokenizer`` is passed as ``processing_class``, ``dataset`` as
    ``train_dataset``, ``collator`` as ``data_collator`` and ``train_args`` as
    ``args``; the remaining arguments keep their names.
    """
    trainer_kwargs = dict(
        model = model,
        peft_config = peft_config,
        train_dataset = dataset,
        processing_class = tokenizer,
        formatting_func = formatting_func,
        data_collator = collator,
        args = train_args,
    )
    return SFTTrainer(**trainer_kwargs)


def setup_lora(
    model,
    tokenizer,
    dataset,
    peft_config,
    train_args,
    formatting_func = None,
    collator = None,
):
    """Construct an ``SFTTrainer`` for LoRA training with a required ``peft_config``.

    The previous body passed these trainer arguments to ``LoraConfig``, which
    does not accept them, so every call raised ``TypeError``. The keyword set
    is exactly the one ``setup_trainer`` hands to ``SFTTrainer``.
    NOTE(review): returning an ``SFTTrainer`` is the presumed intent - confirm
    against callers.

    Parameters:
        model: Model to train.
        tokenizer: Passed as ``processing_class``.
        dataset: Passed as ``train_dataset``.
        peft_config: LoRA configuration handed to the trainer.
        train_args: Passed as ``args``.
        formatting_func: Optional formatting function.
        collator: Passed as ``data_collator``.

    Returns:
        SFTTrainer: The configured trainer.
    """
    return SFTTrainer(
        model = model,
        peft_config = peft_config,
        train_dataset = dataset,
        processing_class = tokenizer,
        formatting_func = formatting_func,
        data_collator = collator,
        args = train_args,
    )


def convert_weights_back_to_dtype(model, dtype):
    """
    Cast, in place, every parameter whose name contains "norm" or "embed" to ``dtype``.

    Intended for use after SFTTrainer has called get_peft_model and
    prepare_model_for_kbit_training, which upcast weights to float32.
    Parameters whose names contain neither substring are left untouched.
    """
    for name, param in model.named_parameters():
        if "norm" in name or "embed" in name:
            param.data = param.data.to(dtype)


def fix_llama3_tokenizer(tokenizer, padding_side = "right"):
    """Set ``padding_side`` and choose the pad token from the tokenizer's added vocabulary.

    The pad token is the single added-vocabulary entry whose text contains
    "pad"; an assertion fails unless exactly one such entry exists.

    Returns:
        The same tokenizer object, modified in place.
    """
    tokenizer.padding_side = padding_side
    pad_candidates = [
        token for token in tokenizer.get_added_vocab() if "pad" in token
    ]
    assert len(pad_candidates) == 1
    tokenizer.pad_token = pad_candidates[0]
    return tokenizer


def replace_module(
    module: torch.nn.Module,
    target_module_type: torch.nn.Module,
    conversion_func: Callable,
):
    """Recursively replace child modules of a given type, in place.

    Each direct child that is an instance of ``target_module_type`` is
    replaced by ``conversion_func(child)``. Other children are searched
    recursively; a replaced child is not descended into. ``module`` itself is
    never replaced.
    """
    for child_name, child_module in module.named_children():
        if not isinstance(child_module, target_module_type):
            replace_module(child_module, target_module_type, conversion_func)
            continue
        setattr(module, child_name, conversion_func(child_module))


def _convert_lora_to_linear(module: LoraLayer, adapter_name: str = "default"):
    """Merge one adapter of a 4-bit LoRA layer into a plain ``torch.nn.Linear``.

    The base layer's 4-bit weight is dequantized, the scaled LoRA update
    ``(lora_B @ lora_A) * scaling`` is added in float32, and the sum is cast
    to the dtype recorded in the quantization state.

    Parameters:
        module: LoRA layer whose base weight is a ``bnb.nn.Params4bit``.
        adapter_name: Adapter to merge.

    Returns:
        torch.nn.Linear: New layer holding the merged weight, plus a bias when
        the base layer has one or the adapter trains a LoRA bias.

    Raises:
        AssertionError: If the base weight is not a ``bnb.nn.Params4bit``.
    """
    base_layer = module.get_base_layer()
    weight = base_layer.weight

    assert isinstance(weight, bnb.nn.Params4bit)
    quant_state = weight.quant_state
    original_dtype = quant_state.dtype

    w_dq = dequantize_4bit(weight.data, quant_state).float()
    # ``@`` and ``*`` share precedence and associate left-to-right, so this is
    # (B @ A) * scaling.
    lora_delta = (
        module.lora_B[adapter_name].weight
        @ module.lora_A[adapter_name].weight
        * module.scaling[adapter_name]
    )
    w_dq += lora_delta.float()
    w_dq = w_dq.to(original_dtype)

    # Assemble the merged bias up front. Previously the base bias was only
    # copied when ``lora_bias`` was enabled, so a biased base layer without a
    # LoRA bias ended up with nn.Linear's random initial bias.
    base_bias = base_layer.bias
    bias_data = base_bias.data.float() if base_bias is not None else None

    lora_bias_flags = getattr(module, "lora_bias", None) or {}
    if lora_bias_flags.get(adapter_name, False):
        # The LoRA-B bias is part of the LoRA branch output, which is
        # multiplied by ``scaling`` in the forward pass, so it is scaled here
        # as well. NOTE(review): confirm against the installed peft version.
        lora_b_bias = (
            module.lora_B[adapter_name].bias.data.float()
            * module.scaling[adapter_name]
        )
        bias_data = lora_b_bias if bias_data is None else bias_data + lora_b_bias

    new_module = torch.nn.Linear(
        w_dq.shape[1], w_dq.shape[0], bias = bias_data is not None
    )
    new_module.weight.data = torch.nn.Parameter(w_dq, requires_grad = False)
    if bias_data is not None:
        # Keep the bias in the same dtype as the merged weight.
        new_module.bias.data = torch.nn.Parameter(
            bias_data.to(original_dtype), requires_grad = False
        )
    return new_module


def convert_lora_to_linear(model: torch.nn.Module):
    """Replace every ``LoraLayer`` beneath ``model`` with a merged linear layer.

    Uses replace_module with _convert_lora_to_linear, then asserts that no
    ``LoraLayer`` remains among ``model.modules()``.

    Returns:
        The same ``model`` object, modified in place.
    """
    replace_module(model, LoraLayer, _convert_lora_to_linear)
    assert all(not isinstance(submodule, LoraLayer) for submodule in model.modules())
    return model


================================================
FILE: tests/utils/ocr_eval.md
================================================

# OCR Model Evaluator
A comprehensive Python module for evaluating Optical Character Recognition (OCR) models using Word Error Rate (WER) and Character Error Rate (CER) metrics. This evaluator supports vision-language models and provides detailed analysis with comparison capabilities across multiple models.

## Basic Usage

```python
from ocr_eval import evaluate_ocr_model

# Simple evaluation
avg_wer, avg_cer = evaluate_ocr_model(
    model=your_model,
    processor=your_processor,
    dataset=your_dataset,
    output_dir="evaluation_results"
)

print(f"Average WER: {avg_wer:.4f}")
print(f"Average CER: {avg_cer:.4f}")
```


### Dataset Format

The evaluator expects datasets in a ChatML-style conversational format with the following structure:
```python
dataset = [
    {
        "messages": [
            {
                "role": "system",
                "content": [{"type": "text", "text": "You are an OCR system."}]
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract text from this image"},
                    {"type": "image", "image": PIL_Image_object}
                ]
            },
            {
                "role": "assistant",
                "content": [{"type": "text", "text": "Ground truth text"}]
            }
        ]
    },
    # ... more samples
]
```


## Examples

### Document OCR evaluation

```python
from ocr_eval import OCRModelEvaluator
from datasets import load_dataset

# Load document OCR dataset
dataset = load_dataset("your-ocr-dataset", split="test")

# Convert to required format
eval_data = [format_document_sample(sample) for sample in dataset]

# Evaluate models
evaluator = OCRModelEvaluator()

# Compare different model configurations
configs = {
    "Standard Model": {"temperature": 1.0, "max_new_tokens": 512},
    "Conservative Model": {"temperature": 0.7, "max_new_tokens": 256},
    "Creative Model": {"temperature": 1.5, "max_new_tokens": 1024}
}

for config_name, params in configs.items():
    wer, cer = evaluator.evaluate_model(
        model=base_model,
        processor=processor,
        dataset=eval_data,
        output_dir=f"document_ocr_{config_name.lower().replace(' ', '_')}",
        **params
    )
    evaluator.add_to_comparison(config_name, wer, cer)

# Generate final report
evaluator.print_model_comparison()
```

### Handwriting Recognition
```python
# Specialized evaluation for handwriting
def evaluate_handwriting_models(models, handwriting_dataset):
    evaluator = OCRModelEvaluator()

    for model_name, (model, processor) in models.items():
        # Adjust parameters for handwriting recognition
        wer, cer = evaluator.evaluate_model(
            model=model,
            processor=processor,
            dataset=handwriting_dataset,
            temperature=1.2,  # Slightly higher for handwriting variety
            max_new_tokens=128,  # Usually shorter text
            output_dir=f"handwriting_{model_name}"
        )
        evaluator.add_to_comparison(f"Handwriting - {model_name}", wer, cer)

    return evaluator.print_model_comparison()
```


================================================
FILE: tests/utils/ocr_eval.py
================================================
"""
OCR Model Evaluation Module

This module provides functionality to evaluate OCR models on datasets with
word error rate (WER) and character error rate (CER) metrics.
"""

import os
import torch
from tqdm import tqdm
import pandas as pd
from jiwer import wer, cer
from qwen_vl_utils import process_vision_info
import matplotlib.pyplot as plt
from typing import List, Dict, Tuple, Optional, Any
import traceback


class OCRModelEvaluator:
    """
    A comprehensive OCR model evaluator that supports multiple models and provides
    detailed analysis with WER and CER metrics.

    Results registered through ``add_to_comparison`` are kept in
    ``self.model_comparison_results`` (model name -> {"wer": ..., "cer": ...}).
    """

    def __init__(self):
        """Initialize the OCR evaluator."""
        self.model_comparison_results = {}

    def evaluate_model(
        self,
        model: Any,
        processor: Any,
        dataset: List[Dict],
        output_dir: str = "ocr_evaluation_results",
        max_new_tokens: int = 1024,
        temperature: float = 1.5,
        min_p: float = 0.1,
        verbose: bool = True,
    ) -> Tuple[Optional[float], Optional[float]]:
        """
        Evaluate a model on an OCR dataset.

        Parameters:
            model: Model exposing ``device`` and ``generate``.
            processor: Processor providing ``apply_chat_template``, ``__call__``
                and ``batch_decode``.
            dataset: Iterable of samples, each with a ``"messages"`` list.
            output_dir: Directory (created if missing) receiving the per-sample
                files and the summary files.
            max_new_tokens, temperature, min_p: Generation settings forwarded
                to ``model.generate``.
            verbose: Show the progress bar and print skip/error/summary messages.

        Returns:
            (average WER, average CER), or (None, None) when no sample produced
            a result. Samples that are incomplete or raise are skipped.
        """
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok = True)

        # Initialize results storage
        results = []

        # Process each sample in the dataset
        for i, sample in enumerate(
            tqdm(dataset, desc = "Evaluating OCR performance", disable = not verbose)
        ):
            try:
                # Extract components from sample
                messages = sample["messages"]

                # Get ground truth, image, and question
                ground_truth, image, question, input_messages = (
                    self._extract_sample_components(messages, i, verbose)
                )

                if ground_truth is None or image is None or question is None:
                    continue

                # Generate model response
                generated_response = self._generate_response(
                    model, processor, input_messages, max_new_tokens, temperature, min_p
                )

                # Calculate metrics
                word_error = wer(ground_truth, generated_response)
                char_error = cer(ground_truth, generated_response)

                # Save individual result
                self._save_individual_result(
                    output_dir,
                    i,
                    question,
                    generated_response,
                    ground_truth,
                    word_error,
                    char_error,
                )

                # Store results for summary
                results.append(
                    {
                        "sample_id": i,
                        "wer": word_error,
                        "cer": char_error,
                        "model_output": generated_response.strip(),
                        "ground_truth": ground_truth,
                        "question": question,
                    }
                )

            except Exception as e:
                # Best effort: one failing sample must not abort the whole run.
                if verbose:
                    print(f"Error processing sample {i}: {str(e)}")
                    traceback.print_exc()

        # Generate summary report
        return self._generate_summary_report(results, output_dir, verbose)

    def _extract_sample_components(
        self, messages: List[Dict], sample_idx: int, verbose: bool
    ) -> Tuple[Optional[str], Optional[Any], Optional[str], List[Dict]]:
        """Extract ground truth, image, question, and input messages from sample.

        Returns ``(None, None, None, [])`` when the user message, the assistant
        message, the assistant text, the image, or the question is missing.
        """

        # Extract system message (if present)
        system_message = next(
            (msg for msg in messages if msg["role"] == "system"), None
        )

        # Extract user message with the image and question
        user_message = next((msg for msg in messages if msg["role"] == "user"), None)
        if not user_message:
            if verbose:
                print(f"Skipping sample {sample_idx}: No user message found")
            return None, None, None, []

        # Extract assistant message with ground truth
        assistant_message = next(
            (msg for msg in messages if msg["role"] == "assistant"), None
        )
        if not assistant_message:
            if verbose:
                print(
                    f"Skipping sample {sample_idx}: No assistant message (ground truth) found"
                )
            return None, None, None, []

        # Extract ground truth text (first text item of the assistant message)
        ground_truth = None
        for content_item in assistant_message["content"]:
            if content_item["type"] == "text":
                ground_truth = content_item["text"]
                break

        if not ground_truth:
            if verbose:
                print(
                    f"Skipping sample {sample_idx}: No text found in assistant message"
                )
            return None, None, None, []

        # Extract image and question from user message; when several items of
        # one type exist, the last one wins.
        image = None
        question = None

        for content_item in user_message["content"]:
            if content_item["type"] == "image":
                image = content_item["image"]
            elif content_item["type"] == "text":
                question = content_item["text"]

        if not image:
            if verbose:
                print(f"Skipping sample {sample_idx}: No image found in user message")
            return None, None, None, []

        if not question:
            if verbose:
                print(
                    f"Skipping sample {sample_idx}: No question found in user message"
                )
            return None, None, None, []

        # Construct messages for the model input (excluding assistant message)
        input_messages = []
        if system_message:
            input_messages.append(system_message)
        input_messages.append(user_message)

        return ground_truth, image, question, input_messages

    def _generate_response(
        self,
        model: Any,
        processor: Any,
        input_messages: List[Dict],
        max_new_tokens: int,
        temperature: float,
        min_p: float,
    ) -> str:
        """Generate response from the model.

        Returns the decoded text of the newly generated tokens only (the
        prompt tokens are trimmed off before decoding).
        """

        # Preparation for inference using Qwen's specific processing
        text = processor.apply_chat_template(
            input_messages, tokenize = False, add_generation_prompt = True
        )

        # Process vision info (images/videos) from messages
        image_inputs, video_inputs = process_vision_info(input_messages)

        # Create model inputs
        inputs = processor(
            text = [text],
            images = image_inputs,
            videos = video_inputs,
            padding = True,
            return_tensors = "pt",
        )
        inputs = inputs.to(model.device)

        # Generate response
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens = max_new_tokens,
                temperature = temperature,
                min_p = min_p,
                use_cache = True,
            )

        # Extract only the generated part (not the input)
        generated_ids_trimmed = [
            out_ids[len(in_ids) :]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        # Decode the generated text
        generated_response = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens = True,
            clean_up_tokenization_spaces = False,
        )[0]

        return generated_response

    def _save_individual_result(
        self,
        output_dir: str,
        sample_idx: int,
        question: str,
        generated_response: str,
        ground_truth: str,
        word_error: float,
        char_error: float,
    ):
        """Save individual sample result to ``<output_dir>/sample_<idx>.txt``."""
        output_file = os.path.join(output_dir, f"sample_{sample_idx}.txt")
        with open(output_file, "w", encoding = "utf-8") as f:
            f.write(f"Sample {sample_idx}\n")
            f.write(f"Question: {question}\n\n")
            f.write(f"Model output:\n{generated_response.strip()}\n\n")
            f.write(f"Ground truth:\n{ground_truth}\n\n")
            f.write(f"WER: {word_error:.4f}, CER: {char_error:.4f}")

    def _generate_summary_report(
        self, results: List[Dict], output_dir: str, verbose: bool
    ) -> Tuple[Optional[float], Optional[float]]:
        """Generate and save summary report.

        Writes ``avg_metrics.txt`` and ``detailed_results.csv`` into
        ``output_dir`` and returns (average WER, average CER), or
        (None, None) without writing anything when ``results`` is empty.
        """
        if not results:
            if verbose:
                print("No results to summarize.")
            return None, None

        df = pd.DataFrame(results)

        # Calculate overall averages
        avg_wer = df["wer"].mean()
        avg_cer = df["cer"].mean()

        # Save average metrics (explicit encoding, matching the per-sample files)
        with open(
            os.path.join(output_dir, "avg_metrics.txt"), "w", encoding = "utf-8"
        ) as f:
            f.write(f"Average WER: {avg_wer:.4f}\n")
            f.write(f"Average CER: {avg_cer:.4f}\n")

        # Save detailed results
        df.to_csv(os.path.join(output_dir, "detailed_results.csv"), index = False)

        if verbose:
            print("\nResults Summary:")
            print(f"Average WER: {avg_wer:.4f}")
            print(f"Average CER: {avg_cer:.4f}")
            print(f"\nDetailed results saved to {output_dir}/")

        return avg_wer, avg_cer

    def add_to_comparison(self, model_name: str, wer: float, cer: float):
        """Add model results to the comparison tracker.

        An existing entry with the same ``model_name`` is overwritten.
        """
        self.model_comparison_results[model_name] = {"wer": wer, "cer": cer}

    def print_model_comparison(
        self, save_csv: bool = True, save_plot: bool = True
    ) -> Optional[pd.DataFrame]:
        """Print a comparison of all models evaluated so far.

        Parameters:
            save_csv: Also write the table to ``model_comparison_results.csv``
                in the current working directory.
            save_plot: Also create the bar-chart image via
                ``_create_comparison_plot``.

        Returns:
            The comparison table sorted by WER (ascending), or None when no
            results have been added.
        """
        if not self.model_comparison_results:
            print("No model results available for comparison")
            return None

        print("\n==== MODEL COMPARISON REPORT ====")

        # Create a comparison dataframe
        comparison_df = pd.DataFrame(
            {
                "Model": list(self.model_comparison_results.keys()),
                "WER": [
                    results["wer"] for results in self.model_comparison_results.values()
                ],
                "CER": [
                    results["cer"] for results in self.model_comparison_results.values()
                ],
            }
        )

        # Sort by WER (best performance first)
        comparison_df = comparison_df.sort_values("WER")

        # Display the comparison table
        print("\nComparison Table (sorted by WER):")
        print(comparison_df.to_string(index = False))

        # Save the comparison table
        if save_csv:
            comparison_file = "model_comparison_results.csv"
            comparison_df.to_csv(comparison_file, index = False)
            print(f"\nComparison table saved to {comparison_file}")

        # Generate a bar chart visualization
        if save_plot:
            self._create_comparison_plot(comparison_df)

        return comparison_df

    def _create_comparison_plot(self, comparison_df: pd.DataFrame):
        """Create and save comparison plot as ``ocr_model_comparison.png``."""
        fig = plt.figure(figsize = (12, 6))

        # Plot WER
        plt.subplot(1, 2, 1)
        plt.bar(comparison_df["Model"], comparison_df["WER"], color = "skyblue")
        plt.title("Word Error Rate Comparison")
        plt.ylabel("WER (lower is better)")
        plt.ylim(bottom = 0)
        plt.xticks(rotation = 45, ha = "right")

        # Plot CER
        plt.subplot(1, 2, 2)
        plt.bar(comparison_df["Model"], comparison_df["CER"], color = "lightgreen")
        plt.title("Character Error Rate Comparison")
        plt.ylabel("CER (lower is better)")
        plt.ylim(bottom = 0)
        plt.xticks(rotation = 45, ha = "right")

        plt.tight_layout()
        plt.savefig("ocr_model_comparison.png")
        plt.show()
        # Release the figure; otherwise every call leaves another open figure
        # registered with pyplot.
        plt.close(fig)

        print("\nVisualization saved to ocr_model_comparison.png")

    def get_comparison_results(self) -> Dict[str, Dict[str, float]]:
        """Get the current comparison results (a shallow copy of the tracker)."""
        return self.model_comparison_results.copy()

    def clear_comparison_results(self):
        """Clear all comparison results."""
        self.model_comparison_results.clear()


def evaluate_ocr_model(
    model, processor, dataset, output_dir = "ocr_evaluation_results", **kwargs
):
    """
    Function-style entry point kept for backward compatibility.

    Creates a fresh OCRModelEvaluator and returns the result of its
    ``evaluate_model`` call; extra keyword arguments are forwarded unchanged.
    """
    return OCRModelEvaluator().evaluate_model(
        model, processor, dataset, output_dir, **kwargs
    )


def create_evaluator():
    """Return a freshly constructed OCRModelEvaluator."""
    evaluator = OCRModelEvaluator()
    return evaluator


================================================
FILE: tests/utils/os_utils.py
================================================
import subprocess
import sys
import os
import shutil
import importlib


def detect_package_manager():
    """Return the name of the first known package manager found on disk.

    The candidates are probed in a fixed order (apt, yum, dnf, pacman, zypper)
    by checking one hard-coded ``/usr/bin`` path each. ``None`` is returned
    when none of those paths exists.
    """
    candidates = (
        ("apt", "/usr/bin/apt"),
        ("yum", "/usr/bin/yum"),
        ("dnf", "/usr/bin/dnf"),
        ("pacman", "/usr/bin/pacman"),
        ("zypper", "/usr/bin/zypper"),
    )
    return next(
        (name for name, binary in candidates if os.path.exists(binary)),
        None,
    )


def check_package_installed(package_name, package_manager = None):
    """Check whether a system package is installed.

    Args:
        package_name: Package name as known to the system package manager.
        package_manager: One of "apt", "yum", "dnf", "pacman" or "zypper".
            When ``None`` it is auto-detected with ``detect_package_manager()``.

    Returns:
        ``True`` or ``False`` when the query ran. ``None`` when no package
        manager could be detected, when ``package_manager`` is not one of the
        supported names, or when running the query raised (for example because
        the query tool is missing).
    """

    if package_manager is None:
        package_manager = detect_package_manager()

    if package_manager is None:
        print("Warning: Could not detect package manager")
        return None

    try:
        if package_manager == "apt":
            # `dpkg -l` exits 0 for any package dpkg merely knows about
            # (removed with config files left behind, or never installed),
            # so read the status field and require the "installed" state.
            result = subprocess.run(
                ["dpkg-query", "-W", "-f=${Status}\n", package_name],
                capture_output = True,
                text = True,
            )
            if result.returncode != 0:
                return False
            # One "<want> <flag> <state>" line per matching package/arch.
            return any(
                line.split()[-1:] == ["installed"]
                for line in result.stdout.splitlines()
            )

        elif package_manager in ["yum", "dnf"]:
            # Both front-ends share the rpm database.
            result = subprocess.run(
                ["rpm", "-q", package_name], capture_output = True, text = True
            )
            return result.returncode == 0

        elif package_manager == "pacman":
            result = subprocess.run(
                ["pacman", "-Q", package_name], capture_output = True, text = True
            )
            return result.returncode == 0

        elif package_manager == "zypper":
            # Search installed packages only and look for the name in the output.
            result = subprocess.run(
                ["zypper", "se", "-i", package_name], capture_output = True, text = True
            )
            return package_name in result.stdout

    except Exception as e:
        # Deliberately best-effort: any failure to query means "unknown".
        print(f"Error checking package: {e}")
        return None

    # Unsupported package manager name: nothing was queried.
    return None


def require_package(package_name, executable_name = None):
    """Ensure a system package is present, or print help and exit.

    Returns ``None`` as soon as either ``executable_name`` resolves on PATH or
    ``check_package_installed`` reports a truthy result. Otherwise installation
    hints are printed and the process exits with status 1.
    """

    # An executable on PATH is the strongest evidence, so try that first.
    if executable_name and shutil.which(executable_name):
        print(f"✓ {executable_name} is available")
        return

    # Fall back to asking the system package manager.
    manager = detect_package_manager()
    if check_package_installed(package_name, manager):
        print(f"✓ Package {package_name} is installed")
        return

    # Nothing found: explain how to install, then bail out.
    print(f"❌ Error: {package_name} is not installed")
    print(f"\nPlease install {package_name} using your system package manager:")

    install_commands = {
        "apt": f"sudo apt update && sudo apt install {package_name}",
        "yum": f"sudo yum install {package_name}",
        "dnf": f"sudo dnf install {package_name}",
        "pacman": f"sudo pacman -S {package_name}",
        "zypper": f"sudo zypper install {package_name}",
    }

    if manager in install_commands:
        hint_lines = [f"  {install_commands[manager]}"]
    else:
        # Unknown manager: list every command we know about.
        hint_lines = [
            f"  {manager_name}: {command}"
            for manager_name, command in install_commands.items()
        ]
    for hint in hint_lines:
        print(hint)

    print("\nAlternatively, install with conda:")
    print(f"  conda install -c conda-forge {package_name}")

    print("\nPlease install the required package and run the script again.")
    sys.exit(1)


# Usage
# require_package("ffmpeg", "ffmpeg")


def require_python_package(package_name, import_name = None, pip_name = None):
    """Ensure a Python package is importable, or print help and exit.

    Args:
        package_name: Human-readable package name used in messages.
        import_name: Module name to look up; defaults to ``package_name``.
        pip_name: Name shown in the install commands; defaults to
            ``package_name``.

    Prints a confirmation when the module can be located. Otherwise prints
    installation instructions and exits the process with status 1.
    """
    # `import importlib` alone does not guarantee the `util` submodule is
    # loaded, so import it explicitly where it is used.
    import importlib.util

    if import_name is None:
        import_name = package_name
    if pip_name is None:
        pip_name = package_name

    try:
        spec = importlib.util.find_spec(import_name)
    except ImportError:
        # find_spec imports parent packages for dotted names; a missing or
        # broken parent means the requested module is not usable either.
        spec = None

    if spec is None:
        print(f"❌ Error: Python package '{package_name}' is not installed")
        print(f"\nPlease install {package_name} using pip:")
        print(f"  pip install {pip_name}")
        print("  # or with conda:")
        print(f"  conda install {pip_name}")
        print("\nAfter installation, run this script again.")
        sys.exit(1)
    else:
        print(f"✓ Python package '{package_name}' is installed")


================================================
FILE: tests/utils/perplexity_eval.md
================================================
# Language Model Perplexity Evaluator

A Python module for evaluating language models using perplexity metrics, with a sliding-window approach for long sequences. This evaluator provides efficient computation of perplexity scores across datasets, along with model-comparison capabilities.

## Basic Usage

```python
from perplexity_eval import ppl_model, add_to_comparison, print_model_comparison

# Simple perplexity evaluation
dataset = {"text": ["Your text samples here...", "Another text sample..."]}
perplexity = ppl_model(model, tokenizer, dataset)

print(f"Model Perplexity: {perplexity:.4f}")

# Add to comparison tracker
add_to_comparison("My Model", perplexity)
print_model_comparison()
```


================================================
FILE: tests/utils/perplexity_eval.py
================================================
from tqdm import tqdm
import torch
import pandas as pd

# Module-level registry filled by add_to_comparison: model name -> {"ppl": perplexity}.
model_comparison_results = {}
# ppl_model returns the perplexity of the model on the dataset.
# The perplexity is computed on each example individually; examples longer than
# the 2048-token window are scored with overlapping windows advanced 512 tokens at a time.


def ppl_model(
    model, tokenizer, dataset, max_length = 2048, stride = 512, device = "cuda"
):
    """Return the perplexity of ``model`` on ``dataset["text"]`` as a 0-dim tensor.

    Every example is tokenized and scored on its own. An example longer than
    ``max_length`` tokens is scored with overlapping windows that start every
    ``stride`` tokens; within a window, only the tokens that the previous
    window did not already reach keep their labels (the rest are set to -100).
    The result is ``exp`` of the unweighted mean of the per-window losses.

    Args:
        model: Callable accepting ``input_ids``, ``labels`` and
            ``attention_mask`` and returning an object with a ``loss`` tensor.
        tokenizer: Callable returning an encoding with ``input_ids`` when
            invoked with ``return_tensors = "pt"``; ``pad_token_id`` is read.
        dataset: Object whose ``"text"`` entry is a sequence of strings.
        max_length: Maximum number of tokens fed to the model per window.
        stride: Distance in tokens between the starts of consecutive windows.
        device: Device the input ids are moved to before the forward pass.
    """
    # Fetch the column once; re-reading dataset["text"] on every iteration can
    # be expensive for dataset objects that build the column on access.
    texts = dataset["text"]

    # NOTE(review): any real token whose id equals the pad id (0 when the
    # tokenizer defines none) is masked out of attention below.
    pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0

    nlls = []
    for text in tqdm(texts):
        encodings = tokenizer(text, return_tensors = "pt")
        seq_len = encodings.input_ids.size(1)
        prev_end_loc = 0
        for begin_loc in range(0, seq_len, stride):
            end_loc = min(begin_loc + max_length, seq_len)
            # Number of tokens in this window not covered by the previous one.
            trg_len = end_loc - prev_end_loc
            input_ids = encodings.input_ids[:, begin_loc:end_loc].to(device)
            target_ids = input_ids.clone()
            # Context-only tokens are excluded from the loss.
            target_ids[:, :-trg_len] = -100
            attention_mask = (input_ids != pad_token_id).long()
            with torch.no_grad():
                outputs = model(
                    input_ids, labels = target_ids, attention_mask = attention_mask
                )
            nlls.append(outputs.loss)
            prev_end_loc = end_loc
            if end_loc == seq_len:
                # The window reached the end of the example.
                break
    ppl = torch.exp(torch.stack(nlls).mean())
    return ppl


# --------------------------------------------------------------------


## ----------- Reporting helper function ----------- ##


# Create a simple function to add results to the comparison
def add_to_comparison(model_name, ppl):
    """Record ``ppl`` for ``model_name`` in the module-level results registry.

    An existing entry under the same name is overwritten. The value is stored
    exactly as given (tensor or plain number).
    """
    entry = {"ppl": ppl}
    model_comparison_results[model_name] = entry


# Create a function to print the comparison report whenever needed
def print_model_comparison():
    """Print a table with the perplexity of every model recorded so far.

    Prints a short notice and returns when nothing has been recorded.
    """
    if not model_comparison_results:
        print("No model results available for comparison")
        return

    print("\n==== MODEL COMPARISON REPORT ====")

    model_names = []
    perplexities = []
    for model_name, results in model_comparison_results.items():
        score = results["ppl"]
        if torch.is_tensor(score):
            # Tensors may live on an accelerator; bring them back as floats.
            score = score.cpu().item()
        model_names.append(model_name)
        perplexities.append(score)

    comparison_df = pd.DataFrame({"Model": model_names, "Perplexity": perplexities})

    print("\nComparison Table:")
    print(comparison_df.to_string(index = False))


================================================
FILE: tests/utils/test_attention_masks.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Unit tests for packed-attention mask helpers with sliding-window logic."""

import math

import torch

from unsloth.utils import attention_dispatch
from unsloth.utils import packing as packing_utils


def _make_seq_info(lengths):
    """Build the ``(lengths, cumulative offsets, max length)`` triple for a pack.

    ``lengths`` is a flat list of per-sequence token counts. Both returned
    tensors are int32; the offsets start at 0 and have one more entry than
    ``lengths``. The maximum length is returned as a plain ``int``.
    """
    seq_lengths = torch.tensor(lengths, dtype = torch.int32)
    offsets = torch.zeros(seq_lengths.numel() + 1, dtype = torch.int32)
    offsets[1:] = torch.cumsum(seq_lengths, dim = 0, dtype = torch.int32)
    longest = int(seq_lengths.max().item())
    return seq_lengths, offsets, longest


def test_sdpa_packed_attention_mask_sliding_window():
    """Check the SDPA packed mask for two sequences (5 and 3) with window 3.

    Inside the first sequence's 5x5 block, everything above the diagonal must
    be -inf, positions (3, 0) and (4, 1) must be -inf, and (4, 2) must be
    finite. Position (0, 6), which crosses into the second sequence, must be
    -inf as well.
    """
    neg_inf = float("-inf")
    mask = packing_utils.build_sdpa_packed_attention_mask(
        _make_seq_info([5, 3]),
        dtype = torch.float32,
        device = torch.device("cpu"),
        sliding_window = 3,
    )

    assert mask.shape == (1, 1, 8, 8)

    first_block = mask[0, 0, :5, :5]
    future = torch.ones_like(first_block).triu(diagonal = 1).bool()
    assert torch.all(first_block[future] == neg_inf)
    for row, col in ((3, 0), (4, 1)):
        assert first_block[row, col].item() == neg_inf
    assert first_block[4, 2].item() > -math.inf
    assert mask[0, 0, 0, 6].item() == neg_inf


def test_xformers_block_mask_sliding_window(monkeypatch):
    """The xformers mask builder must return a mask carrying the requested window.

    ``packing_utils._XFormersBlockMask`` is replaced with a stub whose
    ``make_local_attention`` records the window size it receives.
    """

    class _StubBlockMask:
        # Minimal stand-in: remembers the lengths and the window it was given.
        def __init__(self, lengths, window = None):
            self.lengths = lengths
            self.window = window

        @classmethod
        def from_seqlens(cls, lengths):
            return cls(tuple(lengths))

        def make_local_attention(self, window_size):
            return _StubBlockMask(self.lengths, window = window_size)

    monkeypatch.setattr(
        packing_utils, "_XFormersBlockMask", _StubBlockMask, raising = False
    )

    built = packing_utils.build_xformers_block_causal_mask(
        _make_seq_info([4, 4]),
        sliding_window = 2,
    )

    assert isinstance(built, _StubBlockMask)
    assert built.window == 2


def test_run_attention_sdpa_passes_sliding_window(monkeypatch):
    """``run_attention`` with the SDPA backend must forward the sliding window.

    The real mask builder is wrapped to record the window it receives, and the
    SDPA kernel is replaced with a stub that records the ``attn_mask`` keyword.
    The recorded mask must be 5x5 with position (4, 1) set to -inf.
    """
    window = 2
    seq_info = _make_seq_info([3, 2])
    seen = {}

    # Grab the real builder before it is patched so the wrapper can delegate.
    real_builder = attention_dispatch.build_sdpa_packed_attention_mask

    def _recording_builder(seq_info_arg, *, dtype, device, sliding_window = None):
        seen["window"] = sliding_window
        return real_builder(
            seq_info_arg,
            dtype = dtype,
            device = device,
            sliding_window = sliding_window,
        )

    def _stub_sdpa(Q, K, V, **kwargs):
        seen["mask"] = kwargs.get("attn_mask")
        return torch.zeros_like(Q)

    monkeypatch.setattr(
        attention_dispatch,
        "build_sdpa_packed_attention_mask",
        _recording_builder,
    )
    monkeypatch.setattr(attention_dispatch, "scaled_dot_product_attention", _stub_sdpa)

    config = attention_dispatch.AttentionConfig(
        backend = attention_dispatch.SDPA,
        n_kv_heads = 1,
        n_groups = 1,
    )
    context = attention_dispatch.AttentionContext(
        bsz = 1,
        q_len = 5,
        kv_seq_len = 5,
        n_heads = 1,
        head_dim = 1,
        requires_grad = False,
        seq_info = seq_info,
        attention_mask = None,
        causal_mask = None,
        sliding_window = window,
    )

    Q, K, V = (torch.zeros(1, 1, 5, 1) for _ in range(3))

    attention_dispatch.run_attention(
        config = config,
        context = context,
        Q = Q,
        K = K,
        V = V,
    )

    assert seen["window"] == window
    recorded_mask = seen["mask"]
    assert recorded_mask is not None
    assert recorded_mask.shape == (1, 1, 5, 5)
    assert recorded_mask[0, 0, 4, 1].item() == float("-inf")


def test_run_attention_xformers_passes_sliding_window(monkeypatch):
    """``run_attention`` with the xformers backend must forward the sliding window.

    The mask builder, the xformers attention call and the block-diagonal mask
    class are all replaced with stubs. The test checks the window seen by the
    builder and that the builder's result is what reaches the attention call
    as ``attn_bias``.
    """
    window = 3
    seq_info = _make_seq_info([4])
    seen = {}

    class _StubBias:
        pass

    def _stub_builder(seq_info_arg, *, sliding_window = None, base_mask = None):
        seen["window"] = sliding_window
        seen["base"] = base_mask
        return _StubBias()

    def _stub_attention(Q, K, V, attn_bias = None, **_):
        seen["bias"] = attn_bias
        return torch.zeros_like(Q)

    monkeypatch.setattr(
        attention_dispatch, "build_xformers_block_causal_mask", _stub_builder
    )
    monkeypatch.setattr(
        attention_dispatch, "xformers_attention", _stub_attention, raising = False
    )
    monkeypatch.setattr(
        attention_dispatch, "XFORMERS_BLOCK_DIAG_CLS", _StubBias, raising = False
    )

    config = attention_dispatch.AttentionConfig(
        backend = attention_dispatch.XFORMERS,
        n_kv_heads = 1,
        n_groups = 1,
    )
    context = attention_dispatch.AttentionContext(
        bsz = 1,
        q_len = 4,
        kv_seq_len = 4,
        n_heads = 1,
        head_dim = 1,
        requires_grad = False,
        seq_info = seq_info,
        attention_mask = None,
        causal_mask = None,
        sliding_window = window,
    )

    Q, K, V = (torch.zeros(1, 1, 4, 1) for _ in range(3))

    attention_dispatch.run_attention(
        config = config,
        context = context,
        Q = Q,
        K = K,
        V = V,
    )

    assert seen["window"] == window
    assert isinstance(seen["bias"], _StubBias)


def test_run_attention_flash_varlen_receives_window_and_softcap(monkeypatch):
    """``run_attention`` with the flash-varlen backend must pass its kwargs through.

    ``flash_attn_varlen_func`` is replaced with a stub that records the keyword
    arguments it receives; the ``softcap`` and ``window_size`` values placed in
    ``flash_varlen_kwargs`` must arrive unchanged.
    """
    sliding_window = 3
    softcap = 0.5
    window_tuple = (sliding_window, sliding_window)
    seq_info = _make_seq_info([4])

    captured = {}

    def _fake_flash_varlen(Q, K, V, cu_q, cu_k, max_q, max_k, **kwargs):
        captured["kwargs"] = kwargs
        return torch.zeros_like(Q)

    monkeypatch.setattr(
        attention_dispatch,
        "flash_attn_varlen_func",
        _fake_flash_varlen,
    )
    # Pretend flash-attention is installed so the backend is considered usable.
    monkeypatch.setattr(attention_dispatch, "HAS_FLASH_ATTENTION", True)

    config = attention_dispatch.AttentionConfig(
        backend = attention_dispatch.FLASH_VARLEN,
        n_kv_heads = 1,
        n_groups = 1,
        flash_varlen_kwargs = {
            "dropout_p": 0.0,
            "softmax_scale": 1.0,
            "causal": True,
            "softcap": softcap,
            "window_size": window_tuple,
        },
    )

    context = attention_dispatch.AttentionContext(
        bsz = 1,
        q_len = 4,
        kv_seq_len = 4,
        n_heads = 1,
        head_dim = 2,
        requires_grad = False,
        seq_info = seq_info,
        attention_mask = None,
        causal_mask = None,
        sliding_window = sliding_window,
    )

    Q = torch.zeros(1, 1, 4, 2)
    K = torch.zeros_like(Q)
    V = torch.zeros_like(Q)

    attention_dispatch.run_attention(
        config = config,
        context = context,
        Q = Q,
        K = K,
        V = V,
    )

    forwarded = captured["kwargs"]
    assert forwarded["softcap"] == softcap
    assert forwarded["window_size"] == window_tuple


================================================
FILE: tests/utils/test_packing.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from unsloth import FastLanguageModel
from unsloth.utils import attention_dispatch as attention_dispatch_utils
from unsloth.utils.packing import (
    configure_padding_free,
    configure_sample_packing,
    enable_padding_free_metadata,
    enable_sample_packing,
    mask_packed_sequence_boundaries,
)

from contextlib import ExitStack
from types import SimpleNamespace
from unittest.mock import patch

import pytest
import torch
from datasets import Dataset
from trl import SFTConfig, SFTTrainer
from trl.trainer.sft_trainer import DataCollatorForLanguageModeling


def _build_packed_training_setup(tmp_path, device):
    """Create a tiny packed-SFT setup and return ``(model, batch, trainer, llama_mod)``.

    Loads the ``hf-internal-testing/tiny-random-LlamaForCausalLM`` checkpoint,
    builds an ``SFTTrainer`` with ``packing = True`` over four short texts,
    enables sample packing, and pulls the first training batch with its
    tensors moved to the model's device. The calling test is skipped when the
    checkpoint cannot be loaded (``OSError``).
    """
    # Pick the half-precision flavour once; CPU runs keep the default dtype.
    on_cuda = device.type == "cuda"
    use_bf16 = on_cuda and torch.cuda.is_bf16_supported()
    use_fp16 = on_cuda and not use_bf16
    if use_bf16:
        dtype = torch.bfloat16
    elif use_fp16:
        dtype = torch.float16
    else:
        dtype = None

    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = "hf-internal-testing/tiny-random-LlamaForCausalLM",
            max_seq_length = 64,
            load_in_4bit = False,
            dtype = dtype,
        )
    except OSError as exc:  # pragma: no cover - offline CI
        pytest.skip(f"Requires access to tiny llama checkpoint: {exc}")

    model.to(device)

    sample_texts = [
        "Hello world!",
        "Short sample.",
        "This is a slightly longer packed example to test batching.",
        "Another response to include in the batch.",
    ]
    dataset = Dataset.from_dict({"text": sample_texts})

    training_args = SFTConfig(
        per_device_train_batch_size = 1,
        per_device_eval_batch_size = 1,
        gradient_accumulation_steps = 1,
        dataset_text_field = "text",
        max_length = 64,
        logging_steps = 1,
        max_steps = 1,
        fp16 = use_fp16,
        bf16 = use_bf16,
        dataset_num_proc = 1,
        output_dir = str(tmp_path),
        packing = True,
    )

    trainer = SFTTrainer(
        model = model,
        processing_class = tokenizer,
        train_dataset = dataset,
        args = training_args,
    )

    enable_sample_packing(model, trainer)

    batch = next(iter(trainer.get_train_dataloader()))

    # Move every tensor in the batch next to the model's weights.
    target_device = next(model.parameters()).device
    tensor_keys = [key for key, value in batch.items() if torch.is_tensor(value)]
    for key in tensor_keys:
        batch[key] = batch[key].to(target_device)

    from unsloth.models import llama as llama_mod

    return model, batch, trainer, llama_mod


def _trim_batch_to_total_tokens(data, total_tokens):
    """Return a copy of ``data`` with wide tensors cut to ``total_tokens`` columns.

    Only tensors with at least two dimensions whose second dimension exceeds
    ``total_tokens`` are sliced (and made contiguous). Every other value is
    carried over unchanged.
    """

    def _maybe_trim(value):
        needs_trim = (
            torch.is_tensor(value)
            and value.ndim >= 2
            and value.size(1) > total_tokens
        )
        return value[:, :total_tokens].contiguous() if needs_trim else value

    return {key: _maybe_trim(value) for key, value in data.items()}


def test_mask_packed_sequence_boundaries_marks_single_row():
    """Lengths [2, 1, 3] in one row of six labels must mask indices 1, 2 and 5.

    The helper must report that it changed the labels, and index 0 must be
    left untouched.
    """
    labels = torch.arange(6, dtype = torch.long).view(1, 6)
    seq_lengths = torch.tensor([2, 1, 3], dtype = torch.int32)

    changed = mask_packed_sequence_boundaries(labels, seq_lengths)

    assert changed is True
    flat = labels.view(-1)
    for boundary in (1, 2, 5):
        assert flat[boundary].item() == -100
    assert flat[0].item() != -100


def test_mask_packed_sequence_boundaries_across_multiple_rows():
    """Lengths [3, 2, 4, 1] over a 2x5 label grid must mask flat indices 2, 4, 8, 9.

    At least one label must remain unmasked.
    """
    labels = torch.arange(10, dtype = torch.long).view(2, 5)
    seq_lengths = torch.tensor([3, 2, 4, 1], dtype = torch.int32)

    changed = mask_packed_sequence_boundaries(labels, seq_lengths)
    assert changed is True

    flat = labels.view(-1)
    expected_boundaries = (2, 4, 8, 9)
    for boundary in expected_boundaries:
        assert flat[boundary].item() == -100
    assert torch.any(flat != -100)


def test_configure_sample_packing():
    """``configure_sample_packing`` must set all three flags on an empty config."""
    cfg = SimpleNamespace()

    configure_sample_packing(cfg)

    assert cfg.packing is True
    assert cfg.padding_free is True
    assert cfg.remove_unused_columns is False


def test_configure_padding_free():
    """``configure_padding_free`` must enable padding-free mode and keep unused columns."""
    cfg = SimpleNamespace(remove_unused_columns = True)

    configure_padding_free(cfg)

    assert cfg.padding_free is True
    assert cfg.remove_unused_columns is False


class _DummyChild(torch.nn.Module):
    """Parameter-free submodule stub that only exposes ``max_seq_length`` (8)."""

    def __init__(self):
        super().__init__()
        self.max_seq_length: int = 8


class _DummyModel(torch.nn.Module):
    """Parameter-free model stub with one ``_DummyChild`` submodule.

    Exposes ``max_seq_length`` (16) plus ``config`` and ``generation_config``
    namespaces that both name "sdpa" as the attention implementation.
    """

    def __init__(self):
        super().__init__()
        self.config = SimpleNamespace(_attn_implementation = "sdpa")
        self.generation_config = SimpleNamespace(attn_implementation = "sdpa")
        self.max_seq_length = 16
        self.child = _DummyChild()


class _DummyTrainer:
    """Trainer stub holding ``args`` and a ``DataCollatorForLanguageModeling``.

    The collator constructor is tried with progressively fewer optional
    keyword arguments, because not every signature accepts ``padding_free``
    and ``return_position_ids``. Whatever the constructor accepted, the
    resulting collator is guaranteed to expose both attributes.
    """

    def __init__(self):
        self.args = SimpleNamespace(remove_unused_columns = True)
        collator_args = {
            "pad_token_id": 0,
            "completion_only_loss": False,
            "return_tensors": "pt",
        }
        # Most specific signature first, bare signature last.
        optional_flags = [
            {"padding_free": True, "return_position_ids": False},
            {"padding_free": True},
            {},
        ]
        last_error = None
        for extra in optional_flags:
            try:
                self.data_collator = DataCollatorForLanguageModeling(
                    **collator_args, **extra
                )
            except TypeError as exc:
                last_error = exc
                continue
            break
        else:
            # Without this, the missing attribute would surface below as an
            # unrelated AttributeError and hide the real signature mismatch.
            raise TypeError(
                "DataCollatorForLanguageModeling rejected every known constructor signature"
            ) from last_error
        # Ensure attributes exist even if the constructor did not accept them
        if not hasattr(self.data_collator, "padding_free"):
            self.data_collator.padding_free = True
        if not hasattr(self.data_collator, "return_position_ids"):
            self.data_collator.return_position_ids = False


class _PaddingFreeCollator:
    """Collator stub that ignores its examples and counts ``torch_call`` invocations.

    Starts with ``padding_free = True`` and ``return_position_ids = False``.
    """

    def __init__(self):
        self.calls = 0
        self.return_position_ids = False
        self.padding_free = True

    def torch_call(self, examples):
        """Return a fixed 1x1 ``input_ids`` tensor plus the running call count."""
        self.calls += 1
        placeholder_ids = torch.zeros((1, 1), dtype = torch.long)
        return {"input_ids": placeholder_ids, "examples_seen": self.calls}


def test_enable_sample_packing():
    """``enable_sample_packing`` must flag the model tree and wrap the collator.

    After the call, the model and its child carry
    ``_unsloth_allow_packed_overlength``, and the collator returns position ids
    and is marked as wrapped. Collating two packed examples must yield one
    6-token row, the concatenated sequence lengths [2, 1, 3], and position ids
    that restart at every sequence.
    """
    model = _DummyModel()
    trainer = _DummyTrainer()

    enable_sample_packing(model, trainer)

    # The whole module hierarchy should now accept packed over-length inputs.
    for module in (model, model.child):
        assert getattr(module, "_unsloth_allow_packed_overlength") is True

    collator = trainer.data_collator
    assert collator.return_position_ids is True
    assert getattr(collator, "_unsloth_packing_wrapped") is True

    first_example = {
        "input_ids": [0, 1, 2],
        "labels": [0, 1, 2],
        "seq_lengths": [2, 1],
    }
    second_example = {
        "input_ids": [3, 4, 5],
        "labels": [3, 4, 5],
        "seq_lengths": [3],
    }
    batch = collator.torch_call([first_example, second_example])

    # Per-example lengths are merged into one flat tensor.
    assert "packed_seq_lengths" in batch
    expected_lengths = torch.tensor([2, 1, 3], dtype = torch.int32)
    assert torch.equal(batch["packed_seq_lengths"], expected_lengths)

    assert batch["input_ids"].shape == (1, 6)
    expected_positions = torch.tensor([0, 1, 0, 0, 1, 2], dtype = torch.long)
    assert torch.equal(batch["position_ids"].view(-1)[:6], expected_positions)


def test_enable_sample_packing_trl_collator(tmp_path):
    """Same collator expectations as the dummy-trainer test, with a real ``SFTTrainer``.

    Uses CUDA when available, otherwise CPU, and frees accelerator memory at
    the end when the trainer has an accelerator.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model, _, trainer, _ = _build_packed_training_setup(tmp_path, device)

    enable_sample_packing(model, trainer)

    first_example = {
        "input_ids": [0, 1, 2],
        "labels": [0, 1, 2],
        "seq_lengths": [2, 1],
    }
    second_example = {
        "input_ids": [3, 4, 5],
        "labels": [3, 4, 5],
        "seq_lengths": [3],
    }

    batch = trainer.data_collator.torch_call([first_example, second_example])

    assert batch["input_ids"].shape == (1, 6)
    expected_lengths = torch.tensor([2, 1, 3], dtype = torch.int32)
    assert torch.equal(batch["packed_seq_lengths"], expected_lengths)

    expected_positions = torch.tensor([0, 1, 0, 0, 1, 2], dtype = torch.long)
    assert torch.equal(batch["position_ids"].view(-1)[:6], expected_positions)

    if hasattr(trainer, "accelerator"):
        trainer.accelerator.free_memory()


def test_enable_padding_free_metadata():
    """``enable_padding_free_metadata`` must flag the model and wrap the collator.

    The wrapped collator must report one length per example ([3, 2]) as
    ``packed_seq_lengths``, and ``remove_unused_columns`` must end up disabled.
    """
    model = _DummyModel()
    trainer_args = SimpleNamespace(remove_unused_columns = True)
    trainer = SimpleNamespace(
        args = trainer_args,
        data_collator = _PaddingFreeCollator(),
    )

    enable_padding_free_metadata(model, trainer)

    for module in (model, model.child):
        assert getattr(module, "_unsloth_allow_packed_overlength") is True

    collator = trainer.data_collator
    assert collator.return_position_ids is True
    assert getattr(collator, "_unsloth_padding_free_lengths_wrapped") is True

    batch = collator.torch_call([{"input_ids": [0, 1, 2]}, {"input_ids": [3, 4]}])
    expected_lengths = torch.tensor([3, 2], dtype = torch.int32)
    assert torch.equal(batch["packed_seq_lengths"], expected_lengths)
    assert trainer.args.remove_unused_columns is False


def test_packing_sdpa(tmp_path):
    """End-to-end check of a packed batch through the SDPA attention path.

    Verifies the batch metadata (int32 ``packed_seq_lengths``, no
    ``attention_mask``, position ids restarting per sequence), then runs a
    forward pass with flash-attention and xformers reported as unavailable.
    The packed SDPA mask builder must be called, and the labels handed to the
    loss must be -100 at the last token of every packed sequence.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model, batch, trainer, llama_mod = _build_packed_training_setup(tmp_path, device)

    assert "packed_seq_lengths" in batch
    assert "attention_mask" not in batch
    seq_lengths = batch["packed_seq_lengths"]
    assert seq_lengths.dtype == torch.int32

    total_tokens = batch["input_ids"].size(-1)
    packed_tokens = int(seq_lengths.sum().item())
    assert packed_tokens == total_tokens

    assert "position_ids" in batch
    flat_positions = batch["position_ids"].reshape(-1)[:packed_tokens]
    per_sequence_positions = [
        torch.arange(length, dtype = torch.long) for length in seq_lengths.tolist()
    ]
    expected_positions = torch.cat(per_sequence_positions)
    assert torch.equal(flat_positions.cpu(), expected_positions)
    inputs = _trim_batch_to_total_tokens(batch, packed_tokens)

    seq_info = llama_mod.get_packed_info_from_kwargs(
        {"packed_seq_lengths": seq_lengths},
        inputs["input_ids"].device,
    )
    assert seq_info is not None

    # Keep the real builder so the recording wrapper can delegate to it.
    real_mask_builder = attention_dispatch_utils.build_sdpa_packed_attention_mask
    mask_calls = []
    captured_loss_labels = {}

    def _record_mask(seq_info, dtype, device, *, sliding_window = None):
        mask_calls.append(tuple(seq_info[0].tolist()))
        return real_mask_builder(
            seq_info,
            dtype = dtype,
            device = device,
            sliding_window = sliding_window,
        )

    def _record_loss(*, logits, labels, **loss_kwargs):
        captured_loss_labels["labels"] = labels.detach().to("cpu")
        return torch.zeros((), device = logits.device, dtype = logits.dtype)

    # Force the SDPA path and intercept the mask builder and the loss.
    patchers = [
        patch.object(attention_dispatch_utils, "HAS_FLASH_ATTENTION", False),
        patch.object(attention_dispatch_utils, "HAS_XFORMERS", False),
        patch.object(
            attention_dispatch_utils,
            "build_sdpa_packed_attention_mask",
            side_effect = _record_mask,
        ),
        patch.object(
            llama_mod,
            "fast_cross_entropy_loss",
            side_effect = _record_loss,
        ),
    ]
    with ExitStack() as stack:
        for patcher in patchers:
            stack.enter_context(patcher)
        with torch.no_grad():
            outputs = model(**inputs)

    assert mask_calls, "SDPA packed mask was not constructed"
    assert outputs.loss is not None
    assert "labels" in captured_loss_labels
    flat_loss_labels = captured_loss_labels["labels"].reshape(-1)
    # Index of the last token of each packed sequence.
    cumulative = torch.cumsum(
        seq_lengths.to(device = "cpu", dtype = torch.long), dim = 0
    )
    boundaries = cumulative - 1
    for boundary in boundaries.tolist():
        assert flat_loss_labels[boundary].item() == -100
    assert torch.any(flat_loss_labels != -100)

    if hasattr(trainer, "accelerator"):
        trainer.accelerator.free_memory()


================================================
FILE: tests/utils/test_qat.py
================================================
from unsloth import FastLanguageModel

from typing import Dict

import pytest
import torch

try:
    from torchao.quantization.qat import FakeQuantizedLinear
    from torchao.quantization.qat.fake_quantizer import (
        FakeQuantizerBase,
        Float8FakeQuantizer,
        Int4WeightFakeQuantizer,
        IntxFakeQuantizer,
    )
except ImportError:
    print(
        "Missing torchao import, please install or upgrade torchao with: pip install 'torchao>=0.15.0'"
    )


class _CountingFakeQuantizer(torch.nn.Module):
    """
    Identity stand-in for a fake quantizer that tallies its forward calls.
    """

    def __init__(self):
        super().__init__()
        self.count = 0

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Pass the input through untouched; only the call counter changes.
        self.count = self.count + 1
        return x


def _get_model(qat_scheme: str, full_finetuning: bool):
    """
    Load ``unsloth/Qwen3-1.7B`` configured for QAT and return ``(model, tokenizer)``.

    With `full_finetuning`, the QAT scheme is applied when the model is loaded.
    Otherwise the model is loaded without it and wrapped as a PEFT (LoRA)
    model that receives the scheme instead.
    """
    use_lora = not full_finetuning
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Qwen3-1.7B",
        load_in_4bit = False,
        full_finetuning = full_finetuning,
        qat_scheme = None if use_lora else qat_scheme,
    )
    if use_lora:
        model = FastLanguageModel.get_peft_model(
            model,
            qat_scheme = qat_scheme,
        )
    return model, tokenizer


def _test_linear_is_fake_quantized(linear: torch.nn.Linear, qat_scheme: str):
    """
    Assert that `linear` carries the fake quantizers expected for `qat_scheme`.

    The base layer is always checked. When `linear` has both `lora_A` and
    `lora_B`, their `default` adapters are checked the same way. A layer whose
    `in_features` is below the scheme's minimum is not checked. Raises
    `ValueError` for an unknown scheme.
    """
    weight_only = False
    if qat_scheme == "fp8-int4":
        act_fq_class = Float8FakeQuantizer
        weight_fq_class = Int4WeightFakeQuantizer
        min_in_features = 128
    elif qat_scheme == "fp8-fp8":
        act_fq_class = Float8FakeQuantizer
        weight_fq_class = Float8FakeQuantizer
        min_in_features = -1
    elif qat_scheme == "int8":
        act_fq_class = None
        weight_fq_class = IntxFakeQuantizer
        min_in_features = 128
        weight_only = True
    else:
        raise ValueError(f"Unknown qat_scheme: {qat_scheme}")

    def _assert_fake_quantized(layer):
        # Layers narrower than the scheme's minimum are exempt from the checks.
        if layer.in_features < min_in_features:
            return
        assert isinstance(layer, FakeQuantizedLinear)
        if not weight_only:
            assert isinstance(layer.activation_fake_quantizer, act_fq_class)
        assert isinstance(layer.weight_fake_quantizer, weight_fq_class)

    # Base layer activations and weights (LoRA wrappers expose `base_layer`).
    _assert_fake_quantized(getattr(linear, "base_layer", linear))

    # LoRA A and B are only present when full_finetuning=False.
    if hasattr(linear, "lora_A") and hasattr(linear, "lora_B"):
        adapters = (linear.lora_A.default, linear.lora_B.default)
        for adapter in adapters:
            _assert_fake_quantized(adapter)


def _test_fake_quantizers_are_called(
    model: torch.nn.Module,
    example_inputs: Dict,
    full_finetuning: bool,
    qat_scheme: str,
):
    """
    Run one forward pass and assert that the fake quantizers were invoked.

    Every `FakeQuantizerBase` child in `model` is replaced by a fresh
    `_CountingFakeQuantizer`, the model is called once on `example_inputs`
    (whose values are moved to CUDA in place), and the counters are checked.
    """
    activations_quantized = qat_scheme != "int8"

    def _install_counters(module: torch.nn.Module):
        for child_name, submodule in module.named_children():
            if isinstance(submodule, FakeQuantizerBase):
                setattr(module, child_name, _CountingFakeQuantizer())

    def _check_block_entry(projection):
        # The base layer is looked up even for weight-only schemes.
        base_layer = projection.base_layer
        if activations_quantized:
            assert hasattr(base_layer, "activation_fake_quantizer")
            assert base_layer.activation_fake_quantizer.count == 1

    def _verify_counters(module: torch.nn.Module):
        for child_name, submodule in module.named_children():
            if full_finetuning:
                if isinstance(submodule, FakeQuantizedLinear):
                    if activations_quantized:
                        assert submodule.activation_fake_quantizer.count == 1
                    assert submodule.weight_fake_quantizer.count == 1
                continue
            # For LoRA, input activations are fake quantized only once per block:
            # on q_proj for self_attn and on gate_proj for mlp.
            if child_name == "self_attn":
                _check_block_entry(submodule.q_proj)
            elif child_name == "mlp":
                _check_block_entry(submodule.gate_proj)
            elif isinstance(submodule, FakeQuantizedLinear):
                # Weight fake quantizers should always be called
                assert submodule.weight_fake_quantizer.count == 1

    for key, tensor in example_inputs.items():
        example_inputs[key] = tensor.cuda()
    model.apply(_install_counters)
    model(**example_inputs)
    model.apply(_verify_counters)


def _test_model_fake_quantize(qat_scheme: str, full_finetuning: bool):
    """
    Check every attention/MLP projection of the model for `qat_scheme` fake
    quantizers, then confirm the quantizers fire during a forward pass.
    """
    model, tokenizer = _get_model(qat_scheme, full_finetuning)
    # The LoRA path wraps the model, so the decoder stack sits two levels deeper.
    decoder = model.model if full_finetuning else model.base_model.model.model
    attention_projections = ("q_proj", "k_proj", "v_proj")
    mlp_projections = ("gate_proj", "up_proj", "down_proj")
    for layer in decoder.layers:
        for proj_name in attention_projections:
            _test_linear_is_fake_quantized(
                getattr(layer.self_attn, proj_name), qat_scheme
            )
        for proj_name in mlp_projections:
            _test_linear_is_fake_quantized(getattr(layer.mlp, proj_name), qat_scheme)
    inputs = tokenizer("How are you?", return_tensors = "pt")
    _test_fake_quantizers_are_called(decoder, inputs, full_finetuning, qat_scheme)


# TODO: there are bad interactions across tests right now, need to figure out
# how to disable model caching before re-enabling this test
# NOTE: the leading underscore in the function name is what disables it; under
# pytest's default naming rules only `test*` functions are collected.
@pytest.mark.parametrize("qat_scheme", ["fp8-int4", "fp8-fp8", "int8"])
def _test_full_model_fake_quantize(qat_scheme: str):
    """Full-finetuning variant of the fake-quantization check (currently disabled)."""
    _test_model_fake_quantize(qat_scheme, full_finetuning = True)


@pytest.mark.parametrize("qat_scheme", ["fp8-int4", "fp8-fp8", "int8"])
def test_lora_model_fake_quantize(qat_scheme: str):
    """LoRA variant of the fake-quantization check, run once per QAT scheme."""
    _test_model_fake_quantize(qat_scheme, full_finetuning = False)


================================================
FILE: tests/utils/test_trunc_normal_patch.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Tests for trunc_normal low-precision patch compatibility."""

import importlib.util
import inspect
from pathlib import Path

import pytest
import torch


_MISSING = object()


def _load_import_fixes_module():
    """Load `unsloth/import_fixes.py` from this checkout as a standalone module.

    The file is located relative to this test file (two directories up) and
    executed under the module name `unsloth_import_fixes_local`.
    NOTE(review): presumably done this way so the test does not have to import
    the `unsloth` package itself - confirm.
    """
    source_path = Path(__file__).resolve().parents[2] / "unsloth" / "import_fixes.py"
    module_spec = importlib.util.spec_from_file_location(
        "unsloth_import_fixes_local", source_path
    )
    assert module_spec is not None and module_spec.loader is not None
    loaded_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded_module)
    return loaded_module


def _getattr_or_missing(obj, name):
    """Return `obj.<name>`, or the `_MISSING` sentinel when the attribute is absent."""
    return getattr(obj, name, _MISSING)


def _restore_attr(obj, name, value):
    """Put `obj.<name>` back to `value`; `_MISSING` means the attribute must not exist."""
    if value is not _MISSING:
        setattr(obj, name, value)
    elif hasattr(obj, name):
        delattr(obj, name)


def test_trunc_normal_patch_accepts_positional_generator():
    """The patched `trunc_normal_` must accept `generator` positionally and by keyword."""
    import_fixes = _load_import_fixes_module()
    patch_fn = import_fixes.patch_trunc_normal_precision_issue

    init_mod = torch.nn.init
    # Snapshot the current state of torch.nn.init so `finally` can put it back.
    # NOTE(review): the two `_unsloth_trunc_normal_*` attributes are presumably
    # bookkeeping written by the patch function - confirm in import_fixes.py.
    old_fn = init_mod.trunc_normal_
    old_patched = _getattr_or_missing(init_mod, "_unsloth_trunc_normal_patched")
    old_original = _getattr_or_missing(init_mod, "_unsloth_trunc_normal_original")
    try:
        # Normalize to an unpatched baseline before applying the patch.
        if old_original is not _MISSING:
            init_mod.trunc_normal_ = old_original
        if hasattr(init_mod, "_unsloth_trunc_normal_patched"):
            delattr(init_mod, "_unsloth_trunc_normal_patched")
        if hasattr(init_mod, "_unsloth_trunc_normal_original"):
            delattr(init_mod, "_unsloth_trunc_normal_original")

        patch_fn()
        # `generator` must exist and must not be keyword-only, otherwise the
        # positional call below could not work.
        sig = inspect.signature(init_mod.trunc_normal_)
        assert "generator" in sig.parameters
        assert sig.parameters["generator"].kind is not inspect.Parameter.KEYWORD_ONLY

        tensor = torch.empty(1024, dtype = torch.float32)
        gen = torch.Generator()
        gen.manual_seed(3407)

        # Both calling conventions only need to complete without raising.
        init_mod.trunc_normal_(tensor, 0.0, 1.0, -2.0, 2.0, gen)
        init_mod.trunc_normal_(tensor, mean = 0.0, std = 1.0, a = -2.0, b = 2.0, generator = gen)
    finally:
        # Restore torch.nn.init exactly as it was found, patched or not.
        init_mod.trunc_normal_ = old_fn
        _restore_attr(init_mod, "_unsloth_trunc_normal_patched", old_patched)
        _restore_attr(init_mod, "_unsloth_trunc_normal_original", old_original)


def test_trunc_normal_patch_rejects_invalid_generator():
    """The patched `trunc_normal_` must raise `TypeError` for a non-generator `generator`."""
    import_fixes = _load_import_fixes_module()
    patch_fn = import_fixes.patch_trunc_normal_precision_issue

    init_mod = torch.nn.init
    # Snapshot the current state of torch.nn.init so `finally` can put it back.
    old_fn = init_mod.trunc_normal_
    old_patched = _getattr_or_missing(init_mod, "_unsloth_trunc_normal_patched")
    old_original = _getattr_or_missing(init_mod, "_unsloth_trunc_normal_original")
    try:
        # Normalize to an unpatched baseline before applying the patch.
        if old_original is not _MISSING:
            init_mod.trunc_normal_ = old_original
        if hasattr(init_mod, "_unsloth_trunc_normal_patched"):
            delattr(init_mod, "_unsloth_trunc_normal_patched")
        if hasattr(init_mod, "_unsloth_trunc_normal_original"):
            delattr(init_mod, "_unsloth_trunc_normal_original")

        patch_fn()
        # Unlike the positional-generator test, a missing `generator` parameter
        # is treated as "not applicable" here rather than as a failure.
        sig = inspect.signature(init_mod.trunc_normal_)
        if "generator" not in sig.parameters:
            pytest.skip("torch.nn.init.trunc_normal_ lacks a generator parameter")

        tensor = torch.empty(16, dtype = torch.float32)
        with pytest.raises(TypeError):
            init_mod.trunc_normal_(tensor, generator = 123)
    finally:
        # Restore torch.nn.init exactly as it was found, patched or not.
        init_mod.trunc_normal_ = old_fn
        _restore_attr(init_mod, "_unsloth_trunc_normal_patched", old_patched)
        _restore_attr(init_mod, "_unsloth_trunc_normal_original", old_original)


================================================
FILE: unsloth/__init__.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings, importlib, sys
from packaging.version import Version
import os, re, subprocess, inspect, functools
import numpy as np

# Flag in the process environment that Unsloth has been imported
os.environ["UNSLOTH_IS_PRESENT"] = "1"

# Record which of the libraries Unsloth patches are already in sys.modules.
# The snapshot is taken here, ahead of the imports below, and is used further
# down to warn about import order.
critical_modules = ["trl", "transformers", "peft"]
already_imported = [mod for mod in critical_modules if mod in sys.modules]

# Fix some issues before importing other packages
from .import_fixes import (
    fix_message_factory_issue,
    check_fbgemm_gpu_version,
    disable_broken_causal_conv1d,
    disable_broken_vllm,
    configure_amdgpu_asic_id_table_path,
    torchvision_compatibility_check,
    fix_diffusers_warnings,
    fix_huggingface_hub,
)

# Configure libdrm ids table path early so ROCm can resolve AMD GPU names.
configure_amdgpu_asic_id_table_path()
disable_broken_causal_conv1d()
disable_broken_vllm()
fix_message_factory_issue()
check_fbgemm_gpu_version()
torchvision_compatibility_check()
fix_diffusers_warnings()
fix_huggingface_hub()
# Each fix is one-shot: drop the names again so they do not remain as
# attributes of the `unsloth` package.
del configure_amdgpu_asic_id_table_path
del disable_broken_causal_conv1d
del disable_broken_vllm
del fix_message_factory_issue
del check_fbgemm_gpu_version
del torchvision_compatibility_check
del fix_diffusers_warnings
del fix_huggingface_hub

# This check is critical because Unsloth optimizes these libraries by modifying
# their code at import time. If they're imported first, the original (slower,
# more memory-intensive) implementations will be used instead of Unsloth's
# optimized versions, potentially causing OOM errors or slower training.
# `already_imported` was computed near the top of this module, before any of
# Unsloth's own imports ran.
if already_imported:
    # stacklevel=2 makes warning point to user's import line rather than this library code,
    # showing them exactly where to fix the import order in their script
    warnings.warn(
        f"WARNING: Unsloth should be imported before [{', '.join(already_imported)}] "
        f"to ensure all optimizations are applied. Your code may run slower or encounter "
        f"memory issues without these optimizations.\n\n"
        f"Please restructure your imports with 'import unsloth' at the top of your file.",
        stacklevel = 2,
    )
# Temporaries only; remove them from the package namespace.
del already_imported, critical_modules

# Unsloth currently does not work on multi GPU setups - sadly we are a 2 brother team so
# enabling it will require much more work, so we have to prioritize. Please understand!
# We do have a beta version, which you can contact us about!
# Thank you for your understanding and we appreciate it immensely!

# Fixes https://github.com/unslothai/unsloth/issues/1266
# Selects the "python" protobuf implementation via the environment variable.
# This unconditionally overwrites any value the user had already set.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# [TODO] Check why some GPUs don't work
#    "pinned_use_cuda_host_register:True,"\
#    "pinned_num_register_threads:8"


from importlib.metadata import version as importlib_version
from importlib.metadata import PackageNotFoundError

# Check for unsloth_zoo
# A missing distribution is turned into an ImportError with install instructions.
# An outdated one only prints an upgrade hint and is still imported. Any other
# failure (for example from `import unsloth_zoo` itself) propagates unchanged.
try:
    unsloth_zoo_version = importlib_version("unsloth_zoo")
    if Version(unsloth_zoo_version) < Version("2026.3.4"):
        print(
            "Unsloth: Please update Unsloth and Unsloth-Zoo to the latest version!\n"
            "Do this via `pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo`"
        )
        # if os.environ.get("UNSLOTH_DISABLE_AUTO_UPDATES", "0") == "0":
        #     try:
        #         os.system("pip install --upgrade --no-cache-dir --no-deps unsloth_zoo")
        #     except:
        #         try:
        #             os.system("pip install --upgrade --no-cache-dir --no-deps --user unsloth_zoo")
        #         except:
        #             raise ImportError("Unsloth: Please update unsloth_zoo via `pip install --upgrade --no-cache-dir --no-deps unsloth_zoo`")
    import unsloth_zoo
except PackageNotFoundError:
    raise ImportError(
        "Unsloth: Please install unsloth_zoo via `pip install unsloth_zoo` then retry!"
    )
# Import-time helpers only; remove them from the package namespace.
del PackageNotFoundError, importlib_version

# Try importing PyTorch
# Only a missing module is rewritten into an install hint; any other error
# raised while importing torch propagates unchanged.
try:
    import torch
except ModuleNotFoundError:
    raise ImportError(
        "Unsloth: Pytorch is not installed. Go to https://pytorch.org/.\n"
        "We have some installation instructions on our Github page."
    )

from unsloth_zoo.device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
)

# Fix other issues
from .import_fixes import (
    fix_xformers_performance_issue,
    fix_vllm_aimv2_issue,
    check_vllm_torch_sm100_compatibility,
    fix_vllm_guided_decoding_params,
    fix_vllm_pdl_blackwell,
    fix_triton_compiled_kernel_missing_attrs,
    patch_trunc_normal_precision_issue,
    ignore_logger_messages,
    patch_ipykernel_hf_xet,
    patch_trackio,
    patch_datasets,
    patch_enable_input_require_grads,
    fix_openenv_no_vllm,
    patch_openspiel_env_async,
    fix_executorch,
    patch_vllm_for_notebooks,
    patch_torchcodec_audio_decoder,
    disable_torchcodec_if_broken,
    disable_broken_wandb,
)

# Second batch of import-time fixes. These run after torch and
# `unsloth_zoo.device_type` have been imported above.
fix_xformers_performance_issue()
fix_vllm_aimv2_issue()
# Check vLLM + torch < 2.9.0 + SM100 compatibility BEFORE importing vLLM
check_vllm_torch_sm100_compatibility()
fix_vllm_guided_decoding_params()
fix_vllm_pdl_blackwell()
fix_triton_compiled_kernel_missing_attrs()
patch_trunc_normal_precision_issue()
ignore_logger_messages()
patch_ipykernel_hf_xet()
patch_trackio()
patch_datasets()
patch_enable_input_require_grads()
fix_openenv_no_vllm()
patch_openspiel_env_async()
fix_executorch()
patch_vllm_for_notebooks()
patch_torchcodec_audio_decoder()
disable_torchcodec_if_broken()
disable_broken_wandb()

# Each fix is one-shot: drop the names again so they do not remain as
# attributes of the `unsloth` package.
del fix_xformers_performance_issue
del fix_vllm_aimv2_issue
del check_vllm_torch_sm100_compatibility
del fix_vllm_guided_decoding_params
del fix_vllm_pdl_blackwell
del fix_triton_compiled_kernel_missing_attrs
del patch_trunc_normal_precision_issue
del ignore_logger_messages
del patch_ipykernel_hf_xet
del patch_trackio
del patch_datasets
del patch_enable_input_require_grads
del fix_openenv_no_vllm
del patch_openspiel_env_async
del fix_executorch
del patch_vllm_for_notebooks
del patch_torchcodec_audio_decoder
del disable_torchcodec_if_broken
del disable_broken_wandb

# Torch 2.4 has including_emulation
# Sets the module-level SUPPORTS_BFLOAT16 flag per device type and, on CUDA,
# replaces torch.cuda.is_bf16_supported with a wrapper.
# NOTE(review): there is no fallback branch, so SUPPORTS_BFLOAT16 stays undefined
# if DEVICE_TYPE is anything other than "cuda", "hip" or "xpu".
if DEVICE_TYPE == "cuda":
    # bf16 is treated as supported from compute capability major version 8 upwards.
    major_version, minor_version = torch.cuda.get_device_capability()
    SUPPORTS_BFLOAT16 = major_version >= 8

    old_is_bf16_supported = torch.cuda.is_bf16_supported
    if "including_emulation" in str(inspect.signature(old_is_bf16_supported)):

        # Same function, but `including_emulation` defaults to False here.
        # NOTE(review): presumably torch's own default differs - confirm.
        def is_bf16_supported(including_emulation = False):
            return old_is_bf16_supported(including_emulation)

        torch.cuda.is_bf16_supported = is_bf16_supported
    else:

        # Older signature without `including_emulation`: answer from the
        # compute-capability check above instead of calling into torch.
        def is_bf16_supported():
            return SUPPORTS_BFLOAT16

        torch.cuda.is_bf16_supported = is_bf16_supported
    del major_version, minor_version
elif DEVICE_TYPE == "hip":
    SUPPORTS_BFLOAT16 = torch.cuda.is_bf16_supported()
elif DEVICE_TYPE == "xpu":
    # torch.xpu.is_bf16_supported() does not have including_emulation
    # set SUPPORTS_BFLOAT16 as torch.xpu.is_bf16_supported()
    SUPPORTS_BFLOAT16 = torch.xpu.is_bf16_supported()

# For Gradio HF Spaces?
# if "SPACE_AUTHOR_NAME" not in os.environ and "SPACE_REPO_NAME" not in os.environ:
import triton

if DEVICE_TYPE == "cuda":
    # Resolve triton's `libcuda_dirs` helper. It is imported from a different
    # module from triton 3.0.0 onwards; if that import fails, a no-op stands in.
    libcuda_dirs = lambda: None
    if Version(triton.__version__) >= Version("3.0.0"):
        try:
            from triton.backends.nvidia.driver import libcuda_dirs
        except Exception:
            pass
    else:
        from triton.common.build import libcuda_dirs

    # Try loading bitsandbytes and triton
    try:
        import bitsandbytes as bnb
    except Exception:
        print(
            "Unsloth: `bitsandbytes` is not installed - 4bit QLoRA unallowed, but 16bit and full finetuning works!"
        )
        bnb = None
    try:
        # Probe that the bitsandbytes native library and libcuda both resolve.
        # With `bnb = None` the attribute access fails and lands in the handler too.
        cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
        libcuda_dirs()
    except Exception:
        warnings.warn("Unsloth: Running `ldconfig /usr/lib64-nvidia` to link CUDA.")

        if os.path.exists("/usr/lib64-nvidia"):
            os.system("ldconfig /usr/lib64-nvidia")
        elif os.path.exists("/usr/local"):
            # Sometimes bitsandbytes cannot be linked properly in Runpod for example
            possible_cudas = (
                subprocess.check_output(["ls", "-al", "/usr/local"])
                .decode("utf-8")
                .split("\n")
            )
            find_cuda = re.compile(r"[\s](cuda\-[\d\.]{2,})$")
            possible_cudas = [find_cuda.search(x) for x in possible_cudas]
            possible_cudas = [x.group(1) for x in possible_cudas if x is not None]

            # Try linking cuda folder, or everything in local
            if not possible_cudas:
                os.system("ldconfig /usr/local/")
            else:
                # Compare versions as tuples of integers. Parsing the suffix with
                # float() raised on names such as `cuda-12.4.1` (aborting the
                # import) and ordered `12.10` below `12.4`.
                find_number = re.compile(r"\d+")
                latest_cuda = max(
                    possible_cudas,
                    key = lambda x: tuple(int(n) for n in find_number.findall(x)),
                )
                os.system(f"ldconfig /usr/local/{latest_cuda}")
                del find_number, latest_cuda
            del possible_cudas, find_cuda

        # Re-import after ldconfig and probe once more.
        if bnb is not None:
            importlib.reload(bnb)
        importlib.reload(triton)
        try:
            libcuda_dirs = lambda: None
            if Version(triton.__version__) >= Version("3.0.0"):
                try:
                    from triton.backends.nvidia.driver import libcuda_dirs
                except Exception:
                    pass
            else:
                from triton.common.build import libcuda_dirs
            cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
            libcuda_dirs()
        except Exception:
            # Best effort only: warn and carry on rather than failing the import.
            warnings.warn(
                "Unsloth: CUDA is not linked properly.\n"
                "Try running `python -m bitsandbytes` then `python -m xformers.info`\n"
                "We tried running `ldconfig /usr/lib64-nvidia` ourselves, but it didn't work.\n"
                "You need to run in your terminal `sudo ldconfig /usr/lib64-nvidia` yourself, then import Unsloth.\n"
                "Also try `sudo ldconfig /usr/local/cuda-xx.x` - find the latest cuda version.\n"
                "Unsloth will still run for now, but maybe it might crash - let's hope it works!"
            )
    del libcuda_dirs
elif DEVICE_TYPE == "hip":
    # NO-OP for rocm device
    pass
elif DEVICE_TYPE == "xpu":
    import bitsandbytes as bnb

    # TODO: check triton for intel installed properly.
    pass

from .models import *
from .models import __version__
from .save import *
from .chat_templates import *
from .tokenizer_utils import *
from .trainer import *

# Export dataprep utilities for CLI and downstream users
from .dataprep.raw_text import RawTextDataLoader, TextPreprocessor
from unsloth_zoo.rl_environments import (
    check_python_modules,
    create_locked_down_function,
    execute_with_time_limit,
    Benchmarker,
    is_port_open,
    launch_openenv,
)

# Patch TRL trainers for backwards compatibility
# NOTE(review): `_patch_trl_trainer` is not imported by name in this module; it
# presumably arrives through one of the star imports above - confirm.
_patch_trl_trainer()


================================================
FILE: unsloth/_auto_install.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prints (does not run) a pip command that installs unsloth with the extra
# matching the locally installed torch version and CUDA version.
try: import torch
except: raise ImportError('Install torch via `pip install torch`')
from packaging.version import Version as V
import re
# Keep only the leading digits-and-dots part of the torch version string.
v = V(re.match(r"[0-9\.]{3,}", torch.__version__).group(0))
cuda = str(torch.version.cuda)
# NOTE(review): `is_ampere` and `USE_ABI` are computed but not used below (the
# `-ampere` suffix is hard-coded off). `get_device_capability()` is called
# unconditionally and presumably fails without a usable CUDA device - confirm.
is_ampere = torch.cuda.get_device_capability()[0] >= 8
USE_ABI = torch._C._GLIBCXX_USE_CXX11_ABI
if cuda not in ("11.8", "12.1", "12.4", "12.6", "12.8", "13.0"): raise RuntimeError(f"CUDA = {cuda} not supported!")
# Map the torch version onto an extras tag template; the first matching branch
# wins, so the order of the comparisons matters.
if   v <= V('2.1.0'): raise RuntimeError(f"Torch = {v} too old!")
elif v <= V('2.1.1'): x = 'cu{}{}-torch211'
elif v <= V('2.1.2'): x = 'cu{}{}-torch212'
elif v  < V('2.3.0'): x = 'cu{}{}-torch220'
elif v  < V('2.4.0'): x = 'cu{}{}-torch230'
elif v  < V('2.5.0'): x = 'cu{}{}-torch240'
elif v  < V('2.5.1'): x = 'cu{}{}-torch250'
elif v <= V('2.5.1'): x = 'cu{}{}-torch251'
elif v  < V('2.7.0'): x = 'cu{}{}-torch260'
elif v  < V('2.7.9'): x = 'cu{}{}-torch270'
elif v  < V('2.8.0'): x = 'cu{}{}-torch271'
elif v  < V('2.8.9'): x = 'cu{}{}-torch280'
elif v  < V('2.9.1'): x = 'cu{}{}-torch290'
elif v  < V('2.9.2'): x = 'cu{}{}-torch291'
elif v  < V('2.10.1'): x = 'cu{}{}-torch2100'
else: raise RuntimeError(f"Torch = {v} too new!")
# Newer torch versions accept a narrower set of CUDA versions than the check above.
if v > V('2.6.9') and cuda not in ("11.8", "12.6", "12.8", "13.0"): raise RuntimeError(f"CUDA = {cuda} not supported!")
if v >= V('2.10.0') and cuda not in ("12.6", "12.8", "13.0"): raise RuntimeError(f"Torch 2.10 requires CUDA 12.6, 12.8, or 13.0! Got CUDA = {cuda}")
# First slot: CUDA version without the dot. Second slot: always empty for now.
x = x.format(cuda.replace(".", ""), "-ampere" if False else "") # is_ampere is broken due to flash-attn
print(f'pip install --upgrade pip && pip install --no-deps git+https://github.com/unslothai/unsloth-zoo.git && pip install "unsloth[{x}] @ git+https://github.com/unslothai/unsloth.git" --no-build-isolation')

================================================
FILE: unsloth/chat_templates.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = [
    "get_chat_template",
    "test_chat_templates",
    "test_hf_gguf_equivalence",
    "remove_special_tokens",

    "to_sharegpt",
    "standardize_sharegpt",
    "standardize_data_formats",
    "apply_chat_template",
    "train_on_responses_only",

    "test_construct_chat_template",
]

from transformers import StoppingCriteria, StoppingCriteriaList
from torch import LongTensor, FloatTensor
from transformers.models.llama.modeling_llama import logger
from .save import patch_saving_functions
import os
import shutil
from .tokenizer_utils import *
from .models._utils import patch_tokenizer
import re
from .ollama_template_mappers import OLLAMA_TEMPLATES
from unsloth_zoo.dataset_utils import (
    train_on_responses_only,
    standardize_data_formats,
)
# Alias: `standardize_sharegpt` is the same function as `standardize_data_formats`.
standardize_sharegpt = standardize_data_formats
# Registries keyed by template name; both are filled in section by section below.
# CHAT_TEMPLATES values are 4-tuples: (Jinja template, EOS token, bool, Ollama template).
# NOTE(review): the meaning of the boolean is not visible in this part of the file.
CHAT_TEMPLATES = {}
# Default system message per template name, or None when a template has none.
DEFAULT_SYSTEM_MESSAGE = {}
def _ollama_template(name: str):
    """Return the entry stored under `name` in `OLLAMA_TEMPLATES`.

    An unknown name raises whatever `OLLAMA_TEMPLATES[name]` raises
    (presumably `KeyError`; the container's type is not visible here).
    """
    return OLLAMA_TEMPLATES[name]

# =========================================== Unsloth
# Unsloth's own efficient template, derived from Zephyr's.
# Layout: BOS, then the system message and a newline, then '>>> User: ' and
# '>>> Assistant: ' turns. The system message is the first message when its role
# is 'system', otherwise the '{system_message}' placeholder. Any other role raises.
# NOTE(review): '{system_message}' is presumably replaced elsewhere with
# DEFAULT_SYSTEM_MESSAGE["unsloth"] or a user-supplied message - confirm.
unsloth_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{{ messages[0]['content'] + '\n' }}"\
        "{% set loop_messages = messages[1:] %}"\
    "{% else %}"\
        "{{ '{system_message}' + '\n' }}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '>>> User: ' + message['content'] + '\n' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '>>> Assistant: ' }}"\
    "{% endif %}"

unsloth_ollama = _ollama_template("unsloth")

# NOTE(review): the literal string "eos_token" presumably means "use the
# tokenizer's own EOS token" rather than a concrete token - confirm.
unsloth_eos_token = "eos_token"
# Registry entry: (Jinja template, EOS token, bool, Ollama template).
CHAT_TEMPLATES["unsloth"] = (unsloth_template, unsloth_eos_token, False, unsloth_ollama,)
DEFAULT_SYSTEM_MESSAGE["unsloth"] = "You are a helpful assistant to the user"

# =========================================== Zephyr
# Zephyr has no BOS!
# Every message is rendered as '<|role|>\n' + content + EOS + '\n'. Any role
# other than 'user' or 'assistant' is rendered as a '<|system|>' turn.
zephyr_template = \
    "{% for message in messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '<|user|>\n' + message['content'] + eos_token + '\n' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}"\
        "{% else %}"\
            "{{ '<|system|>\n' + message['content'] + eos_token + '\n' }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '<|assistant|>\n' }}"\
    "{% endif %}"

zephyr_ollama = _ollama_template("zephyr")

zephyr_eos_token = "eos_token"
# Registry entry: (Jinja template, EOS token, bool, Ollama template).
CHAT_TEMPLATES["zephyr"] = (zephyr_template, zephyr_eos_token, False, zephyr_ollama,)
DEFAULT_SYSTEM_MESSAGE["zephyr"] = None # No system message in Zephyr

# =========================================== ChatML
# ChatML has no BOS and no EOS! Rather, <|im_start|> and <|im_end|> act as BOS / EOS.
# Any role other than 'user' or 'assistant' is rendered as a 'system' turn.
chatml_template = \
    "{% for message in messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' }}"\
        "{% else %}"\
            "{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ '<|im_start|>assistant\n' }}"\
    "{% endif %}"

chatml_ollama = _ollama_template("chatml")

# Unlike the templates above, ChatML registers a concrete EOS string and sets
# the tuple's boolean to True.
# NOTE(review): the boolean presumably controls mapping the tokenizer's EOS onto
# this string - confirm where CHAT_TEMPLATES is consumed.
chatml_eos_token = "<|im_end|>"
CHAT_TEMPLATES["chatml"] = (chatml_template, chatml_eos_token, True, chatml_ollama,)
DEFAULT_SYSTEM_MESSAGE["chatml"] = None # No system message in ChatML

# =========================================== Mistral-1
# Mistral Instruct doesn't allow system prompts, so we prepend the system message
# to the first user message (or emit it as its own [INST] block when the second
# message is not a user message).
# This template has no `add_generation_prompt` branch.
mistral_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{% if messages[1]['role'] == 'user' %}"\
            "{{ '[INST] ' + messages[0]['content'] + ' ' + messages[1]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[2:] %}"\
        "{% else %}"\
            "{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[1:] %}"\
        "{% endif %}"\
    "{% else %}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ '[INST] ' + message['content'] + ' [/INST]' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ message['content'] + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"

# Ollama from https://www.ollama.com/library/mistral
mistral_ollama = _ollama_template("mistral")

mistral_eos_token = "eos_token"
# Registry entry: (Jinja template, EOS token, bool, Ollama template).
CHAT_TEMPLATES["mistral"] = (mistral_template, mistral_eos_token, False, mistral_ollama,)
DEFAULT_SYSTEM_MESSAGE["mistral"] = None # No system message in Mistral

# =========================================== Llama-2
# Adds BOS before every [INST] block! And weird <<SYS>> system messages.
# A leading system message is wrapped in <<SYS>> ... <</SYS>> inside the first
# user turn; if the second message is not a user message, it is emitted as its
# own [INST] block without the <<SYS>> markers.
# User and assistant contents in the loop are stripped of surrounding whitespace.
# This template has no `add_generation_prompt` branch.
llama_template = \
    "{% if messages[0]['role'] == 'system' %}"\
        "{% if messages[1]['role'] == 'user' %}"\
            "{{ bos_token + '[INST] <<SYS>>\n' + messages[0]['content'] + '\n<</SYS>>\n\n' + messages[1]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[2:] %}"\
        "{% else %}"\
            "{{ bos_token + '[INST] ' + messages[0]['content'] + ' [/INST]' }}"\
            "{% set loop_messages = messages[1:] %}"\
        "{% endif %}"\
    "{% else %}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ bos_token + '[INST] ' + message['content'].strip() + ' [/INST]' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ ' ' + message['content'].strip() + ' ' + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"

# Ollama from https://www.ollama.com/library/llama3
llama_ollama = _ollama_template("llama")

llama_eos_token = "eos_token"
# Registry entry: (Jinja template, EOS token, bool, Ollama template).
CHAT_TEMPLATES["llama"] = (llama_template, llama_eos_token, False, llama_ollama,)
DEFAULT_SYSTEM_MESSAGE["llama"] = None # No system message in Llama

# ===========================================  Vicuna
# https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template
# Layout: BOS, the system message followed by a space, then 'USER: ' and
# 'ASSISTANT: ' turns. The system message is the first message when its role is
# 'system', otherwise the '{system_message}' placeholder. Any other role raises.
vicuna_template = \
    "{{ bos_token }}"\
    "{% if messages[0]['role'] == 'system' %}"\
        "{{ messages[0]['content'] + ' ' }}"\
        "{% set loop_messages = messages[1:] %}"\
    "{% else %}"\
        "{{ '{system_message}' + ' ' }}"\
        "{% set loop_messages = messages %}"\
    "{% endif %}"\
    "{% for message in loop_messages %}"\
        "{% if message['role'] == 'user' %}"\
            "{{ 'USER: ' + message['content'] + ' ' }}"\
        "{% elif message['role'] == 'assistant' %}"\
            "{{ 'ASSISTANT: ' + message['content'] + eos_token }}"\
        "{% else %}"\
            "{{ raise_exception('Only user and assistant roles are supported!') }}"\
        "{% endif %}"\
    "{% endfor %}"\
    "{% if add_generation_prompt %}"\
        "{{ 'ASSISTANT:' }}"\
    "{% endif %}"

# Ollama from https://www.ollama.com/library/vicuna
vicuna_ollama = _ollama_template("vicuna")

vicuna_eos_token = "eos_token"
# Registry entry: (Jinja template, EOS token, bool, Ollama template).
CHAT_TEMPLATES["vicuna"] = (vicuna_template, vicuna_eos_token, False, vicuna_ollama,)
DEFAULT_SYSTEM_MESSAGE["vicuna"] = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."

# =========================================== Vicuna Old
# Prompt format reference:
# https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template
#
# Layout: BOS, system text followed by a newline, then "### Human: ...\n" /
# "### Assistant: ...<eos>\n" turns. Any other role raises inside the template.
vicuna_old_template = (
    "{{ bos_token }}"
    # System text: a leading system message if present, otherwise the literal
    # '{system_message}' text (presumably substituted elsewhere - confirm).
    "{% if messages[0]['role'] == 'system' %}"
        "{{ messages[0]['content'] + '\n' }}"
        "{% set loop_messages = messages[1:] %}"
    "{% else %}"
        "{{ '{system_message}' + '\n' }}"
        "{% set loop_messages = messages %}"
    "{% endif %}"
    # Conversation turns.
    "{% for message in loop_messages %}"
        "{% if message['role'] == 'user' %}"
            "{{ '### Human: ' + message['content'] + '\n' }}"
        "{% elif message['role'] == 'assistant' %}"
            "{{ '### Assistant: ' + message['content'] + eos_token + '\n' }}"
        "{% else %}"
            "{{ raise_exception('Only user and assistant roles are supported!') }}"
        "{% endif %}"
    "{% endfor %}"
    # Open an assistant turn when a generation prompt is requested.
    "{% if add_generation_prompt %}"
        "{{ '### Assistant:' }}"
    "{% endif %}"
)

vicuna_old_ollama = _ollama_template("vicuna_old")

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
vicuna_old_eos_token = "eos_token"
CHAT_TEMPLATES["vicuna_old"] = (
    vicuna_old_template,
    vicuna_old_eos_token,
    False,
    vicuna_old_ollama,
)
# The apostrophe is stored backslash-escaped (the value contains \'),
# presumably because the text ends up inside the single-quoted
# '{system_message}' Jinja literal - confirm against the substitution code.
DEFAULT_SYSTEM_MESSAGE["vicuna_old"] = (
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the human\\'s questions."
)

# "vicuna old" (with a space) is an alias for the same entry.
CHAT_TEMPLATES["vicuna old"] = CHAT_TEMPLATES["vicuna_old"]
DEFAULT_SYSTEM_MESSAGE["vicuna old"] = DEFAULT_SYSTEM_MESSAGE["vicuna_old"]

# =========================================== Alpaca multi turn
# https://github.com/tatsu-lab/stanford_alpaca Changed for multi-turn convos
#
# Layout: BOS, system text followed by a blank line, then
# "### Instruction:\n...\n\n" / "### Response:\n...<eos>\n\n" turns.
# Any other role raises inside the template.
alpaca_template = (
    "{{ bos_token }}"
    # System text: a leading system message if present, otherwise the literal
    # '{system_message}' text (presumably substituted elsewhere - confirm).
    "{% if messages[0]['role'] == 'system' %}"
        "{{ messages[0]['content'] + '\n\n' }}"
        "{% set loop_messages = messages[1:] %}"
    "{% else %}"
        "{{ '{system_message}' + '\n\n' }}"
        "{% set loop_messages = messages %}"
    "{% endif %}"
    # Conversation turns.
    "{% for message in loop_messages %}"
        "{% if message['role'] == 'user' %}"
            "{{ '### Instruction:\n' + message['content'] + '\n\n' }}"
        "{% elif message['role'] == 'assistant' %}"
            "{{ '### Response:\n' + message['content'] + eos_token + '\n\n' }}"
        "{% else %}"
            "{{ raise_exception('Only user and assistant roles are supported!') }}"
        "{% endif %}"
    "{% endfor %}"
    # Open a response section when a generation prompt is requested.
    "{% if add_generation_prompt %}"
        "{{ '### Response:\n' }}"
    "{% endif %}"
)

alpaca_ollama = _ollama_template("alpaca")

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
alpaca_eos_token = "eos_token"
CHAT_TEMPLATES["alpaca"] = (
    alpaca_template,
    alpaca_eos_token,
    False,
    alpaca_ollama,
)
DEFAULT_SYSTEM_MESSAGE["alpaca"] = (
    "Below are some instructions that describe some tasks. "
    "Write responses that appropriately complete each request."
)

# =========================================== Gemma
# https://huggingface.co/google/gemma-7b-it
# Notice we must use |trim for lstrip and rstrip. <start_of_turn> maps to 106.
# <end_of_turn> maps to 107. user and model are normal 1 word tokens.
#
# Gemma has no system role: a leading system message is merged into the first
# user turn (system text, one space, then messages[1]'s content), so a system
# message must be followed by at least one more message.
gemma_template = (
    "{{ bos_token }}"
    # Fold "system + first following message" into a single user turn.
    "{% if messages[0]['role'] == 'system' %}"
        "{{'<start_of_turn>user\n' + messages[0]['content'] | trim + ' ' + messages[1]['content'] | trim + '<end_of_turn>\n'}}"
        "{% set messages = messages[2:] %}"
    "{% endif %}"
    # Remaining turns; the assistant role is rendered as "model".
    "{% for message in messages %}"
        "{% if message['role'] == 'user' %}"
            "{{'<start_of_turn>user\n' + message['content'] | trim + '<end_of_turn>\n'}}"
        "{% elif message['role'] == 'assistant' %}"
            "{{'<start_of_turn>model\n' + message['content'] | trim + '<end_of_turn>\n' }}"
        "{% else %}"
            "{{ raise_exception('Only user and assistant roles are supported!') }}"
        "{% endif %}"
    "{% endfor %}"
    # Open a model turn when a generation prompt is requested.
    "{% if add_generation_prompt %}"
        "{{ '<start_of_turn>model\n' }}"
    "{% endif %}"
)

# Ollama from https://www.ollama.com/library/gemma
gemma_ollama = _ollama_template("gemma")

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
# Unlike the entries that use the "eos_token" string, Gemma names its
# end-of-turn token explicitly and sets the boolean flag to True.
gemma_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma"] = (
    gemma_template,
    gemma_eos_token,
    True,
    gemma_ollama,
)
DEFAULT_SYSTEM_MESSAGE["gemma"] = None  # No system message in Gemma

# =========================================== Gemma with ChatML instead
# We find using <eos> is still more appropriate!
# The ChatML template defined earlier in this file, prefixed with BOS.
gemma_chatml_template = "".join(("{{ bos_token }}", chatml_template))

gemma_chatml_ollama = _ollama_template("gemma_chatml")

# EOS spec is a pair: a mapping from Gemma's special tokens to the ChatML
# markers, followed by the ChatML end marker itself.
# NOTE(review): how the mapping is applied is decided by the consumer of
# CHAT_TEMPLATES, which is not visible here.
gemma_chatml_eos_token = (
    {
        "<start_of_turn>": "<|im_start|>",
        "<eos>": "<|im_end|>",
    },
    "<|im_end|>",
)
CHAT_TEMPLATES["gemma_chatml"] = (
    gemma_chatml_template,
    gemma_chatml_eos_token,
    True,
    gemma_chatml_ollama,
)
DEFAULT_SYSTEM_MESSAGE["gemma_chatml"] = None  # No system message in Gemma

# =========================================== Gemma 2
# Same as Gemma 1, but with sliding window attention!
# https://ollama.com/library/gemma2/blobs/6522ca797f47
# The Jinja template is shared with Gemma 1; only the Ollama template differs.
gemma2_template = gemma_template
gemma2_ollama = _ollama_template("gemma2")
gemma2_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma2"] = (
    gemma2_template,
    gemma2_eos_token,
    True,
    gemma2_ollama,
)
DEFAULT_SYSTEM_MESSAGE["gemma2"] = None  # No system message in Gemma 2

# =========================================== Gemma 2 with ChatML instead
# Reuses the Gemma 1 ChatML template and EOS spec; only the Ollama template
# is looked up under its own name.
gemma2_chatml_template = gemma_chatml_template
gemma2_chatml_ollama = _ollama_template("gemma2_chatml")
gemma2_chatml_eos_token = gemma_chatml_eos_token
CHAT_TEMPLATES["gemma2_chatml"] = (
    gemma2_chatml_template,
    gemma2_chatml_eos_token,
    True,
    gemma2_chatml_ollama,
)
DEFAULT_SYSTEM_MESSAGE["gemma2_chatml"] = None  # No system message in Gemma 2

# =========================================== Llama-3
# Weirdly \n\n is needed?
#
# Every message is rendered as
# "<|start_header_id|>ROLE<|end_header_id|>\n\n" + trimmed content + "<|eot_id|>".
# Roles other than user/assistant are passed through under their own name.
llama3_template = (
    "{{ bos_token }}"
    "{% for message in messages %}"
        "{% if message['role'] == 'user' %}"
            "{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"
        "{% elif message['role'] == 'assistant' %}"
            "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"
        "{% else %}"
            "{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}"
        "{% endif %}"
    "{% endfor %}"
    # Open an assistant header when a generation prompt is requested.
    "{% if add_generation_prompt %}"
        "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
    "{% endif %}"
)

# Ollama from https://www.ollama.com/library/llama3
llama3_ollama = _ollama_template("llama-3")

llama3_template_eos_token = "eos_token"

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
CHAT_TEMPLATES["llama-3"] = (
    llama3_template,
    llama3_template_eos_token,
    False,
    llama3_ollama,
)
DEFAULT_SYSTEM_MESSAGE["llama-3"] = None  # No system message in Llama-3

# "llama3" is an alias for the same entry.
CHAT_TEMPLATES["llama3"] = CHAT_TEMPLATES["llama-3"]
DEFAULT_SYSTEM_MESSAGE["llama3"] = DEFAULT_SYSTEM_MESSAGE["llama-3"]


# =========================================== Phi-3
# The template deliberately does not start with "{{ bos_token }}"
# (Phi-3.5 removes BOS?).
#
# Every message is rendered as "<|ROLE|>\n" + content + "<|end|>\n"; roles
# other than user/assistant are passed through under their own name.
phi3_template = (
    "{% for message in messages %}"
        "{% if message['role'] == 'user' %}"
            "{{'<|user|>\n' + message['content'] + '<|end|>\n'}}"
        "{% elif message['role'] == 'assistant' %}"
            "{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}"
        "{% else %}"
            "{{'<|' + message['role'] + '|>\n' + message['content'] + '<|end|>\n'}}"
        "{% endif %}"
    "{% endfor %}"
    # Open an assistant turn when a generation prompt is requested.
    "{% if add_generation_prompt %}"
        "{{ '<|assistant|>\n' }}"
    "{% endif %}"
)

# Ollama from https://www.ollama.com/library/phi3
phi3_ollama = _ollama_template("phi-3")

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
phi3_template_eos_token = "<|end|>"
CHAT_TEMPLATES["phi-3"] = (
    phi3_template,
    phi3_template_eos_token,
    False,
    phi3_ollama,
)
DEFAULT_SYSTEM_MESSAGE["phi-3"] = None  # No system message in Phi-3

# "phi-35" and "phi-3.5" are aliases for the Phi-3 entry.
CHAT_TEMPLATES["phi-35"] = CHAT_TEMPLATES["phi-3"]
DEFAULT_SYSTEM_MESSAGE["phi-35"] = None  # No system message in Phi-3.5

CHAT_TEMPLATES["phi-3.5"] = CHAT_TEMPLATES["phi-3"]
DEFAULT_SYSTEM_MESSAGE["phi-3.5"] = None  # No system message in Phi-3.5

# =========================================== Llama-3.1
"""
No trimming in Llama 3.1 Instruct!
Also an extra newline for Cutting Knowledge Date
See https://colab.research.google.com/drive/1Xpqq5xpIgO-B00MQ-UccYMwN2J8QFgBM?usp=sharing

The template should also be rendered with today's date, i.e.

import datetime
tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    tokenize = False,
    date_string = datetime.datetime.today().strftime("%d %B %Y"),
)
"""

# Jinja chat template for Llama-3.1 with tool-calling support.
#
# Optional render-time variables and the defaults the template applies itself:
#   custom_tools          - if defined, used as `tools`
#   tools                 - defaults to none
#   tools_in_user_message - defaults to true
#   date_string           - defaults to "26 July 2024"
#   builtin_tools         - if defined, switches tool calls to <|eom_id|> endings
#
# Rendering order:
#   1. BOS, then a system header that is ALWAYS emitted. It contains the optional
#      "Environment: ipython" / "Tools: ..." lines, the knowledge-cutoff and
#      today's-date lines, optionally the tool definitions (only when
#      tools_in_user_message is false), and finally the system text - the
#      leading system message if present, otherwise the literal
#      "{system_message}" text (presumably substituted elsewhere - confirm).
#   2. When tools are given and tools_in_user_message is true, the first
#      remaining message is consumed and re-emitted as a user turn prefixed
#      with the tool definitions; an exception is raised if no message remains.
#   3. The remaining messages: plain turns, assistant tool calls (exactly one
#      call per message, otherwise an exception is raised), and tool / ipython
#      results rendered under an "ipython" header.
#   4. An assistant header when add_generation_prompt is set.
llama31_template = \
"""{{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if not date_string is defined %}
    {%- set date_string = "26 July 2024" %}
{%- endif %}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}
    {%- set system_message = messages[0]['content'] %}
    {%- set messages = messages[1:] %}
{%- else %}
    {%- set system_message = "{system_message}" %}
{%- endif %}

{#- System message + builtin tools #}
{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
{%- if builtin_tools is defined or tools is not none %}
    {{- "Environment: ipython\n" }}
{%- endif %}
{%- if builtin_tools is defined %}
    {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}}
{%- endif %}
{{- "Cutting Knowledge Date: December 2023\n" }}
{{- "Today Date: " + date_string + "\n\n" }}
{%- if tools is not none and not tools_in_user_message %}
    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
    {{- "Do not use variables.\n\n" }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
{%- endif %}
{{- system_message }}
{{- "<|eot_id|>" }}

{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and not tools is none %}
    {#- Extract the first user message so we can plug it in here #}
    {%- if messages | length != 0 %}
        {%- set first_user_message = messages[0]['content'] %}
        {%- set messages = messages[1:] %}
    {%- else %}
        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
{%- endif %}
    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
    {{- "Given the following functions, please respond with a JSON for a function call " }}
    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
    {{- "Do not use variables.\n\n" }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "\n\n" }}
    {%- endfor %}
    {{- first_user_message + "<|eot_id|>"}}
{%- endif %}

{%- for message in messages %}
    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] + '<|eot_id|>' }}
    {%- elif 'tool_calls' in message %}
        {%- if not message.tool_calls|length == 1 %}
            {{- raise_exception("This model only supports single tool-calls at once!") }}
        {%- endif %}
        {%- set tool_call = message.tool_calls[0].function %}
        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}
            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
            {{- "<|python_tag|>" + tool_call.name + ".call(" }}
            {%- for arg_name, arg_val in tool_call.arguments | items %}
                {{- arg_name + '="' + arg_val + '"' }}
                {%- if not loop.last %}
                    {{- ", " }}
                {%- endif %}
                {%- endfor %}
            {{- ")" }}
        {%- else  %}
            {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
            {{- '{"name": "' + tool_call.name + '", ' }}
            {{- '"parameters": ' }}
            {{- tool_call.arguments | tojson }}
            {{- "}" }}
        {%- endif %}
        {%- if builtin_tools is defined %}
            {#- This means we're in ipython mode #}
            {{- "<|eom_id|>" }}
        {%- else %}
            {{- "<|eot_id|>" }}
        {%- endif %}
    {%- elif message.role == "tool" or message.role == "ipython" %}
        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
        {%- if message.content is mapping or message.content is iterable %}
            {{- message.content | tojson }}
        {%- else %}
            {{- message.content }}
        {%- endif %}
        {{- "<|eot_id|>" }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
{%- endif %}
"""

# Ollama from https://ollama.com/library/llama3.1 (needs updating!)
llama31_ollama = _ollama_template("llama-3.1")

llama31_template_eos_token = "eos_token"

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
CHAT_TEMPLATES["llama-3.1"] = (
    llama31_template,
    llama31_template_eos_token,
    False,
    llama31_ollama,
)
DEFAULT_SYSTEM_MESSAGE["llama-3.1"] = ""  # Llama3.1 default system message is empty + the dates

# "llama-31" and the Llama 3.2 / 3.3 spellings all share the Llama-3.1 entry
# and its empty default system message.
for version in ("llama-31", "llama-3.2", "llama-3.3", "llama-32", "llama-33"):
    CHAT_TEMPLATES[version] = CHAT_TEMPLATES["llama-3.1"]
    DEFAULT_SYSTEM_MESSAGE[version] = ""


# =========================================== Qwen 2.5
# ChatML-style template (<|im_start|>ROLE ... <|im_end|>) with tool-calling support.
#
# System block:
#   * with `tools`: the leading system message (or a hard-coded Qwen identity
#     sentence - NOT the "{system_message}" text) followed by the tool
#     signatures inside <tools></tools>.
#   * without `tools`: the leading system message, or the literal
#     "{system_message}" text (presumably substituted elsewhere - confirm).
# Messages:
#   * user turns, non-leading system turns and assistant turns without tool
#     calls are emitted verbatim;
#   * assistant tool calls are wrapped in <tool_call></tool_call>;
#   * consecutive "tool" messages are grouped into a single user turn, each
#     wrapped in <tool_response></tool_response>.
# NOTE: this is a regular (non-raw) Python string, so "\\n" reaches Jinja as a
# backslash-n escape inside the template's string literals, while a bare "\n"
# is a real newline in the template source.
qwen25_template = \
"""{%- if tools %}
    {{- \'<|im_start|>system\\n\' }}
    {%- if messages[0][\'role\'] == \'system\' %}
        {{- messages[0][\'content\'] }}
    {%- else %}
        {{- \'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\' }}
    {%- endif %}
    {{- "\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}
    {%- for tool in tools %}
        {{- "\\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}\n{%- else %}
    {%- if messages[0][\'role\'] == \'system\' %}
        {{- \'<|im_start|>system\\n\' + messages[0][\'content\'] + \'<|im_end|>\\n\' }}
    {%- else %}
        {{- \'<|im_start|>system\\n{system_message}<|im_end|>\\n\' }}
    {%- endif %}\n{%- endif %}\n{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
        {{- \'<|im_start|>\' + message.role + \'\\n\' + message.content + \'<|im_end|>\' + \'\\n\' }}
    {%- elif message.role == "assistant" %}
        {{- \'<|im_start|>\' + message.role }}
        {%- if message.content %}
            {{- \'\\n\' + message.content }}
        {%- endif %}
        {%- for tool_call in message.tool_calls %}
            {%- if tool_call.function is defined %}
                {%- set tool_call = tool_call.function %}
            {%- endif %}
            {{- \'\\n<tool_call>\\n{"name": "\' }}
            {{- tool_call.name }}
            {{- \'", "arguments": \' }}
            {{- tool_call.arguments | tojson }}
            {{- \'}\\n</tool_call>\' }}
        {%- endfor %}
        {{- \'<|im_end|>\\n\' }}
    {%- elif message.role == "tool" %}
        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}            {{- \'<|im_start|>user\' }}
        {%- endif %}
        {{- \'\\n<tool_response>\\n\' }}
        {{- message.content }}
        {{- \'\\n</tool_response>\' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- \'<|im_end|>\\n\' }}
        {%- endif %}
    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}
    {{- \'<|im_start|>assistant\\n\' }}
{%- endif %}
"""


# Ollama from https://ollama.com/library/qwen2.5/blobs/eb4402837c78
qwen25_ollama = _ollama_template("qwen-2.5")

qwen25_template_eos_token = "eos_token"
# Qwen 2.5 does have a default system message: the same identity sentence the
# template hard-codes in its tools branch.
qwen25_default_system_message = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
CHAT_TEMPLATES["qwen-2.5"] = (
    qwen25_template,
    qwen25_template_eos_token,
    False,
    qwen25_ollama,
)
DEFAULT_SYSTEM_MESSAGE["qwen-2.5"] = qwen25_default_system_message

# Alternative spellings share the same entry and default system message.
CHAT_TEMPLATES["qwen-25"] = CHAT_TEMPLATES["qwen-2.5"]
DEFAULT_SYSTEM_MESSAGE["qwen-25"] = qwen25_default_system_message

CHAT_TEMPLATES["qwen25"] = CHAT_TEMPLATES["qwen-2.5"]
DEFAULT_SYSTEM_MESSAGE["qwen25"] = qwen25_default_system_message

CHAT_TEMPLATES["qwen2.5"] = CHAT_TEMPLATES["qwen-2.5"]
DEFAULT_SYSTEM_MESSAGE["qwen2.5"] = qwen25_default_system_message

# =========================================== Phi-4
# The template deliberately does not start with "{{ bos_token }}"
# (Phi-4 removes BOS?).
#
# system / user / assistant messages are rendered as
# "<|im_start|>ROLE<|im_sep|>" + content + "<|im_end|>" with no newlines.
# Messages with any other role fall through the if/elif chain and produce
# no output.
phi4_template = (
    "{% for message in messages %}"
        "{% if (message['role'] == 'system') %}"
            "{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}"
        "{% elif (message['role'] == 'user') %}"
            "{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}"
        "{% elif (message['role'] == 'assistant') %}"
            "{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}"
        "{% endif %}"
    "{% endfor %}"
    # Open an assistant turn when a generation prompt is requested.
    "{% if add_generation_prompt %}"
        "{{ '<|im_start|>assistant<|im_sep|>' }}"
    "{% endif %}"
)

# Hand-written Ollama (Go template) prompt for Phi-4.
# NOTE(review): nothing in the visible part of this file reads this name -
# phi4_ollama below comes from _ollama_template("phi-4") instead. Check for
# other users before removing it.
_phi4_ollama_template = (
    "{{ if .System }}<|im_start|><|system|><|im_sep|>{{ .System }}<|im_end|>{{ end }}"
    "{{ if .Prompt }}<|im_start|><|user|><|im_sep|>{{ .Prompt }}<|im_end|>{{ end }}"
    "<|im_start|><|assistant|><|im_sep|>{{ .Response }}<|im_end|>"
)

# Ollama from https://www.ollama.com/library/phi4 is different
phi4_ollama = _ollama_template("phi-4")

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
phi4_template_eos_token = "<|im_end|>"
CHAT_TEMPLATES["phi-4"] = (
    phi4_template,
    phi4_template_eos_token,
    False,
    phi4_ollama,
)
DEFAULT_SYSTEM_MESSAGE["phi-4"] = None  # No system message in Phi-4


# =========================================== Gemma-3
# Obtained via
# print(tokenizer.chat_template.replace("}\n", "####").replace("\n", "\\n").replace("####", "}\n"))
#
# Behaviour of the template below:
#   * A leading system message is not rendered as its own turn; its text
#     (string content, or the 'text' of the first content item) plus a blank
#     line becomes a prefix of the first rendered turn.
#   * After the optional system message, roles must strictly alternate
#     starting with "user"; otherwise an exception is raised.
#   * The "assistant" role is rendered as "model".
#   * Content may be a plain string (trimmed) or a list of items, where
#     'image' items become <start_of_image> and 'text' items are trimmed text;
#     any other content type raises "Invalid content type".
gemma3_template = \
"""{{ bos_token }}
{%- if messages[0]['role'] == 'system' -%}
    {%- if messages[0]['content'] is string -%}
        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
    {%- else -%}
        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
    {%- endif -%}
    {%- set loop_messages = messages[1:] -%}
{%- else -%}
    {%- set first_user_prefix = "" -%}
    {%- set loop_messages = messages -%}
{%- endif -%}
{%- for message in loop_messages -%}
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
    {%- endif -%}
    {%- if (message['role'] == 'assistant') -%}
        {%- set role = "model" -%}
    {%- else -%}
        {%- set role = message['role'] -%}
    {%- endif -%}
    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else "") }}
    {%- if message['content'] is string -%}
        {{ message['content'] | trim }}
    {%- elif message['content'] is iterable -%}
        {%- for item in message['content'] -%}
            {%- if item['type'] == 'image' -%}
                {{ '<start_of_image>' }}
            {%- elif item['type'] == 'text' -%}
                {{ item['text'] | trim }}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {{ raise_exception("Invalid content type") }}
    {%- endif -%}
    {{ '<end_of_turn>\n' }}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{ '<start_of_turn>model\n' }}
{%- endif -%}
"""

# Ollama from https://ollama.com/library/gemma3/blobs/e0a42594d802
gemma3_ollama = _ollama_template("gemma-3")

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
gemma3_template_eos_token = "<end_of_turn>"
CHAT_TEMPLATES["gemma-3"] = (
    gemma3_template,
    gemma3_template_eos_token,
    False,
    gemma3_ollama,
)
DEFAULT_SYSTEM_MESSAGE["gemma-3"] = None  # No system message in Gemma-3

# "gemma3" is an alias for the same entry.
CHAT_TEMPLATES["gemma3"] = CHAT_TEMPLATES["gemma-3"]
DEFAULT_SYSTEM_MESSAGE["gemma3"] = DEFAULT_SYSTEM_MESSAGE["gemma-3"]

# =========================================== Qwen-3
# Official Qwen-3 chat template (see https://ollama.com/library/qwen3/blobs/eb4402837c78)
#
# ChatML-style template with tool calling and <think> reasoning blocks:
#   * System block: with `tools`, the optional leading system message followed
#     by the tool signatures; without `tools`, only a leading system message
#     (there is no "{system_message}" fallback in this template).
#   * First pass (reverse scan): records in ns.last_query_index the position of
#     the last user message that is not purely a <tool_response>...</tool_response>
#     payload.
#   * Second pass: renders the messages. For assistant turns the reasoning is
#     taken from message.reasoning_content, or split out of the content at
#     '</think>'. A <think> block is only emitted for assistant turns after
#     last_query_index (always for the final message, otherwise only when
#     reasoning is non-empty). Tool calls are wrapped in <tool_call> and
#     consecutive tool results are grouped into one user turn.
#   * Generation prompt: opens an assistant turn; when enable_thinking is
#     defined and false, an empty <think></think> block is appended.
# The string starts with a newline; the first tag's "{%-" strips it at render time.
qwen3_template = \
"""
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for forward_message in messages %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- set message = messages[index] %}
    {%- set current_content = message.content if message.content is not none else '' %}
    {%- set tool_start = '<tool_response>' %}
    {%- set tool_start_length = tool_start|length %}
    {%- set start_of_message = current_content[:tool_start_length] %}
    {%- set tool_end = '</tool_response>' %}
    {%- set tool_end_length = tool_end|length %}
    {%- set start_pos = (current_content|length) - tool_end_length %}
    {%- if start_pos < 0 %}
        {%- set start_pos = 0 %}
    {%- endif %}
    {%- set end_of_message = current_content[start_pos:] %}
    {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {%- set content = message.content %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in message.content %}
                {%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
                {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
                {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\n' + content }}
            {%- endif %}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {%- if enable_thinking is defined and enable_thinking is false %}
        {{- '<think>\n\n</think>\n\n' }}
    {%- endif %}
{%- endif %}
"""

qwen3_ollama = _ollama_template("qwen-3")
qwen3_template_eos_token = "<|im_end|>"

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
CHAT_TEMPLATES["qwen-3"] = (
    qwen3_template,
    qwen3_template_eos_token,
    False,
    qwen3_ollama,
)
DEFAULT_SYSTEM_MESSAGE["qwen-3"] = None  # No default system message for Qwen-3

# "qwen3" is an alias for the same entry.
CHAT_TEMPLATES["qwen3"] = CHAT_TEMPLATES["qwen-3"]
DEFAULT_SYSTEM_MESSAGE["qwen3"] = DEFAULT_SYSTEM_MESSAGE["qwen-3"]

# =========================================== Gemma-3n
# Obtained via
# print(tokenizer.chat_template.replace("}\n", "####").replace("\n", "\\n").replace("####", "}\n"))
#
# Behaviour of the template below:
#   * A leading system message is not rendered as its own turn; its text
#     (string content, or the 'text' of the first content item) plus a blank
#     line becomes a prefix of the first rendered turn.
#   * After the optional system message, roles must strictly alternate
#     starting with "user"; otherwise an exception is raised.
#   * The "assistant" role is rendered as "model".
#   * Content may be a plain string (trimmed) or a list of items, where
#     'audio' items become <audio_soft_token>, 'image' items become
#     <image_soft_token> and 'text' items are trimmed text; any other content
#     type raises "Invalid content type".
gemma3n_template = \
"""{{ bos_token }}
{%- if messages[0]['role'] == 'system' -%}
    {%- if messages[0]['content'] is string -%}
        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}
    {%- else -%}
        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}
    {%- endif -%}
    {%- set loop_messages = messages[1:] -%}
{%- else -%}
    {%- set first_user_prefix = "" -%}
    {%- set loop_messages = messages -%}
{%- endif -%}
{%- for message in loop_messages -%}
    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
    {%- endif -%}
    {%- if (message['role'] == 'assistant') -%}
        {%- set role = "model" -%}
    {%- else -%}
        {%- set role = message['role'] -%}
    {%- endif -%}
    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else "") }}
    {%- if message['content'] is string -%}
        {{ message['content'] | trim }}
    {%- elif message['content'] is iterable -%}
        {%- for item in message['content'] -%}
            {%- if item['type'] == 'audio' -%}
                {{ '<audio_soft_token>' }}
            {%- elif item['type'] == 'image' -%}
                {{ '<image_soft_token>' }}
            {%- elif item['type'] == 'text' -%}
                {{ item['text'] | trim }}
            {%- endif -%}
        {%- endfor -%}
    {%- else -%}
        {{ raise_exception("Invalid content type") }}
    {%- endif -%}
    {{ '<end_of_turn>\n' }}
{%- endfor -%}
{%- if add_generation_prompt -%}
    {{'<start_of_turn>model\n'}}
{%- endif -%}
"""

# Ollama from https://ollama.com/library/gemma3n/blobs/e0a42594d802
gemma3n_ollama = _ollama_template("gemma-3n")
gemma3n_template_eos_token = "<end_of_turn>"

# Entry layout: (jinja template, EOS / stop token spec, boolean flag, Ollama template).
CHAT_TEMPLATES["gemma-3n"] = (
    gemma3n_template,
    gemma3n_template_eos_token,
    False,
    gemma3n_ollama,
)
DEFAULT_SYSTEM_MESSAGE["gemma-3n"] = None  # No system message in Gemma-3n

# "gemma3n" is an alias for the same entry.
CHAT_TEMPLATES["gemma3n"] = CHAT_TEMPLATES["gemma-3n"]
DEFAULT_SYSTEM_MESSAGE["gemma3n"] = DEFAULT_SYSTEM_MESSAGE["gemma-3n"]

# =========================================== GPT-OSS
# Obtained via
# print(tokenizer.chat_template.replace("}\n", "####").replace("\n", "\\n").replace("####", "}\n"))
gptoss_template = \
"""{#-
  In addition to the normal inputs of `messages` and `tools`, this template also accepts the
  following kwargs:
  - "builtin_tools": A list, can contain "browser" and/or "python".
  - "model_identity": A string that optionally describes the model identity.
  - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium".
 #}

{#- Tool Definition Rendering ============================================== #}
{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
    {%- if param_spec.type == "array" -%}
        {%- if param_spec['items'] -%}
            {%- if param_spec['items']['type'] == "string" -%}
                {{- "string[]" }}
            {%- elif param_spec['items']['type'] == "number" -%}
                {{- "number[]" }}
            {%- elif param_spec['items']['type'] == "integer" -%}
                {{- "number[]" }}
            {%- elif param_spec['items']['type'] == "boolean" -%}
                {{- "boolean[]" }}
            {%- else -%}
                {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
                {%- if inner_type == "object | object" or inner_type|length > 50 -%}
                    {{- "any[]" }}
                {%- else -%}
                    {{- inner_type + "[]" }}
                {%- endif -%}
            {%- endif -%}
            {%- if param_spec.nullable -%}
                {{- " | null" }}
            {%- endif -%}
        {%- else -%}
            {{- "any[]" }}
            {%- if param_spec.nullable -%}
                {{- " | null" }}
            {%- endif -%}
        {%- endif -%}
    {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
        {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
        {%- if param_spec.type | length > 1 -%}
            {{- param_spec.type | join(" | ") }}
        {%- else -%}
            {{- param_spec.type[0] }}
        {%- endif -%}
    {%- elif param_spec.oneOf -%}
        {#- Handle oneOf schemas - check for complex unions and fallback to any #}
        {%- set has_object_variants = false -%}
        {%- for variant in param_spec.oneOf -%}
            {%- if variant.type == "object" -%}
                {%- set has_object_variants = true -%}
            {%- endif -%}
        {%- endfor -%}
        {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
            {{- "any" }}
        {%- else -%}
            {%- for variant in param_spec.oneOf -%}
                {{- render_typescript_type(variant, required_params) -}}
                {%- if variant.description %}
                    {{- "// " + variant.description }}
                {%- endif -%}
                {%- if variant.default is defined %}
                    {{ "// default: " + variant.default|tojson }}
                {%- endif -%}
                {%- if not loop.last %}
                    {{- " | " }}
                {% endif -%}
            {%- endfor -%}
        {%- endif -%}
    {%- elif param_spec.type == "string" -%}
        {%- if param_spec.enum -%}
            {{- '"' + param_spec.enum|join('" | "') + '"' -}}
        {%- else -%}
            {{- "string" }}
            {%- if param_spec.nullable %}
                {{- " | null" }}
            {%- endif -%}
        {%- endif -%}
    {%- elif param_spec.type == "number" -%}
        {{- "number" }}
    {%- elif param_spec.type == "integer" -%}
        {{- "number" }}
    {%- elif param_spec.type == "boolean" -%}
        {{- "boolean" }}

    {%- elif param_spec.type == "object" -%}
        {%- if param_spec.properties -%}
            {{- "{\n" }}
            {%- for prop_name, prop_spec in param_spec.properties.items() -%}
                {{- prop_name -}}
                {%- if prop_name not in (param_spec.required or []) -%}
                    {{- "?" }}
                {%- endif -%}
                {{- ": " }}
                {{ render_typescript_type(prop_spec, param_spec.required or []) }}
                {%- if not loop.last -%}
                    {{-", " }}
                {%- endif -%}
            {%- endfor -%}
            {{- "}" }}
        {%- else -%}
            {{- "object" }}
        {%- endif -%}
    {%- else -%}
        {{- "any" }}
    {%- endif -%}
{%- endmacro -%}

{%- macro render_tool_namespace(namespace_name, tools) -%}
    {{- "## " + namespace_name + "\n\n" }}
    {{- "namespace " + namespace_name + " {\n\n" }}
    {%- for tool in tools %}
        {%- set tool = tool.function %}
        {{- "// " + tool.description + "\n" }}
        {{- "type "+ tool.name + " = " }}
        {%- if tool.parameters and tool.parameters.properties %}
            {{- "(_: {\n" }}
            {%- for param_name, param_spec in tool.parameters.properties.items() %}
                {%- if param_spec.description %}
                    {{- "// " + param_spec.description + "\n" }}
                {%- endif %}
                {{- param_name }}
                {%- if param_name not in (tool.parameters.required or []) -%}
                    {{- "?" }}
                {%- endif -%}
                {{- ": " }}
                {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
                {%- if param_spec.default is defined -%}
                    {%- if param_spec.enum %}
                        {{- ", // default: " + param_spec.default }}
                    {%- elif param_spec.oneOf %}
                        {{- "// default: " + param_spec.default }}
                    {%- else %}
                        {{- ", // default: " + param_spec.default|tojson }}
                    {%- endif -%}
                {%- endif -%}
                {%- if not loop.last %}
                    {{- ",\n" }}
                {%- else %}
                    {{- ",\n" }}
                {%- endif -%}
            {%- endfor %}
            {{- "}) => any;\n\n" }}
        {%- else -%}
            {{- "() => any;\n\n" }}
        {%- endif -%}
    {%- endfor %}
    {{- "} // namespace " + namespace_name }}
{%- endmacro -%}

{%- macro render_builtin_tools(browser_tool, python_tool) -%}
    {%- if browser_tool %}
        {{- "## browser\n\n" }}
        {{- "// Tool for browsing.\n" }}
        {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }}
        {{- "// Cite information from the tool using the following format:\n" }}
        {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.\n" }}
        {{- "// Do not quote more than 10 words directly from the tool output.\n" }}
        {{- "// sources=web (default: web)\n" }}
        {{- "namespace browser {\n\n" }}
        {{- "// Searches for information related to `query` and displays `topn` results.\n" }}
        {{- "type search = (_: {\n" }}
        {{- "query: string,\n" }}
        {{- "topn?: number, // default: 10\n" }}
        {{- "source?: string,\n" }}
        {{- "}) => any;\n\n" }}
        {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }}
        {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }}
        {{- "// If `cursor` is not provided, the most recent page is implied.\n" }}
        {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }}
        {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\n" }}
        {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }}
        {{- "type open = (_: {\n" }}
        {{- "id?: number | string, // default: -1\n" }}
        {{- "cursor?: number, // default: -1\n" }}
        {{- "loc?: number, // default: -1\n" }}
        {{- "num_lines?: number, // default: -1\n" }}
        {{- "view_source?: boolean, // default: false\n" }}
        {{- "source?: string,\n" }}
        {{- "}) => any;\n\n" }}
        {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }}
        {{- "type find = (_: {\n" }}
        {{- "pattern: string,\n" }}
        {{- "cursor?: number, // default: -1\n" }}
        {{- "}) => any;\n\n" }}
        {{- "} // namespace browser\n\n" }}
    {%- endif -%}

    {%- if python_tool %}
        {{- "## python\n\n" }}
        {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\n\n" }}
        {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }}
    {%- endif -%}
{%- endmacro -%}

{#- System Message Construction ============================================ #}
{%- macro build_system_message() -%}
    {%- if model_identity is not defined %}
        {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %}
    {%- endif %}
    {{- model_identity + "\n" }}
    {{- "Knowledge cutoff: 2024-06\n" }}
    {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }}
    {%- if reasoning_effort is not defined %}
        {%- set reasoning_effort = "medium" %}
    {%- endif %}
    {{- "Reasoning: " + reasoning_effort + "\n\n" }}
    {%- if builtin_tools is defined and builtin_tools is not none %}
        {{- "# Tools\n\n" }}
        {%- set available_builtin_tools = namespace(browser=false, python=false) %}
        {%- for tool in builtin_tools %}
            {%- if tool == "browser" %}
                {%- set available_builtin_tools.browser = true %}
            {%- elif tool == "python" %}
                {%- set available_builtin_tools.python = true %}
            {%- endif %}
        {%- endfor %}
        {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }}
    {%- endif -%}
    {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }}
    {%- if tools -%}
        {{- "\nCalls to these tools must go to the commentary channel: 'functions'." }}
    {%- endif -%}
{%- endmacro -%}

{#- Main Template Logic ================================================= #}
{#- Set defaults #}

{#- Render system message #}
{{- "<|start|>system<|message|>" }}
{{- build_system_message() }}
{{- "<|end|>" }}

{#- Extract developer message #}
{%- if developer_instructions is defined and developer_instructions is not none %}
    {%- set developer_message = developer_instructions %}
    {%- set loop_messages = messages %}
{%- elif messages[0].role == "developer" or messages[0].role == "system" %}
    {%- set developer_message = messages[0].content %}
    {%- set loop_messages = messages[1:] %}
{%- else %}
    {%- set developer_message = "" %}
    {%- set loop_messages = messages %}
{%- endif %}

{#- Render developer message #}
{%- if developer_message or tools %}
    {{- "<|start|>developer<|message|>" }}
    {%- if developer_message %}
        {{- "# Instructions\n\n" }}
        {{- developer_message }}
    {%- endif %}
    {%- if tools -%}
        {%- if developer_message %}
            {{- "\n\n" }}
        {%- endif %}
        {{- "# Tools\n\n" }}
        {{- render_tool_namespace("functions", tools) }}
    {%- endif -%}
    {{- "<|end|>" }}
{%- endif %}

{#- Render messages #}
{%- set last_tool_call = namespace(name=none) %}
{%- for message in loop_messages -%}
    {#- At this point only assistant/user/tool messages should remain #}
    {%- if message.role == 'assistant' -%}
        {#- Checks to ensure the messages are being passed in the format we expect #}
        {%- if "content" in message %}
            {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %}
                {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
            {%- endif %}
        {%- endif %}
        {%- if "thinking" in message %}
            {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %}
                {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
            {%- endif %}
        {%- endif %}
        {%- if "tool_calls" in message %}
            {#- We need very careful handling here - we want to drop the tool call analysis message if the model #}
            {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #}
            {#- when we render CoT/analysis messages in inference. #}
            {%- set future_final_message = namespace(found=false) %}
            {%- for future_message in loop_messages[loop.index:] %}
                {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %}
                    {%- set future_final_message.found = true %}
                {%- endif %}
            {%- endfor %}
            {#- We assume max 1 tool call per message, and so we infer the tool call name #}
            {#- in "tool" messages from the most recent assistant tool call name #}
            {%- set tool_call = message.tool_calls[0] %}
            {%- if tool_call.function %}
                {%- set tool_call = tool_call.function %}
            {%- endif %}
            {%- if message.content and message.thinking %}
                {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
            {%- elif message.content and not future_final_message.found %}
                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
            {%- elif message.thinking and not future_final_message.found %}
                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
            {%- endif %}
            {{- "<|start|>assistant to=" }}
            {{- "functions." + tool_call.name + "<|channel|>commentary " }}
            {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
            {%- if tool_call.arguments is string %}
                {{- tool_call.arguments }}
            {%- else %}
                {{- tool_call.arguments|tojson }}
            {%- endif %}
            {{- "<|call|>" }}
            {%- set last_tool_call.name = tool_call.name %}
        {%- elif loop.last and not add_generation_prompt %}
            {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
            {#- This is a situation that should only occur in training, never in inference. #}
            {%- if "thinking" in message %}
                {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
            {%- endif %}
            {#- <|return|> indicates the end of generation, but <|end|> does not #}
            {#- <|return|> should never be an input to the model, but we include it as the final token #}
            {#- when training, so the model learns to emit it. #}
            {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
        {%- elif "thinking" in message %}
            {#- CoT is dropped during all previous turns, so we never render it for inference #}
            {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
            {%- set last_tool_call.name = none %}
        {%- else %}
            {#- CoT is dropped during all previous turns, so we never render it for inference #}
            {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
            {%- set last_tool_call.name = none %}
        {%- endif %}
    {%- elif message.role == 'tool' -%}
        {%- if last_tool_call.name is none %}
            {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
        {%- endif %}
        {{- "<|start|>functions." + last_tool_call.name }}
        {%- if message.content is string %}
            {{- " to=assistant<|channel|>commentary<|message|>" + message.content + "<|end|>" }}
        {%- else %}
            {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }}
        {%- endif %}
    {%- elif message.role == 'user' -%}
        {{- "<|start|>user<|message|>" + message.content + "<|end|>" }}
    {%- endif -%}
{%- endfor -%}

{#- Generation prompt #}
{%- if add_generation_prompt -%}
<|start|>assistant
{%- endif -%}"""

# Ollama from https://ollama.com/library/gpt-oss
# Ollama Modelfile text for gpt-oss. `{__FILE_LOCATION__}` in the FROM line is
# a placeholder that is left unfilled here. The Go TEMPLATE renders, in order:
#   1. the system message: identity, knowledge cutoff, current date, the
#      reasoning level, and descriptions of the built-in `browser.*` / `python`
#      tools when they appear in `.Tools`;
#   2. an optional developer message: every non-built-in tool as a TypeScript
#      style signature inside `namespace functions`, followed by `.System`
#      under "# Instructions";
#   3. the conversation messages (system-role messages are skipped, assistant
#      thinking is shown only after the last user message);
#   4. a trailing `<|start|>assistant` unless the final assistant message is
#      being prefilled.
# The PARAMETER lines set temperature 1.0, top_k 0 and top_p 1.0.
gptoss_ollama = \
'''
FROM {__FILE_LOCATION__}
TEMPLATE """<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
Knowledge cutoff: 2024-06
Current date: {{ currentDate }}
{{- if and .IsThinkSet .Think (ne .ThinkLevel "") }}

Reasoning: {{ .ThinkLevel }}
{{- else if or (not .IsThinkSet) (and .IsThinkSet .Think) }}

Reasoning: medium
{{- end }}

{{- $hasNonBuiltinTools := false }}
{{- if .Tools -}}
{{- $hasBrowserSearch := false }}
{{- $hasBrowserOpen := false }}
{{- $hasBrowserFind := false }}
{{- $hasPython := false }}
  {{- range .Tools }}
    {{- if eq .Function.Name "browser.search" -}}{{- $hasBrowserSearch = true -}}
    {{- else if eq .Function.Name "browser.open" -}}{{- $hasBrowserOpen = true -}}
    {{- else if eq .Function.Name "browser.find" -}}{{- $hasBrowserFind = true -}}
    {{- else if eq .Function.Name "python" -}}{{- $hasPython = true -}}
    {{- else }}{{ $hasNonBuiltinTools = true -}}
    {{- end }}
  {{- end }}
{{- if or $hasBrowserSearch $hasBrowserOpen $hasBrowserFind $hasPython }}

# Tools
{{- if or $hasBrowserSearch $hasBrowserOpen $hasBrowserFind }}

## browser

// Tool for browsing.
// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.
// Cite information from the tool using the following format:
// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.
// Do not quote more than 10 words directly from the tool output.
// sources=web (default: web)
namespace browser {
{{- if $hasBrowserSearch }}

// Searches for information related to `query` and displays `topn` results.
type search = (_: {
query: string,
topn?: number, // default: 10
source?: string,
}) => any;
{{- end }}
{{- if $hasBrowserOpen }}

// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.
// Valid link ids are displayed with the formatting: `【{id}†.*】`.
// If `cursor` is not provided, the most recent page is implied.
// If `id` is a string, it is treated as a fully qualified URL associated with `source`.
// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.
// Use this function without `id` to scroll to a new location of an opened page.
type open = (_: {
id?: number | string, // default: -1
cursor?: number, // default: -1
loc?: number, // default: -1
num_lines?: number, // default: -1
view_source?: boolean, // default: false
source?: string,
}) => any;
{{- end }}
{{- if $hasBrowserFind }}

// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.
type find = (_: {
pattern: string,
cursor?: number, // default: -1
}) => any;
{{- end }}

} // namespace browser
{{- end }}{{/* end if has browser tools */}}
{{- if $hasPython }}

## python

Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).

When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.
{{- end }}{{/* end if hasPython */}}
{{- end }}{{/* end if has any built-in tools */}}
{{- end }}{{/* end if .Tools */}}

# Valid channels: analysis, commentary, final. Channel must be included for every message.{{ if $hasNonBuiltinTools }}
Calls to these tools must go to the commentary channel: 'functions'.
{{- end -}}<|end|>{{/* end of system */ -}}
{{- if or $hasNonBuiltinTools .System -}}
<|start|>developer<|message|>{{- if $hasNonBuiltinTools }}# Tools

## functions

namespace functions {
{{- range .Tools }}
{{- if not (or (eq .Function.Name "browser.search") (eq .Function.Name "browser.open") (eq .Function.Name "browser.find") (eq .Function.Name "python")) }}
{{if .Function.Description }}
// {{ .Function.Description }}
{{- end }}
{{- if and .Function.Parameters.Properties (gt (len .Function.Parameters.Properties) 0) }}
type {{ .Function.Name }} = (_: {
{{- range $name, $prop := .Function.Parameters.Properties }}
{{- if $prop.Description }}
  // {{ $prop.Description }}
{{- end }}
  {{ $name }}: {{ if gt (len $prop.Type) 1 }}{{ range $i, $t := $prop.Type }}{{ if $i }} | {{ end }}{{ $t }}{{ end }}{{ else }}{{ index $prop.Type 0 }}{{ end }},
{{- end }}
}) => any;
{{- else }}
type {{ .Function.Name }} = () => any;
{{- end }}
{{- end }}{{/* end if not browser tool */}}
{{- end }}{{/* end of range .Tools */}}

} // namespace functions
{{- end }}{{/* end if hasNonBuiltinTools */}}
{{- if .System}}

# Instructions

{{ .System }}
{{- end -}}
<|end|>
{{- end -}}
{{- /* Find the index of the last user message */ -}}
{{- $lastUserIdx := -1 }}
{{- $prefillingContent := false }}
{{- $prefillingThinkingOnly := false }}
{{- range $i, $msg := .Messages }}
  {{- $last := eq (len (slice $.Messages $i)) 1 -}}
  {{- if eq $msg.Role "user" }}
    {{- $lastUserIdx = $i }}
  {{- end -}}
  {{- if and $last (eq $msg.Role "assistant") (gt (len $msg.Content) 0) }}
    {{- $prefillingContent = true }}
  {{- else if and $last (eq $msg.Role "assistant") (gt (len $msg.Thinking) 0) }}
    {{- $prefillingThinkingOnly = true }}
  {{- end }}
{{- end -}}
{{- /* Now render messages */ -}}
{{- range $i, $msg := .Messages }}
  {{- $last := eq (len (slice $.Messages $i)) 1 -}}
  {{- if (ne $msg.Role "system") -}}
    {{- if eq $msg.Role "tool" -}}
      {{- if or (eq $msg.ToolName "python") (eq $msg.ToolName "browser.search") (eq $msg.ToolName "browser.open") (eq $msg.ToolName "browser.find") -}}
        <|start|>{{ $msg.ToolName }} to=assistant<|message|>{{ $msg.Content }}<|end|>
      {{- else -}}
        <|start|>functions.{{ $msg.ToolName }} to=assistant<|message|>{{ $msg.Content }}<|end|>
      {{- end -}}
    {{- else if eq $msg.Role "assistant" -}}
      {{- if and $msg.Thinking (gt $i $lastUserIdx) -}}{{- /* Show thinking only after last user message */ -}}
      <|start|>assistant<|channel|>analysis<|message|>{{ $msg.Thinking }}{{- if not $prefillingThinkingOnly -}}<|end|>{{- end -}}
      {{- end -}}
      {{- if gt (len $msg.Content) 0 -}}
        <|start|>assistant<|channel|>final<|message|>{{ $msg.Content }}{{- if not $prefillingContent -}}<|end|>{{- end -}}
      {{- end -}}
      {{- if gt (len $msg.ToolCalls) 0 -}}
        {{- range $j, $toolCall := $msg.ToolCalls -}}
          {{- $isBuiltin := or (eq $toolCall.Function.Name "python") (eq $toolCall.Function.Name "browser.search") (eq $toolCall.Function.Name "browser.open") (eq $toolCall.Function.Name "browser.find") -}}
          <|start|>assistant<|channel|>{{ if $isBuiltin }}analysis{{ else }}commentary{{ end }} to={{ if not $isBuiltin}}functions.{{end}}{{ $toolCall.Function.Name }} <|constrain|>json<|message|>{{ $toolCall.Function.Arguments }}<|call|>
        {{- end -}}
      {{- end -}}
    {{- else if eq $msg.Role "user" -}}
      <|start|>{{ $msg.Role }}<|message|>{{ $msg.Content }}<|end|>
    {{- end }}
  {{- else }}
  {{- end }}
{{- end -}}
{{- if not (or $prefillingContent $prefillingThinkingOnly) -}}
<|start|>assistant
{{- end -}}"""
PARAMETER temperature 1.0
PARAMETER top_k 0
PARAMETER top_p 1.0
'''

gptoss_template_template_eos_token = "<|return|>"

# GPT-oss has no default system message. The same entry is registered under
# both the hyphenated and the unhyphenated spelling of the template name.
for _gptoss_key in ("gpt-oss", "gptoss"):
    CHAT_TEMPLATES[_gptoss_key] = (
        gptoss_template,
        gptoss_template_template_eos_token,
        False,
        gptoss_ollama,
    )
    DEFAULT_SYSTEM_MESSAGE[_gptoss_key] = None # No system message in GPT-oss
del _gptoss_key

# =========================================== Qwen3-Instruct
# Jinja chat template for Qwen3-Instruct.
# * With `tools`, a system turn lists the tool signatures inside <tools></tools>
#   (preceded by the first message's content when that message is a system
#   message); without tools only a leading system message is rendered.
# * `ns.last_query_index` is the index of the last user message that is not a
#   wrapped <tool_response>; assistant turns after it keep their
#   <think>...</think> reasoning when they have any, earlier ones drop it.
# * Consecutive `tool` messages are folded into a single user turn, each
#   wrapped in <tool_response> tags.
# * The generation prompt is `<|im_start|>assistant\n`.
# Backslashes are doubled because this is a non-raw Python string and Jinja
# must receive the two-character escape sequences.
qwen3_instruct_template = \
'''{%- if tools %}
    {{- '<|im_start|>system\\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\\n\\n' }}
    {%- endif %}
    {{- "# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}
    {%- for tool in tools %}
        {{- "\\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if message.content is string %}
        {%- set content = message.content %}
    {%- else %}
        {%- set content = '' %}
    {%- endif %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}
    {%- elif message.role == "assistant" %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is string %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in content %}
                {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}
                {%- set content = content.split('</think>')[-1].lstrip('\\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if reasoning_content %}
                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\\n' + content }}
            {%- endif %}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\\n<tool_response>\\n' }}
        {{- content }}
        {{- '\\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\\n' }}
{%- endif %}'''

# End-of-turn marker used by the Qwen3 templates.
qwen3_template_eos_token = "<|im_end|>"

# Register the instruct template together with its Ollama Modelfile.
CHAT_TEMPLATES["qwen3-instruct"] = (
    qwen3_instruct_template,
    qwen3_template_eos_token,
    False,
    _ollama_template("qwen3-instruct"),
)
# No system message in Qwen3
DEFAULT_SYSTEM_MESSAGE["qwen3-instruct"] = None


# =========================================== Qwen3-Thinking
# Jinja chat template for Qwen3-Thinking.
# * With `tools`, a system turn lists the tool signatures inside <tools></tools>
#   (preceded by the first message's content when that message is a system
#   message); without tools only a leading system message is rendered.
# * `ns.last_query_index` is the index of the last user message that is not a
#   wrapped <tool_response>; assistant turns after it keep a
#   <think>...</think> block (the final turn always gets one), earlier turns
#   drop their reasoning.
# * Consecutive `tool` messages are folded into a single user turn, each
#   wrapped in <tool_response> tags.
# * The generation prompt opens the reasoning block:
#   `<|im_start|>assistant\n<think>\n`.
# Backslashes are doubled throughout because this is a non-raw Python string:
# Jinja must receive the two-character escape `\n`, not a raw line break
# inside a string literal.
qwen3_thinking_template = \
'''{%- if tools %}
    {{- '<|im_start|>system\\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\\n\\n' }}
    {%- endif %}
    {{- "# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>" }}
    {%- for tool in tools %}
        {{- "\\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\"name\\": <function-name>, \\"arguments\\": <args-json-object>}\\n</tool_call><|im_end|>\\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if message.content is string %}
        {%- set content = message.content %}
    {%- else %}
        {%- set content = '' %}
    {%- endif %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}
    {%- elif message.role == "assistant" %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is string %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in content %}
                {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}
                {%- set content = content.split('</think>')[-1].lstrip('\\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\\n' + content }}
            {%- endif %}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\\n<tool_response>\\n' }}
        {{- content }}
        {{- '\\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\\n<think>\\n' }}
{%- endif %}'''

# Register the thinking template; it shares the Qwen3 end-of-turn token and
# uses its own Ollama Modelfile.
CHAT_TEMPLATES["qwen3-thinking"] = (qwen3_thinking_template, qwen3_template_eos_token, False, _ollama_template("qwen3-thinking"),)
# No system message in Qwen3
DEFAULT_SYSTEM_MESSAGE["qwen3-thinking"] = None


# =========================================== Liquid-LFM2
# ChatML-style Jinja template for Liquid LFM2: `bos_token`, then every message
# as `<|im_start|>{role}\n{content}<|im_end|>\n`, then an optional
# `<|im_start|>assistant\n` generation prompt.
# The literal starts immediately after the opening quotes so that nothing is
# rendered in front of `bos_token`; a line break after the quotes would be
# emitted as a stray newline at the very start of the prompt.
liquid_lfm2_template = \
'''{{bos_token}}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}'''

# End-of-turn marker for the Liquid LFM2 template.
liquid_lfm2_template_eos_token = "<|im_end|>"

# The last tuple slot (the Ollama Modelfile in sibling entries) is None here.
CHAT_TEMPLATES["lfm-2"] = (
    liquid_lfm2_template,
    liquid_lfm2_template_eos_token,
    False,
    None,
)
# No default system message for Liquid LFM2
DEFAULT_SYSTEM_MESSAGE["lfm-2"] = None


# =========================================== Starling-LM
# Jinja chat template for Starling-LM: `bos_token`, then each message rendered
# as `GPT4 Correct {Role}: {content}<|end_of_turn|>` (the role is title-cased),
# then an optional `GPT4 Correct Assistant:` generation prompt.
# NOTE(review): the `{{ ... }}` expressions are indented and carry no `-`
# whitespace control; depending on the Jinja environment's trim/lstrip
# settings the four-space indent may end up in the rendered prompt - confirm.

starling_template = \
"""{{ bos_token }}
{%- for message in messages %}
    {{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>' }}
{%- endfor %}
{%- if add_generation_prompt %}
    {{ 'GPT4 Correct Assistant:' }}
{%- endif %}"""

# Ollama from https://ollama.com/library/starling-lm:7b/blobs/4b21bfc435b4
starling_ollama = _ollama_template("starling")

# Each Starling turn is closed by this token.
starling_template_eos_token = "<|end_of_turn|>"

CHAT_TEMPLATES["starling"] = (
    starling_template,
    starling_template_eos_token,
    False,
    starling_ollama,
)
# Starling has no default system message.
DEFAULT_SYSTEM_MESSAGE["starling"] = None


# =========================================== Yi-chat
# ChatML-style Jinja template for Yi-chat: `add_generation_prompt` defaults to
# false when undefined, every message is rendered as
# `<|im_start|>{role}\n{content}<|im_end|>\n`, and an optional
# `<|im_start|>assistant\n` generation prompt follows.
# The literal begins and ends flush against the quotes so that no stray
# newline is rendered before the first turn or after the generation prompt.

yi_chat_template = \
"""{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}"""

# Ollama from https://ollama.com/library/yi:34b-chat/blobs/62fbfd9ed093
yi_chat_ollama = _ollama_template("yi-chat")

# EOS token registered for the Yi-chat template.
yi_chat_template_eos_token = "<|endoftext|>"

CHAT_TEMPLATES["yi-chat"] = (
    yi_chat_template,
    yi_chat_template_eos_token,
    False,
    yi_chat_ollama,
)
# Yi-chat has no default system message.
DEFAULT_SYSTEM_MESSAGE["yi-chat"] = None

def _change_system_message(template: str, type_chat_template: str, system_message: str = None):
    """
    Substitutes the literal `{system_message}` placeholder inside a chat template.

    template:           Jinja chat template text, possibly containing `{system_message}`.
    type_chat_template: Key into DEFAULT_SYSTEM_MESSAGE, or None for a custom template.
    system_message:     Message requested by the caller, or None to use the default.

    Returns (template, system_message_used).

    Raises ValueError if a custom template (type_chat_template is None) contains
    the placeholder but no system message was provided.

    The message is inserted verbatim with `str.replace`. Using `re.sub` here would
    interpret backslashes and group references inside the message (for example
    `\\n` or `\\1`) and could corrupt the message or raise `re.error`.
    """
    placeholder = "{system_message}"

    # For custom templates. This must be checked before the registry lookup,
    # since a None name never has a default and would otherwise return early.
    if type_chat_template is None:
        if placeholder not in template:
            return template, system_message
        if system_message is None:
            raise ValueError("Unsloth: You need to provide a system message for custom templates.")
        return template.replace(placeholder, system_message), system_message

    # For predefined templates, check if default system message exists
    default_system_message = DEFAULT_SYSTEM_MESSAGE.get(f"{type_chat_template}", None)
    if default_system_message is None:
        if system_message is not None:
            logger.warning_once(
                f"Unsloth: You tried to change the system message for {type_chat_template}, "
                "but it doesn't have a default system message. "
                "You need to manually add the system message in your data."
            )
        return template, system_message

    # For predefined templates with default system message
    message_to_use = system_message if system_message is not None else default_system_message
    return template.replace(placeholder, message_to_use), message_to_use


def get_chat_template(
    tokenizer,
    chat_template = "chatml",
    mapping = {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"},
    map_eos_token = True,
    system_message = None,
):
    """
    Installs a chat template on `tokenizer` and returns the patched tokenizer.

    tokenizer:      Tokenizer to patch. For fast tokenizers the serialized vocabulary
                    may be rewritten so the template's stop word takes the place of EOS.
    chat_template:  Either a key of CHAT_TEMPLATES (looked up exactly as given), or a
                    2-item list / tuple of (jinja_template, stop_word).
    mapping:        Names used in the dataset for the 'role', 'content', 'user' and
                    'assistant' literals of the template (eg ShareGPT from / value).
                    NOTE(review): this is a mutable default; it is only read here.
    map_eos_token:  Must be a bool. For named templates the value is overridden by the
                    flag stored in the CHAT_TEMPLATES entry (see below).
    system_message: Optional system message handed to `_change_system_message`.

    Returns the tokenizer with `chat_template`, `_ollama_modelfile` and
    `_system_message` set.

    Raises TypeError if `chat_template` is neither a str nor a list / tuple, and
    RuntimeError if a token that must be renamed is missing from the tokenizer.
    """
    assert(type(map_eos_token) is bool)
    # Keep the incoming tokenizer so its special tokens can be restored at the end
    old_tokenizer = tokenizer

    IS_GEMMA = False
    if tokenizer.__class__.__name__.startswith("Gemma"):
        # Gemma tokenizers get the Gemma flavoured ChatML template instead
        if chat_template == "chatml": chat_template = "gemma_chatml"
        IS_GEMMA = True

    # We add a check for Llama-3
    # if chat_template == "llama-3":
    #     tokenizer._using_llama3_template = True
    # else:
    #     llama3_tokens = set(["<|end_header_id|>", "<|eot_id|>", "<|start_header_id|>"])
    #     check_llama3_tokens = llama3_tokens & set(str(x) for x in tokenizer.added_tokens_decoder.values())
    #     if len(check_llama3_tokens) == len(llama3_tokens):
    #         tokenizer._using_llama3_template = True
    #     pass
    # pass

    # We first check if the tokenizer is a fast one. If not, we cannot convert this!
    is_fast_tokenizer = getattr(tokenizer, "is_fast", False)
    old_padding_side = tokenizer.padding_side

    # Set to True when pad_token was equal to EOS and is moved along with it
    same_padding_token = False
    type_chat_template = None

    if type(chat_template) in (list, tuple,):
        # For changing system message later
        # Since it's not supported yet, we will raise an error first!
        # NOTE: the "type" recorded here is the lowercased template text itself
        type_chat_template = chat_template[0].lower()
        # Unpacking requires exactly 2 items: (template, stop_word)
        chat_template, stop_word = chat_template
        assert(type(chat_template) is str)
        assert(type(stop_word) is str)
        ollama_modelfile = None

    elif type(chat_template) is str:
        # For changing system message later
        type_chat_template = chat_template.lower()

        # The lookup uses the name as given (not lowercased)
        chat_template, stop_word, yes_map_eos_token, ollama_modelfile = CHAT_TEMPLATES[chat_template]

        # Check mapping to eos_token
        # Taken together, these 2 lines make map_eos_token equal to yes_map_eos_token
        if not map_eos_token and yes_map_eos_token: map_eos_token = True
        if not yes_map_eos_token and map_eos_token: map_eos_token = False

        # A list / tuple stop word carries (token_mapping, stop_word)
        if type(stop_word) in (list, tuple,):
            token_mapping, stop_word = stop_word
            assert(type(token_mapping) is dict)
        else:
            token_mapping = None

        assert(type(stop_word) is str)

        # Check fast tokenizer
        if not is_fast_tokenizer:
            # Slow tokenizers: no tokens are added or edited, only the template is set
            pass
            # print(
            #     "Unsloth: Not a fast tokenizer, so can't process it as of yet :(\n"\
            #     "Please log a Github issue if you want this as a new feature!\n"\
            #     "Your chat template will still work, but it won't add or edit tokens."
            # )

        elif token_mapping is not None:
            # token_mapping = {"<start_of_turn>" : "<|im_start|>", "<end_of_turn>" : "<|im_end|>"}
            # For Gemma :)

            # Edits are done on the serialized tokenizer text
            string_vocab = tokenizer._tokenizer.to_str()

            skipped = 0
            for old_token, new_token in token_mapping.items():
                # Count quoted occurrences so only whole tokens are matched
                old_count = string_vocab.count(f'"{old_token}"')
                new_count = string_vocab.count(f'"{new_token}"')
                if new_count != 0:
                    print(f"{new_token} is already a token. Skipping.")
                    skipped += 1
                elif old_count == 0:
                    raise RuntimeError(f"{old_token} was not part of the tokenizer!")
                else:
                    string_vocab = string_vocab.replace(f'"{old_token}"', f'"{new_token}"')
                pass
            pass

            if map_eos_token and (not stop_word in token_mapping.values()):
                # Do not map 107 = <|im_end|> and 1 = <|im_end|>. This will reduce the vocab size by 1
                logger.warning_once(f"Unsloth: Will map {stop_word} to EOS = {tokenizer.eos_token}.")
                string_vocab = string_vocab.replace(tokenizer.eos_token, stop_word)
            pass

            # Rebuild only if at least 1 token was not skipped
            if skipped != len(token_mapping):
                new_tokenizer = tokenizer._tokenizer.from_str(string_vocab)

                # Careful on pad_token
                old_pad_token = tokenizer.pad_token
                if old_pad_token == tokenizer.eos_token:
                    old_pad_token = stop_word
                    same_padding_token = True
                pass

                if map_eos_token:
                    new_tokenizer = tokenizer.__class__(
                        tokenizer_object = new_tokenizer,
                        eos_token = stop_word,
                        pad_token = old_pad_token,
                    )
                else:
                    new_tokenizer = tokenizer.__class__(
                        tokenizer_object = new_tokenizer,
                        pad_token = old_pad_token,
                    )
                pass

                # Must fix the sentence piece tokenizer since there's no tokenizer.model file!
                tokenizer = fix_sentencepiece_tokenizer(tokenizer, new_tokenizer, token_mapping,)
            else:
                pass

        elif map_eos_token and (stop_word != "eos_token"):
            logger.warning_once(f"Unsloth: Will map {stop_word} to EOS = {tokenizer.eos_token}.")

            # Replaces the old EOS token with a new one.
            # Useful for ChatML <|im_end|> for example.
            # Usually we train 2 more tokens <|im_start|> and <|im_end|>
            # But training the lm_head and embeddings are slow!
            # This is a HACK!
            # Idea from https://huggingface.co/cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser

            old_bos_token = getattr(tokenizer, "bos_token", None)
            old_eos_token = getattr(tokenizer, "eos_token", None)
            old_pad_token = getattr(tokenizer, "pad_token", None)
            old_unk_token = getattr(tokenizer, "unk_token", None)

            string_vocab = tokenizer._tokenizer.to_str()
            # First check if new stop_word is in the tokenizer
            if stop_word in string_vocab:
                # We shall swap them around
                # A temporary marker keeps the 2 replacements from colliding
                temporary_stop_token = "<|:__TEMP//STOP//TOKEN__:|>"
                string_vocab = string_vocab.replace(old_eos_token, temporary_stop_token)
                string_vocab = string_vocab.replace(stop_word, old_eos_token)
                string_vocab = string_vocab.replace(temporary_stop_token, stop_word)
            else:
                string_vocab = string_vocab.replace(old_eos_token, stop_word)
            pass
            new_tokenizer = tokenizer._tokenizer.from_str(string_vocab)

            # Careful on pad_token
            if old_pad_token == old_eos_token:
                old_pad_token = stop_word
                same_padding_token = True
            pass

            new_tokenizer = tokenizer.__class__(
                tokenizer_object = new_tokenizer,
                bos_token = old_bos_token,
                eos_token = stop_word,
                unk_token = old_unk_token,
                pad_token = old_pad_token,
            )

            # Must fix the sentence piece tokenizer since there's no tokenizer.model file!
            token_mapping = { old_eos_token : stop_word, }
            tokenizer = fix_sentencepiece_tokenizer(tokenizer, new_tokenizer, token_mapping,)
        pass

    else:
        raise TypeError(
            f"Unsloth: `chat_template` must be a tuple of (your_template, eos_token,) or one of\n"\
            f"{CHAT_TEMPLATES.keys()}"
        )

    # Careful on Gemma
    # bos_token is a must or else losses become too high
    if IS_GEMMA and not chat_template.startswith(("{{ bos_token }}", "{{- bos_token }}")):
        chat_template = "{{ bos_token }}" + chat_template

    # For ShareGPT role -> from and content -> value
    # Plain text replacement of the quoted literals inside the Jinja template
    new_chat_template = chat_template\
        .replace("'role'",      "'" + mapping["role"]      + "'")\
        .replace("'content'",   "'" + mapping["content"]   + "'")\
        .replace("'user'",      "'" + mapping["user"]      + "'")\
        .replace("'assistant'", "'" + mapping["assistant"] + "'")

    _, tokenizer = patch_tokenizer(model = None, tokenizer = tokenizer)
    # Restore the padding side read from the incoming tokenizer
    tokenizer.padding_side = old_padding_side

    # If not normal HF, we add a check to make old templates work
    # The wrapped template picks the original keys when messages[0] has a 'role'
    # key, and the remapped keys otherwise
    if mapping != {"role" : "role", "content" : "content", "user" : "user", "assistant" : "assistant"}:
        chat_template = \
            "{% if 'role' in messages[0] %}" + \
            chat_template + \
            "{% else %}" + \
            new_chat_template + \
            "{% endif %}"
    else:
        chat_template = new_chat_template

    chat_template, system_message = _change_system_message(chat_template, type_chat_template, system_message)

    tokenizer.chat_template = chat_template

    # Also fix up other tokens
    # BOS / UNK (and PAD, unless it moved together with EOS) are reset to the
    # values of the incoming tokenizer
    old_pad_token = getattr(old_tokenizer, "pad_token", None)
    old_bos_token = getattr(old_tokenizer, "bos_token", None)
    old_unk_token = getattr(old_tokenizer, "unk_token", None)
    new_pad_token = getattr(tokenizer,     "pad_token", None)
    new_bos_token = getattr(tokenizer,     "bos_token", None)
    new_unk_token = getattr(tokenizer,     "unk_token", None)
    if old_bos_token != new_bos_token: tokenizer.bos_token = old_bos_token
    if old_unk_token != new_unk_token: tokenizer.unk_token = old_unk_token
    if not same_padding_token:
        if old_pad_token != new_pad_token: tokenizer.pad_token = old_pad_token

    # stopping_criteria = create_stopping_criteria(tokenizer, stop_word)

    # Patch saving functions
    tokenizer = patch_saving_functions(tokenizer)

    # Add Ollama
    # ollama_modelfile is None for custom (template, stop_word) inputs
    tokenizer._ollama_modelfile = ollama_modelfile
    tokenizer._system_message   = system_message
    return tokenizer#, stopping_criteria


def remove_special_tokens(tokenizer, prompt):
    """
    Removes 1 leading BOS token from `prompt` so it is not doubled later.

    tokenizer: Object exposing `bos_token` (may be None or missing).
    prompt:    Text to clean.

    Returns `prompt` without the leading BOS token. The prompt is returned
    unchanged if the tokenizer has no BOS token or the prompt does not start with it.
    """
    bos_token = getattr(tokenizer, "bos_token", None)
    # Tokenizers without a BOS report None: `str.startswith(None)` would raise TypeError
    if bos_token and prompt.startswith(bos_token):
        prompt = prompt[len(bos_token):]
    return prompt


def _parse_combined_prompt(combined_prompt, dataset):
    """
    Splits a merged prompt into required text and optional [[...]] blocks.

    combined_prompt: Prompt text using {column} placeholders and [[...]] optional blocks.
    dataset:         Object exposing `column_names`.

    Returns (possible_columns, final_optional_prompts) where `possible_columns`
    lists every {column} found (in order, duplicates kept) and
    `final_optional_prompts` is the prompt cut into pieces: plain strings for
    required text and (span, text) tuples for each [[...]] block.

    Raises KeyError if a placeholder is not a dataset column.
    """
    # Every {column} must exist in the dataset
    possible_columns = re.findall(r"\{(.+?)\}", combined_prompt)
    dataset_columns = set(dataset.column_names)
    unknown_column = next((name for name in possible_columns if name not in dataset_columns), None)
    if unknown_column is not None:
        column = unknown_column
        raise KeyError(
            f"Unsloth: Your prompt includes '{column}' but this does not exist in the dataset. "\
            f"Only allowed columns are {list(dataset_columns)}"
        )

    # Walk over the [[...]] blocks once, emitting the text in between as we go
    final_optional_prompts = []
    cursor = 0
    for block in re.finditer(r"\[\[.+?\]\]", combined_prompt, flags = re.DOTALL | re.MULTILINE):
        start, end = block.span()
        if start != cursor:
            final_optional_prompts.append(combined_prompt[cursor : start])
        final_optional_prompts.append((block.span(), block.group(0)))
        cursor = end

    # Trailing text, or the entire string if there were no optional blocks
    if len(final_optional_prompts) == 0 or cursor != len(combined_prompt):
        final_optional_prompts.append(combined_prompt[cursor:])

    # The pieces must glue back into the original prompt
    pieces = []
    for piece in final_optional_prompts:
        pieces.append(piece if type(piece) is str else piece[1])
    check_combined = "".join(pieces)
    assert(combined_prompt == check_combined)

    return possible_columns, final_optional_prompts


def _create_formatter(possible_columns, final_optional_prompts, user_column_name):
    """
    Builds the batched function that renders the merged prompt for every row.

    possible_columns:       Column names referenced by the prompt (duplicates allowed).
    final_optional_prompts: Pieces of the prompt: str for required text, and
                            (span, "[[...]]") tuples for optional blocks.
    user_column_name:       Name of the output column.

    Returns a function taking a batch (dict of column -> list) and returning
    {user_column_name: [rendered text per row]}. An optional block is rendered
    only when its first column is neither None nor "".

    Raises IndexError if an optional block has no {column} placeholder.
    """
    placeholder = r"\{(.+?)\}"
    columns = list(dict.fromkeys(possible_columns))

    # Each segment is (slot, text, needed_columns); slot is None for required text
    segments = []
    skeleton = []
    for j, piece in enumerate(final_optional_prompts):
        if type(piece) is str:
            segments.append((None, piece, re.findall(placeholder, piece)))
            skeleton.append(piece)
        else:
            _, block = piece
            # Drop the surrounding [[ and ]]
            block = block[2:-2]
            needed_columns = re.findall(placeholder, block)
            if len(needed_columns) == 0:
                raise IndexError("Unsloth: Optional [[...]] blocks must contain at least 1 {column}.")
            slot = f"__optional_{j}__"
            segments.append((slot, block, needed_columns))
            # The optional block becomes 1 placeholder in the final prompt
            skeleton.append("{" + slot + "}")

    merged_prompt = "".join(skeleton)

    def __combined_prompt_processor__(examples):
        empty_batch = {user_column_name: []}
        if len(examples) == 0:
            return empty_batch
        first_key = next(iter(examples.keys()), None)
        if first_key is None:
            return empty_batch

        def render(row_idx):
            # Values of this row for every referenced column
            row = {column: examples[column][row_idx] for column in columns}
            values = {}
            for slot, text, needed_columns in segments:
                if slot is None:
                    for column in needed_columns:
                        values[column] = row[column]
                elif row[needed_columns[0]] in (None, ""):
                    # Missing data: the whole optional block disappears
                    values[slot] = ""
                else:
                    values[slot] = text.format(**{column: row[column] for column in needed_columns})
            return merged_prompt.format(**values)

        n_rows = len(examples[first_key])
        return {user_column_name: [render(row_idx) for row_idx in range(n_rows)]}

    return __combined_prompt_processor__


def to_sharegpt(
    dataset,
    merged_prompt = "",
    merged_column_name = "instruction",
    output_column_name = "output",
    remove_unused_columns = True,
    conversation_extension = 1,
    random_state = 3407,
):
    """
    Converts a dataset to ShareGPT style.
    ShareGPT has exactly 1 input and 1 output field per turn, so several
    columns are first merged into 1 input column via `merged_prompt`.
    `conversation_extension` lengthens every conversation by packing it with
    randomly shuffled conversations from the same dataset.

    merged_prompt = "",                 Prompt to merge columns into 1 input
    merged_column_name = "instruction", Final column name for the input  field
    output_column_name = "output",      Final column name for the output field
    remove_unused_columns = True,       Drop all previous columns after each step
    conversation_extension = 1,         Automatically combines `conversation_extension` convos into 1
    random_state = 3407,                Base seed for the shuffles

    Raises TypeError if the dataset already has a list-valued "conversations" column.
    """
    # Refuse datasets which look converted already
    if "conversations" in dataset.column_names:
        if type(dataset[0]["conversations"]) is list:
            raise TypeError("Unsloth: Your dataset is probably already in ShareGPT format!")

    # Step 1: merge all prompt columns into `merged_column_name`
    prompt_columns, prompt_pieces = _parse_combined_prompt(merged_prompt, dataset)
    dataset = dataset.map(
        _create_formatter(prompt_columns, prompt_pieces, merged_column_name),
        batched = True,
        desc = "Merging columns",
    )

    def __convert_to_sharegpt__(examples):
        users      = examples[merged_column_name]
        assistants = examples[output_column_name]
        if len(users) != len(assistants):
            raise ValueError(
                "Unsloth: Input and output columns must have matching batch lengths. "
                f"Got {len(users)} {merged_column_name} rows and {len(assistants)} {output_column_name} rows."
            )
        conversations = []
        for user, assistant in zip(users, assistants):
            human_turn = {"from" : "human", "value" : str(user)}
            gpt_turn   = {"from" : "gpt",   "value" : str(assistant)}
            conversations.append([human_turn, gpt_turn])
        return { "conversations" : conversations, }

    # Step 2: 1 human + 1 gpt turn per row
    columns_to_drop = dataset.column_names if remove_unused_columns else None
    dataset = dataset.map(
        __convert_to_sharegpt__,
        batched = True,
        desc = "Converting to ShareGPT",
        remove_columns = columns_to_drop,
    )

    # Step 3 (optional): randomly concat conversations to create a long stream!
    from datasets import concatenate_datasets
    n_extensions = max(conversation_extension-1, 0)
    if not n_extensions:
        return dataset

    # Column 0 is the original order, columns 1.. are differently seeded shuffles
    dataset = dataset.rename_columns({"conversations" : "conversations0"})
    all_shuffled = [dataset] + [
        dataset.shuffle(seed = random_state+j).rename_columns({"conversations0" : f"conversations{j}"})
        for j in range(1, n_extensions+1)
    ]
    dataset = concatenate_datasets(all_shuffled, axis = 1)

    conversation_columns = [f"conversations{j}" for j in range(n_extensions+1)]

    def __combine_conversations__(examples):
        # Row i of the result is row i of every column, chained in column order
        per_column = [examples[column] for column in conversation_columns]
        convos = [
            [turn for conversation in row for turn in conversation]
            for row in zip(*per_column)
        ]
        return {"conversations" : convos}

    columns_to_drop = dataset.column_names if remove_unused_columns else None
    dataset = dataset.map(
        __combine_conversations__,
        batched = True,
        desc = "Extending conversations",
        remove_columns = columns_to_drop,
    )
    return dataset


def get_ollama_eos_tokens(tokenizer, extra_eos_tokens = None):
    """
    Collects the strings used as Ollama `PARAMETER stop` values.

    tokenizer:        Object exposing `added_tokens_decoder` (a mapping whose values
                      stringify to tokens) and optionally `bos_token`.
    extra_eos_tokens: Extra stop tokens which are always kept. Not modified.

    Returns a list with, in this order: the added tokens that survived the
    prefix compaction (in tokenizer order), `extra_eos_tokens`, then the shared
    prefixes found (eg `<|reserved_special_token_`). Tokens made only of new
    lines or `▁`, and short tag-like tokens, are dropped.

    The candidate list is de-duplicated while keeping the tokenizer order. Going
    through `set` made the order (and, since the compaction below edits the text
    as it goes, potentially the result) depend on string hash randomization.
    """
    extra_eos_tokens = [] if extra_eos_tokens is None else list(extra_eos_tokens)
    excluded = set(extra_eos_tokens)

    # Remove added_tokens_decoder duplicates, keeping first-seen order
    unique_tokens = dict.fromkeys(str(x) for x in tokenizer.added_tokens_decoder.values())
    added_tokens_decoder = [x for x in unique_tokens if x not in excluded]

    # Remove BOS
    if getattr(tokenizer, "bos_token", None) is not None:
        added_tokens_decoder = [x for x in added_tokens_decoder if x != tokenizer.bos_token]

    repeatted_tokens = []
    # Join all vocab
    joined_text = "\x01\x00".join(added_tokens_decoder)
    for token in added_tokens_decoder:
        n = len(token)
        repeatted_counts = joined_text.count(token[:n//2])
        # Try finding longer than 1/2 of the token in the rest
        # For eg <|reserved_special_token_0|>, <|reserved_special_token_1|>
        if repeatted_counts > 2:
            for j in range(n//2+1, n):
                if joined_text.count(token[:j]) < repeatted_counts:
                    # token[:j] is no longer shared by all: step back 1 character
                    j -= 1
                    # Remove repeatted tokens to reduce search space
                    joined_text = joined_text.replace(token[:j], "")
                    repeatted_tokens.append(token[:j])
                    break

    # Remove duplicates
    # Tokens whose text was altered above no longer match and are dropped
    splitted = joined_text.split("\x01\x00")
    final_eos_tokens = [old for old, new in zip(added_tokens_decoder, splitted) if old == new]
    final_eos_tokens += extra_eos_tokens
    final_eos_tokens += repeatted_tokens

    # Remove new lines, spaces and HTML tags
    filtered_eos_tokens = []
    for token in final_eos_tokens:
        if   token.count("\n") == len(token): continue
        elif token.count("▁") == len(token): continue
        elif token.startswith("<") and len(token) <= 2: continue
        elif token.startswith("</") and len(token) == 3: continue
        filtered_eos_tokens.append(token)
    return filtered_eos_tokens


def construct_chat_template( \

tokenizer = None,

chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>""",

default_system_message = \
    "Below are some instructions that describe some tasks. Write responses that appropriately complete each request.",

extra_eos_tokens = None,
):
    """
    Creates an Ollama modelfile and a HF Jinja template from a custom
    template. You must provide 2x examples of an input & output.
    There is an optional system message as well.

    You must use {INPUT}, {OUTPUT} twice, and {SYSTEM} is optional.

    tokenizer:              Required. Used for its vocab, EOS / BOS tokens and name.
    chat_template:          Example prompt with {SYSTEM}, {INPUT} and {OUTPUT}.
    default_system_message: Used when a conversation has no system message.
    extra_eos_tokens:       None, 1 token, or a list of tokens which must each be
                            a single token in the vocab. The argument is not modified.

    Returns (modelfile, jinja_template, input_part, output_part) where the last
    2 items are the text placed right before {INPUT} and {OUTPUT}.

    Raises ValueError if an extra EOS token is not in the vocab and RuntimeError
    if no EOS token is available or the template cannot be parsed.
    """
    # Strip only the left
    chat_template = chat_template.lstrip()

    assert(tokenizer is not None)

    # Always work on a private list: tokens are inserted / appended below and
    # the caller's list must not change
    if extra_eos_tokens is None: extra_eos_tokens = []
    elif type(extra_eos_tokens) is str: extra_eos_tokens = [extra_eos_tokens,]
    else: extra_eos_tokens = list(extra_eos_tokens)

    vocab = tokenizer.get_vocab()
    for extra_eos in extra_eos_tokens:
        assert(type(extra_eos) is str)
        if extra_eos not in vocab:
            raise ValueError(f"Unsloth: `{extra_eos}` is not a singular token in the tokenizer.")

    error_msg = \
        "Unsloth: Your prompt template must have 2 examples showing the user input {INPUT} "\
        "and the assistant output {OUTPUT}\n\n"\
        "For example what is not allowed is just:\n"\
        "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n\n\n"\
        "What is required is 2x of this:\n"\
        "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n"\
        "### Input:\\n{INPUT}\\n\\n### Response:\\n{OUTPUT}\\n"

    # Check for EOS after {OUTPUT}
    if tokenizer.eos_token is not None:
        extra_eos_tokens.insert(0, tokenizer.eos_token)
    if len(extra_eos_tokens) == 0:
        raise RuntimeError(
            "Unsloth: Your tokenizer does not have an EOS token? Please provide one via extra_eos_tokens!"
        )

    # Check tokenizer types
    tokenizer_name = tokenizer.name_or_path.lower()
    if tokenizer_name.startswith(("unsloth/llama-3-8b-instruct", "unsloth/llama-3-70b-instruct")):
        # Add <|eot_id|>
        extra_eos_tokens.append("<|eot_id|>")
    elif ("<|eot_id|>" in extra_eos_tokens or "<|eot_id|>" in chat_template) and \
        tokenizer_name.startswith(("unsloth/llama-3-8b", "unsloth/llama-3-70b")):
        # Warn
        logger.warning(
            "Unsloth: Base llama-3 models did not train <|eot_id|>.\n"\
            "Please use the instruct version or use <|end_of_text|>"
        )
    # Remove duplicates but keep the order, so extra_eos_tokens[0] stays the
    # tokenizer's own EOS token whenever it has one (a set would shuffle it)
    extra_eos_tokens = list(dict.fromkeys(extra_eos_tokens))

    count_eos = 0
    for eos in extra_eos_tokens:
        count_eos += len(re.findall(r"{OUTPUT}" + re.escape(eos), chat_template))

    # This forces you to provide 2 input and outputs
    final_combined_check = False

    try:
        # O(N^2) search finding 2 repeatted pieces of text
        j = len(chat_template)-1
        at_least_one = False
        while j > 0:
            found = chat_template.rfind(chat_template[j:], 0, j)
            if found == -1: break
            j -= 1
            at_least_one = True
        if j > 0: j += 1
        else: raise RuntimeError(error_msg)

        if not at_least_one: raise RuntimeError(error_msg)

        # Must be equivalent to left
        final_combined_check = True

        # Repeatted text
        instruction_response = chat_template[j:]
        if instruction_response.count("{INPUT}") != 1 or instruction_response.count("{OUTPUT}") != 1:
            raise RuntimeError(error_msg)

        # 1st System, Instruction, Output pair
        left  = chat_template[:j]
        # 2nd Instruction, Output pair
        right = chat_template[j:]

        final_combined_check = left if final_combined_check else chat_template

        # Isolate input
        extra_eos_tokens_regex = "|".join(f"(?:{re.escape(x)})" for x in extra_eos_tokens)
        if len(extra_eos_tokens_regex) != 0:
            find_end = f"(?:{extra_eos_tokens_regex})?"
        else:
            find_end = ""
        find_end = r"\{INPUT\}[\s\n]{0,}" + find_end
        input_end = list(re.finditer(find_end, right))
        assert(len(input_end) == 1)
        input_end = input_end[0]
        input_end = input_end.span(0)[1]
        input_part = right[:input_end]

        # Isolate output
        output_part = right[input_end:]

        # Isolate system
        where_system = left.find(input_part)
        system_part = left[:where_system if where_system != -1 else len(left)]

        # Check if the user provided a correct prompt
        combined = system_part + input_part + output_part
        if combined != final_combined_check:
            combined_changed = combined            .replace('\n', '\\n')
            left_changed     = final_combined_check.replace('\n', '\\n')
            raise RuntimeError(
                "Unsloth: The prompt template you provided isn't correct. You gave:\n"\
                f"{combined_changed}\n\n"\
                "But we require the following:\n"\
                f"{left_changed}"
            )
    except Exception:
        # Fallback parser: any failure above (RuntimeError, AssertionError, ...)
        # lands here. KeyboardInterrupt / SystemExit are no longer swallowed.
        ending = chat_template[chat_template.find("{OUTPUT}") + len("{OUTPUT}"):]

        ending = re.escape(ending)
        find_text = "{INPUT}" + ending + "(.+?{OUTPUT}" + ending + ")"
        response_part = re.findall(find_text, chat_template, flags = re.DOTALL | re.MULTILINE)
        # No match means the template does not have the required 2 examples
        if len(response_part) == 0: raise RuntimeError(error_msg)
        response_part = response_part[0]

        # Grow the prefix of the response until it is no longer followed by {INPUT}
        found = None
        for j in range(1, len(response_part)):
            try_find = re.escape(response_part[:j])
            try: found = next(re.finditer("(" + try_find + ").+?\\{INPUT\\}", chat_template, flags = re.DOTALL | re.MULTILINE))
            except Exception: break
        # Without any match there is no separator to split on
        if found is None: raise RuntimeError(error_msg)
        separator = found.group(1)

        response_start = chat_template.find(response_part)
        start_instruction = chat_template[:response_start].rfind(separator)
        if start_instruction == -1: start_instruction = 0
        instruction_part = chat_template[start_instruction:response_start]

        combined = instruction_part + response_part
        where = chat_template.find(combined)
        system_part = chat_template[:where]

        system_part, input_part, output_part = system_part, instruction_part, response_part

    if count_eos == 0:
        logger.warning("Unsloth: We automatically added an EOS token to stop endless generations.")
        eos = extra_eos_tokens[0]
        output_part = output_part + eos

    # Ollama modelfile parts

    # Check bos_token is in system prompt
    ollama_system = system_part
    has_bos_token = False
    always_bos_token = False
    # Does the tokenizer prepend BOS by itself?
    if tokenizer("A").input_ids[0] == getattr(tokenizer, "bos_token_id", None):
        always_bos_token = True
        if ollama_system.startswith(tokenizer.bos_token):
            has_bos_token = True
            ollama_system = ollama_system[len(tokenizer.bos_token):]
    # Check system
    if "{SYSTEM}" in ollama_system:
        system_modelfile = "{{ if .System }}" + ollama_system.replace("{SYSTEM}", "{{ .System }}") + "{{ end }}"
    else:
        system_modelfile = ollama_system
    input_modelfile  = "{{ if .Prompt }}" + input_part .replace("{INPUT}",  "{{ .Prompt }}") + "{{ end }}"
    output_modelfile = output_part.replace("{OUTPUT}", "{{ .Response }}")

    # Ollama EOS
    ollama_eos = get_ollama_eos_tokens(tokenizer, extra_eos_tokens)
    ollama_eos = '\n'.join(f'PARAMETER stop "{eos}"' for eos in ollama_eos)

    # Add temperature and min_p to counteract gibberish
    ollama_eos += "\nPARAMETER temperature 1.5\nPARAMETER min_p 0.1"

    # Ollama modelfile
    part = '"""'
    modelfile = 'FROM {__FILE_LOCATION__}\n\n'\
    'TEMPLATE ' + part + system_modelfile + input_modelfile + output_modelfile + \
        part + '\n\n' + ollama_eos

    # HF Jinja Chat template
    def process(part, which, content = "message['content']"):
        # Turns text around the `which` placeholder into a Jinja string expression
        if part.endswith(which):
            part = "'" + part[:part.find(which)] + f"' + {content}"
        elif part.startswith(which):
            part = f"{content} + '" + part[part.find(which):] + "'"
        else:
            part = "'" + part.replace(which, f"' + {content} + '") + "'"
        if part.startswith("'' + "): part = part[5:]
        return part
    input_jinja  = process(input_part,  "{INPUT}")
    output_jinja = process(output_part, "{OUTPUT}")

    jinja_template = \
        "{% for message in loop_messages %}"\
            "{% if message['role'] == 'user' %}"\
                "{{ " + input_jinja + " }}"\
            "{% elif message['role'] == 'assistant' %}"\
                "{{ " + output_jinja + " }}"\
            "{% else %}"\
                "{{ raise_exception('Only user and assistant roles are supported!') }}"\
            "{% endif %}"\
        "{% endfor %}"\
        "{% if add_generation_prompt %}"\
            "{{ '" + output_part[:output_part.find("{OUTPUT}")] + "' }}"\
        "{% endif %}"

    # Now add system prompt to jinja
    if len(system_part) != 0:
        partial_system = process(system_part, "{SYSTEM}", "messages[0]['content']")
        partial_system = partial_system.replace("{SYSTEM}", "")

        if "{SYSTEM}" in partial_system:
            if default_system_message is None:
                raise RuntimeError("Unsloth: Please specify a default system message!")

        # Separate the BOS
        if has_bos_token:
            partial_system = partial_system.replace(tokenizer.bos_token, "", 1)
            system_part    = system_part   .replace(tokenizer.bos_token, "", 1)

        partial_system = \
            "{% if messages[0]['role'] == 'system' %}"\
                "{{ " + partial_system + " }}"\
                "{% set loop_messages = messages[1:] %}"
        if default_system_message is not None:
            full_system = system_part.replace("{SYSTEM}", default_system_message)
            if "{SYSTEM}" in system_part:
                modelfile += '\nSYSTEM "' + default_system_message + '"'
            partial_system += "{% else %}"\
                "{{ '" + full_system + "' }}"\
                "{% set loop_messages = messages %}"\
            "{% endif %}"
        else:
            partial_system += "{% endif %}"

        jinja_template = partial_system + jinja_template

        if has_bos_token:
            jinja_template = "{{ bos_token }}" + jinja_template

    # Fix missing loop_messages
    if "{% set loop_messages = messages %}" not in jinja_template:
        jinja_template = jinja_template.replace(
            "{% for message in loop_messages %}",
            "{% for message in messages %}",
            1, # Only replace the first one
        )

    # Check if system part is the same!
    # If both branches of the system check print the same text, collapse them
    jinja_template = re.sub(
        r"\{\% if messages\[0\]\['role'\] \=\= 'system' \%\}\{\{ '(.+?)' \}\}"\
        r"\{\% set loop\_messages \= messages\[1\:\] \%\}"\
        r"\{\% else \%\}\{\{ '\1' \}\}\{\% set loop\_messages \= messages \%\}\{\% endif \%\}"\
        r"\{\% for message in loop\_messages \%\}",
        r"{{ '\1' }}{% for message in messages %}",
        jinja_template, flags = re.MULTILINE | re.DOTALL,
    )

    # Check jinja template for bos
    if always_bos_token:
        if not jinja_template.startswith(("{{ bos_token }}", "{{- bos_token }}")):
            jinja_template = "{{ bos_token }}" + jinja_template

    # Get instruction and output parts for train_on_inputs = False
    input_part  = input_part [:input_part .find("{INPUT}")]
    output_part = output_part[:output_part.find("{OUTPUT}")]
    return modelfile, jinja_template, input_part, output_part


def test_construct_chat_template():
    """
    Round-trip check for construct_chat_template.

    Builds a Jinja chat template from a Llama-3 style example containing
    {SYSTEM}, {INPUT} and {OUTPUT} placeholders, installs it on the tokenizer
    and asserts that it renders a conversation exactly like the tokenizer's
    original chat template does.
    """
    from transformers import AutoTokenizer

    token = "hf_"
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Meta-Llama-3-8B-Instruct",
        token = token,
    )

    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>"""

    # NOTE(review): defined but not forwarded to construct_chat_template below,
    # so the callee's own default is what actually gets used.
    default_system_message = \
        "Below are some instructions that describe some tasks. Write responses that appropriately complete each request."

    extra_eos_tokens = None

    built = construct_chat_template(
        tokenizer = tokenizer,
        chat_template = chat_template,
        extra_eos_tokens = extra_eos_tokens,
    )
    # Only the Jinja template (second element) is needed for this check.
    jinja_template = built[1]

    conversation = [
        {"role": "system", "content": "You are an assistant"},
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": "Ok!"},
        {"role": "assistant", "content": "Anything else?"},
        {"role": "user", "content": "What's 2x2?"},
    ]

    # Render once with the stock template, then again with the generated one.
    expected = tokenizer.apply_chat_template(
        conversation, tokenize = False, add_generation_prompt = True,
    )
    tokenizer.chat_template = jinja_template
    rendered = tokenizer.apply_chat_template(
        conversation, tokenize = False, add_generation_prompt = True,
    )
    assert expected == rendered


def apply_chat_template(
    dataset,
    tokenizer = None,
    chat_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>

{SYSTEM}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|><|start_header_id|>user<|end_header_id|>

{INPUT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{OUTPUT}<|eot_id|>""",
    default_system_message = \
        "Below are some instructions that describe some tasks. Write responses that appropriately complete each request.",
    extra_eos_tokens = None,
):
    """
    Builds an Ollama modelfile and a HF Jinja template from a custom example
    template, attaches them to the tokenizer, and renders every conversation
    of the dataset into a "text" column.

    The example template must contain {INPUT} and {OUTPUT} twice each;
    {SYSTEM} is optional.

    Args:
        dataset: object exposing `.map`; each batch must provide a
            "conversations" column.
        tokenizer: tokenizer (or a wrapper exposing `.tokenizer`) that
            receives the generated template and helper attributes.
        chat_template: example template with the placeholders above.
        default_system_message: forwarded to construct_chat_template.
        extra_eos_tokens: forwarded to construct_chat_template.

    Returns:
        The result of `dataset.map(...)` with the added "text" column.
    """
    modelfile, jinja_template, input_part, output_part = construct_chat_template(
        tokenizer = tokenizer,
        chat_template = chat_template,
        default_system_message = default_system_message,
        extra_eos_tokens = extra_eos_tokens,
    )

    def _attach(target):
        # Store the template plus the pieces later used for Ollama export
        # and for masking out the instruction part.
        target.chat_template = jinja_template
        target._ollama_modelfile = modelfile
        target._unsloth_input_part  = input_part
        target._unsloth_output_part = output_part

    def formatting_prompts_func(examples):
        texts = []
        for convo in examples["conversations"]:
            texts.append(
                tokenizer.apply_chat_template(
                    convo, tokenize = False, add_generation_prompt = False,
                )
            )
        return { "text" : texts, }

    _attach(tokenizer)
    # Wrapper objects keep the real tokenizer under `.tokenizer`; mirror there.
    if hasattr(tokenizer, "tokenizer"):
        _attach(tokenizer.tokenizer)

    return dataset.map(formatting_prompts_func, batched = True,)


def create_stopping_criteria(tokenizer, stop_word = "eos_token"):
    """
    Builds a StoppingCriteriaList that halts generation once `stop_word`
    has been produced.

    Args:
        tokenizer: tokenizer used to look up the EOS id or to tokenize the
            stop word.
        stop_word: the literal "eos_token" to stop on `tokenizer.eos_token_id`,
            otherwise the text whose token sequence ends generation.

    Returns:
        A StoppingCriteriaList containing a single criterion.
    """
    class StoppingCriteriaSub(StoppingCriteria):
        __slots__ = "stop_token", "single_match", "length",

        def __init__(self, stops = "eos_token", device = "cuda", encounters = 1):
            super().__init__()
            if stops == "eos_token":
                # Honour the requested device instead of hard-coding "cuda".
                self.stop_token = torch.tensor(tokenizer.eos_token_id, device = device)
                self.length = 1
            else:
                # A newline is prepended before tokenizing and the first
                # resulting token is dropped again.
                encoded = tokenizer(["\n" + stops], add_special_tokens = False, return_tensors = "pt")
                self.stop_token = encoded.input_ids.ravel()[1:].to(device)
                self.length = self.stop_token.shape[0]
            self.single_match = self.length == 1

        def __call__(self, input_ids: LongTensor, scores: FloatTensor) -> bool:
            input_ids = input_ids.ravel()
            if input_ids.shape[0] == 0:
                # Nothing generated yet, so nothing can match.
                return False

            # Follow the generated ids onto whatever device they live on so
            # the comparison below never mixes devices.
            if self.stop_token.device != input_ids.device:
                self.stop_token = self.stop_token.to(input_ids.device)

            last_token = input_ids[-1]
            if self.single_match and (last_token == self.stop_token): return True

            if input_ids.shape[0] >= self.length and \
                (input_ids[-self.length:] == self.stop_token).all(): return True
            return False

    # Fall back to CPU when CUDA is unavailable; the criterion relocates the
    # stop token lazily on first use anyway.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    stopping_criteria = StoppingCriteriaList(
        [StoppingCriteriaSub(stops = stop_word, device = device)]
    )
    return stopping_criteria


def test_chat_templates():
    """
    Checks that the module's chat templates render the same prompts as the
    reference implementations.

    For most models the reference is the tokenizer's own built-in template;
    for the two Vicuna variants the reference is FastChat's conversation
    template prefixed with the tokenizer's BOS token.

    Raises:
        AssertionError: if any template renders a different prompt.
    """
    from transformers import AutoTokenizer

    messages = [
        {"role": "system","content": " You are a friendly chatbot.",},
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": "  But 2+2 is equal to 5. "},
        {"role": "assistant", "content": "No I'm sure its 4."},
        {"role": "user", "content": "  No it's 100% 5! "},
    ]
    # Several reference templates are exercised without the system turn.
    messages_no_system = messages[1:]

    def _check_against_builtin(model_name, template, convo):
        # Render with the stock template, swap in ours, render again, compare.
        correct_tokenizer = AutoTokenizer.from_pretrained(model_name)
        correct_prompt = correct_tokenizer.apply_chat_template(
            convo, tokenize = False, add_generation_prompt = True,
        )
        correct_tokenizer.chat_template = template
        our_prompt = correct_tokenizer.apply_chat_template(
            convo, tokenize = False, add_generation_prompt = True,
        )
        assert correct_prompt == our_prompt, f"Chat template mismatch for {model_name}"

    # Zephyr
    _check_against_builtin("HuggingFaceH4/zephyr-7b-beta", zephyr_template, messages)

    # Chatml
    _check_against_builtin("teknium/OpenHermes-2.5-Mistral-7B", chatml_template, messages)

    # Mistral
    _check_against_builtin("mistralai/Mistral-7B-Instruct-v0.2", mistral_template, messages_no_system)

    # Llama
    _check_against_builtin("unsloth/llama-2-7b-chat", llama_template, messages)

    # Vicuna: the reference prompt comes from FastChat, installed on demand.
    try:
        from fastchat.conversation import get_conv_template
    except ImportError:
        import os
        os.system("pip -qqq install git+https://github.com/lm-sys/FastChat.git")
        from fastchat.conversation import get_conv_template

    def _fastchat_prompt(conv_name, bos_token):
        # Alternate roles[0] / roles[1] over the non-system turns, then open
        # an empty turn for roles[1] as the generation prompt.
        conversation = get_conv_template(conv_name)
        for j, message in enumerate(messages_no_system):
            conversation.append_message(conversation.roles[j % 2], message["content"])
        conversation.append_message(conversation.roles[1], "")
        return bos_token + conversation.get_prompt()

    # The tokenizer is loaded first so its BOS token can prefix the reference
    # prompt (previously an undefined name `tokenizer` was used here).
    correct_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
    correct_prompt = _fastchat_prompt("vicuna_v1.1", correct_tokenizer.bos_token)
    correct_tokenizer.chat_template = vicuna_template
    our_prompt = correct_tokenizer.apply_chat_template(
        messages_no_system, tokenize = False, add_generation_prompt = True,
    )
    assert correct_prompt == our_prompt

    # Vicuna (old "zero_shot" format)
    correct_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
    correct_prompt = _fastchat_prompt("zero_shot", correct_tokenizer.bos_token)
    correct_tokenizer.chat_template = vicuna_old_template
    our_prompt = correct_tokenizer.apply_chat_template(
        messages_no_system, tokenize = False, add_generation_prompt = True,
    )
    # We add </s> ourselves
    assert correct_prompt == our_prompt.replace("</s>", "")

    # Gemma
    _check_against_builtin("unsloth/gemma-7b-it", gemma_template, messages_no_system)

    # Llama-3
    _check_against_builtin("unsloth/llama-3-8b-Instruct", llama3_template, messages)

    # Phi-3
    _check_against_builtin("microsoft/Phi-3-mini-4k-instruct", phi3_template, messages_no_system)


def test_hf_gguf_equivalence(tokenizer, gguf_model = "./model-unsloth.F16.gguf"):
    """
    Carefully checks the output of GGUF's tokenization and HF.
    Can catch all tokenization bugs.

    Runs `./llama.cpp/llama-cli --verbose-prompt` on an Alpaca-style prompt
    (and, if the tokenizer has a chat template, on a rendered conversation),
    parses the `id -> 'token'` pairs it prints and compares the ids position
    by position with the Hugging Face tokenization of the same prompt.

    Args:
        tokenizer: Hugging Face tokenizer to compare against.
        gguf_model: path of the GGUF model file handed to llama-cli.

    Returns:
        True when every compared position matches.

    Raises:
        RuntimeError: if a token id differs, or if no tokens could be parsed
            from the llama-cli output at all.
    """
    import subprocess
    import re
    messages = [
        {"role": "user", "content": "What is 2+2?"},
        {"role": "assistant", "content": "It's 4."},
        {"role": "user", "content": "  But 2+2 is equal to 5. "},
        {"role": "assistant", "content": "No I'm sure its 4."},
        {"role": "user", "content": "  No it's 100% 5! "},
    ]

    prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}""".format(
        "Describe the city given eloquently.", # instruction
        "The lost city of Atlantis.", # input
        "", # output - leave this blank for generation!
    )
    prompts = [ prompt, ]

    if tokenizer.chat_template is not None:
        prompt = tokenizer.apply_chat_template(messages, tokenize = False, add_generation_prompt = True)
        prompt = remove_special_tokens(tokenizer, prompt)
        prompts.append(prompt)

    for prompt in prompts:
        # Use a list of args with shell=False so prompt content is passed literally.
        command = [
            "./llama.cpp/llama-cli",
            "-m", gguf_model,
            "-n", "0",
            "--temp", "0.0",
            "--verbose-prompt",
            "--check-tensors",
            "-p", prompt,
        ]

        # Line buffering (bufsize = 1) is only supported for text-mode pipes,
        # so the default buffering is used for this binary pipe.
        with subprocess.Popen(
            command,
            shell = False,
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
        ) as sp:
            raw_output = sp.stdout.read()
        gguf_tokens = raw_output.decode("utf-8", errors = "replace")

        # Now extract GGUF tokenization attempt
        gguf_tokenized = re.findall(r"([\d]{1,}) \-\> \'([^\']{1,})\'", gguf_tokens, flags = re.MULTILINE)
        gguf_tokenized = [(int(x[0]), x[1],) for x in gguf_tokenized]
        if not gguf_tokenized:
            # zip() below would compare nothing and report a false success.
            raise RuntimeError(
                "Failed comparing GGUF to HF: no tokens found in llama-cli output.\n"
                + gguf_tokens[-2000:]
            )
        input_ids = tokenizer(prompt).input_ids

        tokens = tokenizer.batch_decode(input_ids)
        hf_tokenized = list(zip(input_ids, tokens))

        # Compare to Huggingface
        for j, (hf_token, gguf_token) in enumerate(zip(hf_tokenized, gguf_tokenized)):
            if (hf_token[0] != gguf_token[0]):
                print("Failed GGUF != HF at", j)
                print("HF =", hf_token)
                print("GGUF =", gguf_token)
                print(hf_tokenized)
                print()
                print(gguf_tokenized)
                print()
                raise RuntimeError("Failed comparing GGUF to HF.")
    return True


================================================
FILE: unsloth/dataprep/__init__.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .synthetic import *
from .raw_text import *


================================================
FILE: unsloth/dataprep/raw_text.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import json
import csv
from typing import List, Dict, Any, Union, Optional
from datasets import Dataset
from pathlib import Path

__all__ = [
    "RawTextDataLoader",
    "TextPreprocessor",
]

# Maps a lower-cased file extension to the name of the reader used for it.
# Extensions missing from this table are treated as "plain_text" by
# RawTextDataLoader.detect_format.
SUPPORTED_FORMATS = {
    ".txt": "plain_text",
    ".md": "markdown",
    # NOTE: ".json" is read line by line exactly like ".jsonl".
    ".json": "json_lines",
    ".jsonl": "json_lines",
    ".csv": "csv_text_column",
}


class RawTextDataLoader:
    """
    Reads raw text files (.txt, .md, .json/.jsonl, .csv) and splits them into
    overlapping token chunks for causal language-model training.

    Args:
        tokenizer: callable tokenizer returning a mapping with "input_ids";
            `eos_token_id`, `eos_token` and `decode` are used when present /
            when text chunks are requested.
        chunk_size: maximum number of tokens per chunk (before the EOS token).
        stride: number of tokens shared between consecutive chunks.
        return_tokenized: default for whether chunks are returned as
            `{"input_ids", "attention_mask"}` dicts (True) or decoded text.

    Raises:
        ValueError: if `chunk_size` is not positive or `stride >= chunk_size`.
    """

    def __init__(self, tokenizer, chunk_size = 2048, stride = 512, return_tokenized = True):
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        if stride >= chunk_size:
            raise ValueError(
                f"stride ({stride}) must be smaller than chunk_size ({chunk_size})"
            )
        self.tokenizer = tokenizer
        self.chunk_size = chunk_size
        self.stride = stride
        self.return_tokenized = return_tokenized

    def detect_format(self, file_path):
        """Return the reader name for `file_path` based on its extension."""
        extension = Path(file_path).suffix.lower()
        return SUPPORTED_FORMATS.get(extension, "plain_text")

    def load_from_file(self, file_path, return_tokenized = None):
        """
        Load one file and convert it into a dataset of chunks.

        Raises:
            ValueError: if the file yields no text or only whitespace.
        """
        if return_tokenized is None:
            return_tokenized = self.return_tokenized
        file_format = self.detect_format(file_path)
        text_content = self._read_file_by_format(file_path, file_format)
        if not text_content or not text_content.strip():
            raise ValueError(f"File '{file_path}' is empty or contains only whitespace")
        chunks = self.smart_chunk_text(
            text_content, self.chunk_size, self.stride, return_tokenized
        )
        return self.create_causal_dataset(chunks)

    def load_from_files(self, file_paths, return_tokenized = None):
        """Load several files and concatenate their chunks into one dataset."""
        if return_tokenized is None:
            return_tokenized = self.return_tokenized
        all_chunks = []
        for file_path in file_paths:
            file_format = self.detect_format(file_path)
            text_content = self._read_file_by_format(file_path, file_format)
            all_chunks.extend(
                self.smart_chunk_text(
                    text_content, self.chunk_size, self.stride, return_tokenized
                )
            )
        return self.create_causal_dataset(all_chunks)

    def chunk_text(self, text, return_tokenized = None):
        """Split `text` into overlapping chunks using the loader's settings."""
        if return_tokenized is None:
            return_tokenized = self.return_tokenized
        return self.smart_chunk_text(
            text, self.chunk_size, self.stride, return_tokenized
        )

    def create_causal_dataset(self, chunks):
        """
        Build a `Dataset` from chunks.

        Tokenized chunks (dicts) produce "input_ids", "attention_mask" and
        "labels" columns; text chunks produce a single "text" column.
        """
        if chunks and isinstance(chunks[0], dict):
            input_ids = [chunk["input_ids"] for chunk in chunks]
            attention_mask = [chunk["attention_mask"] for chunk in chunks]
            # Labels are a copy of input_ids for causal LM training
            labels = [list(ids) for ids in input_ids]
            return Dataset.from_dict(
                {
                    "input_ids": input_ids,
                    "attention_mask": attention_mask,
                    "labels": labels,
                }
            )
        # Text chunks (backward compatibility)
        return Dataset.from_dict({"text": chunks})

    @staticmethod
    def _to_list(tokens):
        """Convert a tensor-like or iterable of token ids into a plain list."""
        return tokens.tolist() if hasattr(tokens, "tolist") else list(tokens)

    def smart_chunk_text(self, text, chunk_size, stride, return_tokenized = True):
        """
        Tokenize `text` once and cut the token sequence into windows of
        `chunk_size` tokens that overlap by `stride` tokens.

        An EOS token (id or string) is appended to every chunk when the
        tokenizer defines one.

        Args:
            text: the raw text to split.
            chunk_size: window length in tokens.
            stride: overlap between consecutive windows in tokens.
            return_tokenized: return dicts with "input_ids"/"attention_mask"
                (True) or decoded text strings (False).

        Returns:
            A list of chunks.

        Raises:
            ValueError: if the text needs more than one chunk and
                `chunk_size <= 0` or `stride >= chunk_size` (the window would
                never advance).
        """
        # First pass: tokenize the entire text to get accurate token counts
        tokenized = self.tokenizer(text, return_tensors = "pt", add_special_tokens = False)
        tokens = tokenized["input_ids"]

        # Handle different tokenizer return formats
        if hasattr(tokens, "__len__") and len(tokens) > 0:
            # If it's a nested (batched) structure, take the first row
            if hasattr(tokens[0], "__len__"):
                tokens = tokens[0]
        elif isinstance(tokens, int):
            # If tokenizer returns just a count, create a simple range
            tokens = list(range(tokens))

        eos_token_id = getattr(self.tokenizer, "eos_token_id", None)
        total_tokens = len(tokens)

        if total_tokens <= chunk_size:
            # Text is small enough to fit in one chunk
            if return_tokenized:
                # Always hand back a plain list, with or without an EOS id,
                # so callers never receive a raw tensor.
                token_list = self._to_list(tokens)
                if eos_token_id is not None:
                    token_list.append(eos_token_id)
                return [
                    {"input_ids": token_list, "attention_mask": [1] * len(token_list)}
                ]
            eos_token = self.tokenizer.eos_token if self.tokenizer.eos_token else ""
            return [text + eos_token]

        # The window must move forward, otherwise the loop below never ends.
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        step = chunk_size - stride
        if step <= 0:
            raise ValueError(
                f"stride ({stride}) must be smaller than chunk_size ({chunk_size})"
            )

        chunks = []
        start_idx = 0

        while start_idx < total_tokens:
            end_idx = min(start_idx + chunk_size, total_tokens)
            chunk_tokens = tokens[start_idx:end_idx]
            is_last = end_idx == total_tokens

            if return_tokenized:
                chunk_tokens_list = self._to_list(chunk_tokens)

                # Add EOS token if it's the last chunk or chunk is complete
                if is_last or len(chunk_tokens_list) == chunk_size:
                    if eos_token_id is not None:
                        chunk_tokens_list.append(eos_token_id)

                chunks.append(
                    {
                        "input_ids": chunk_tokens_list,
                        # All tokens are attended to
                        "attention_mask": [1] * len(chunk_tokens_list),
                    }
                )
            else:
                # Decode back to text (backward compatibility)
                chunk_text = self.tokenizer.decode(
                    chunk_tokens, skip_special_tokens = True
                )

                # Add EOS token if it's the last chunk or chunk is complete
                if is_last or len(chunk_tokens) == chunk_size:
                    eos_token = (
                        self.tokenizer.eos_token if self.tokenizer.eos_token else ""
                    )
                    chunk_text += eos_token

                chunks.append(chunk_text)

            # Move to next chunk with stride overlap
            if is_last:
                break
            start_idx += step

        return chunks

    def _read_file_by_format(self, file_path, file_format):
        """
        Read `file_path` and return its text according to `file_format`.

        JSON-lines and CSV records are reduced to one text field each and
        joined with blank lines. Unknown formats yield an empty string.
        """
        # The csv module expects files opened with newline="" so that
        # newlines embedded in quoted fields are preserved.
        newline = "" if file_format == "csv_text_column" else None
        with open(file_path, "r", encoding = "utf-8", newline = newline) as f:
            if file_format == "plain_text" or file_format == "markdown":
                return f.read()
            elif file_format == "json_lines":
                lines = []
                for line in f:
                    try:
                        data = json.loads(line.strip())
                    except json.JSONDecodeError:
                        # Skip blank or malformed lines
                        continue
                    text = self._extract_text_from_json(data)
                    if text:
                        lines.append(text)
                return "\n\n".join(lines)
            elif file_format == "csv_text_column":
                texts = []
                for row in csv.DictReader(f):
                    text = self._extract_text_from_csv_row(row)
                    if text:
                        texts.append(text)
                return "\n\n".join(texts)
        return ""

    def _extract_text_from_json(self, data):
        """
        Return the first string found under a common text field name, or ""
        when `data` is not a JSON object or has no such field.
        """
        # A JSON line may legally be a number, string, list, ... - only
        # objects can carry named text fields.
        if not isinstance(data, dict):
            return ""
        text_fields = ["text", "content", "message", "body", "description", "prompt"]
        for field in text_fields:
            value = data.get(field)
            if isinstance(value, str):
                return value
        return ""

    def _extract_text_from_csv_row(self, row):
        """Return the first non-empty value under a common text column name."""
        text_columns = ["text", "content", "message", "body", "description", "prompt"]
        for column in text_columns:
            if column in row and row[column]:
                return row[column]
        return ""


class TextPreprocessor:
    """Helpers for cleaning, annotating and sanity-checking raw text data."""

    def clean_text(self, text):
        """
        Remove unwanted characters and normalize whitespace.

        Line endings are unified to "\\n", runs of other whitespace collapse
        to a single space, characters outside printable ASCII (other than
        newlines and tabs) are dropped, spaces around line breaks are trimmed
        and three or more consecutive newlines collapse to a blank line.
        """
        # Normalize line endings first so that newlines survive the
        # whitespace collapsing below (collapsing *all* whitespace up front
        # would make the newline handling unreachable).
        text = text.replace("\r\n", "\n").replace("\r", "\n")
        # Collapse every whitespace run that does not contain a newline
        text = re.sub(r"[^\S\n]+", " ", text)
        text = re.sub(r"[^\x20-\x7E\n\t]", "", text)
        # Drop the single spaces left next to line breaks
        text = re.sub(r" ?\n ?", "\n", text)
        text = re.sub(r"\n{3,}", "\n\n", text)
        return text.strip()

    def extract_sections(self, text, patterns):
        """
        Return all matches of every regex in `patterns` within `text`
        (e.g. code blocks, quotes), in pattern order.
        """
        sections = []
        for pattern in patterns:
            sections.extend(re.findall(pattern, text, re.MULTILINE | re.DOTALL))
        return sections

    def add_structure_tokens(self, text):
        """
        Replace markdown headings (#, ##, ###) and fenced code blocks with
        <|chapter|>, <|section|>, <|subsection|> and <|code|lang|> markers.
        """
        text = re.sub(
            r"^# (.+)$", r"<|chapter|>\1<|/chapter|>", text, flags = re.MULTILINE
        )
        text = re.sub(
            r"^## (.+)$", r"<|section|>\1<|/section|>", text, flags = re.MULTILINE
        )
        text = re.sub(
            r"^### (.+)$", r"<|subsection|>\1<|/subsection|>", text, flags = re.MULTILINE
        )
        text = re.sub(
            r"```(\w*)\n(.*?)\n```", r"<|code|\1|>\2<|/code|>", text, flags = re.DOTALL
        )
        return text

    def validate_dataset(self, dataset):
        """
        Collect basic quality statistics over the "text" column of `dataset`.

        Checks for:
        - Minimum/maximum sequence lengths
        - Character encoding issues
        - Repeated content
        - Empty chunks

        Returns:
            A dict with the counters "total_samples", "empty_samples",
            "min_length", "max_length", "avg_length", "repeated_content",
            "encoding_issues" and a list of human-readable "warnings".
            Length statistics are 0 when there is no non-empty sample.
        """
        stats = {
            "total_samples": len(dataset),
            "empty_samples": 0,
            "min_length": float("inf"),
            "max_length": 0,
            "avg_length": 0,
            "repeated_content": 0,
            "encoding_issues": 0,
            "warnings": [],
        }

        text_lengths = []
        seen_texts = set()

        for text in dataset["text"]:
            if not text or len(text.strip()) == 0:
                stats["empty_samples"] += 1
                continue

            # Check for encoding issues (e.g. lone surrogates)
            try:
                text.encode("utf-8")
            except UnicodeEncodeError:
                stats["encoding_issues"] += 1

            # Track lengths
            length = len(text)
            text_lengths.append(length)
            stats["min_length"] = min(stats["min_length"], length)
            stats["max_length"] = max(stats["max_length"], length)

            # Check for repeated content (ignoring surrounding whitespace)
            text_hash = hash(text.strip())
            if text_hash in seen_texts:
                stats["repeated_content"] += 1
            else:
                seen_texts.add(text_hash)

        if text_lengths:
            stats["avg_length"] = sum(text_lengths) / len(text_lengths)
        else:
            # No measurable sample: report 0 rather than the inf sentinel
            stats["min_length"] = 0

        # Generate warnings
        if stats["empty_samples"] > 0:
            stats["warnings"].append(f"Found {stats['empty_samples']} empty samples")

        if stats["repeated_content"] > 0:
            stats["warnings"].append(
                f"Found {stats['repeated_content']} repeated samples"
            )

        if stats["encoding_issues"] > 0:
            stats["warnings"].append(
                f"Found {stats['encoding_issues']} encoding issues"
            )

        # Only meaningful when at least one non-empty sample was measured
        if text_lengths and stats["min_length"] < 10:
            stats["warnings"].append("Some samples are very short (< 10 characters)")

        return stats


================================================
FILE: unsloth/dataprep/synthetic.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = [
    "SyntheticDataKit",
]
import subprocess
import threading
from collections import deque
import time
import os

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
import requests
import torch
import gc
import time
import re
from unsloth_zoo.log import logger
import numpy as np

from .synthetic_configs import (
    synthetic_qa_config,
)


def _load_vllm_utils():
    """
    Import the vLLM helpers from `unsloth_zoo.vllm_utils` on demand.

    The import happens inside the function, so it only runs when this
    function is called rather than when the module is imported.

    Returns:
        The tuple `(load_vllm, patch_vllm, delete_vllm)`.
    """
    from unsloth_zoo.vllm_utils import (
        load_vllm,
        patch_vllm,
        delete_vllm,
    )

    return load_vllm, patch_vllm, delete_vllm


def terminate_tree(proc: subprocess.Popen, timeout = 15):
    """
    Stop `proc` and, where possible, all of its descendants.

    Strategy, in order:
    1. With psutil: terminate every child recursively, then the parent, and
       wait up to `timeout / 2` seconds for the parent to exit.
    2. On Windows: `taskkill /T /F` on the process tree.
    3. Fallback: `proc.kill()` followed by a short wait.

    Each stage is best-effort; a failure simply falls through to the next
    one. Does nothing if `proc` is None or has already exited.

    Args:
        proc: the process to stop (may be None).
        timeout: overall grace period in seconds; half of it is granted to
            the psutil stage.
    """
    if proc is None or proc.poll() is not None:
        return

    # `except Exception` (rather than a bare except) keeps the best-effort
    # behaviour without swallowing KeyboardInterrupt / SystemExit.
    try:
        import psutil

        parent = psutil.Process(proc.pid)
        for child in parent.children(recursive = True):
            child.terminate()
        parent.terminate()
        parent.wait(timeout = timeout / 2)
        return
    except Exception:
        pass

    if os.name == "nt":
        try:
            subprocess.run(
                ["taskkill", "/T", "/F", "/PID", str(proc.pid)],
                capture_output = True,
                timeout = 5,
            )
            proc.wait(timeout = 1)
            return
        except Exception:
            pass

    proc.kill()
    try:
        proc.wait(timeout = 5)
    except Exception:
        pass


class PipeCapture:
    """
    Non blocking pipe capture.

    A daemon thread reads `pipe` line by line, keeps the most recent
    `keep_lines` lines in memory, optionally echoes them, and sets a
    "ready" event once a line matches `ready_regex`.

    Args:
        pipe: file-like object with `readline()` and `close()`.
        keep_lines: maximum number of lines retained for `tail()`.
        echo: print each captured line prefixed with `name`.
        name: label used when echoing.
        text: True if `pipe` yields str, False if it yields bytes.
        encoding: encoding used to decode bytes when `text` is False.
        errors: decode error policy when `text` is False.
        ready_regex: pattern (string or compiled) marking readiness.
    """

    def __init__(
        self,
        pipe,
        keep_lines = 2000,
        echo = False,
        name = "",
        text = True,
        encoding = "utf-8",
        errors = "replace",
        ready_regex = None,
    ):
        self.pipe = pipe
        self.buf = deque(maxlen = keep_lines)
        self.lock = threading.Lock()
        self.echo = echo
        self.name = name
        self.text = text
        self.encoding = encoding
        self.errors = errors

        self.ready_event = threading.Event()
        self.closed_event = threading.Event()

        # Accept either a pattern string or an already compiled regex
        self.ready_regex = None
        if ready_regex is not None:
            if not hasattr(ready_regex, "search"):
                ready_regex = re.compile(ready_regex)
            self.ready_regex = ready_regex

        self.t = threading.Thread(target = self._reader, daemon = True)
        self.t.start()

    def _reader(self):
        """Thread body: drain the pipe until EOF, then close it."""
        try:
            # readline() returns an empty str/bytes at EOF
            sentinel = "" if self.text else b""
            for raw_line in iter(self.pipe.readline, sentinel):
                if self.text:
                    line = raw_line
                else:
                    line = raw_line.decode(self.encoding, self.errors)
                line = line.rstrip("\r\n")

                # Lines containing "platform is" are never echoed
                if self.echo and "platform is" not in line:
                    print(f"{self.name}: {line}")

                with self.lock:
                    self.buf.append(line)

                if self.ready_regex is not None and self.ready_regex.search(line):
                    self.ready_event.set()

        finally:
            try:
                self.pipe.close()
            except Exception:
                pass
            self.closed_event.set()

    def wait_for_ready(self, timeout = None):
        """Block until a line matched `ready_regex`; return whether it did."""
        return self.ready_event.wait(timeout)

    def has_closed(self):
        """Return True once the reader thread has finished with the pipe."""
        return self.closed_event.is_set()

    def wait_until_closed(self, timeout = None):
        """Block until the pipe is closed; return whether it closed in time."""
        return self.closed_event.wait(timeout)

    def tail(self, n = 200):
        """Return the last `n` captured lines joined by newlines."""
        # A slice of [-0:] would return the whole buffer, so handle
        # non-positive requests explicitly.
        if n <= 0:
            return ""
        with self.lock:
            return "\n".join(list(self.buf)[-n:])


class SyntheticDataKit:
    """Start a `vllm serve` subprocess for a model and prepare QA-generation inputs.

    Construction loads the model config and tokenizer, launches the vLLM server
    as a child process and waits for it to become ready. `prepare_qa_generation`
    writes the YAML config and records chunking parameters, and `chunk_data`
    splits a text file into overlapping token windows. The instance is also a
    context manager: leaving the `with` block calls `cleanup`.
    """

    def __init__(
        self,
        model_name = "unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        max_seq_length = 2048,
        gpu_memory_utilization = 0.98,
        float8_kv_cache = False,
        conservativeness = 1.0,
        token = None,
        timeout = 1200,  # maybe this is not enough for large models if we need to download
        **kwargs,
    ):
        """Load config/tokenizer, launch `vllm serve` and wait until it responds.

        Args:
            model_name: Model identifier passed to transformers and to vLLM.
            max_seq_length: Maximum sequence length (int).
            gpu_memory_utilization: Forwarded to `load_vllm` (float).
            float8_kv_cache: Forwarded to `load_vllm` (bool).
            conservativeness: Forwarded to `load_vllm` (float).
            token: Optional access token forwarded to `from_pretrained`.
            timeout: Seconds to wait for the readiness line on vLLM's stdout.
            **kwargs: Extra arguments forwarded to `load_vllm`.

        If the server never becomes ready, diagnostics are printed, the process
        tree is terminated and the constructor returns without raising.
        """
        assert type(model_name) is str
        assert type(max_seq_length) is int
        assert type(gpu_memory_utilization) is float
        assert type(float8_kv_cache) is bool
        assert type(conservativeness) is float
        assert token is None or type(token) is str

        self.model_name = model_name
        self.max_seq_length = max_seq_length

        from transformers import AutoConfig, AutoTokenizer

        self.config = AutoConfig.from_pretrained(
            model_name,
            token = token,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            token = token,
        )
        load_vllm, patch_vllm, delete_vllm = _load_vllm_utils()
        self._delete_vllm = delete_vllm
        patch_vllm(debug = False)
        # `return_args = True` yields the engine arguments as a mapping, which
        # is translated into `vllm serve` command-line flags below.
        engine_args = load_vllm(
            model_name = model_name,
            config = self.config,
            gpu_memory_utilization = gpu_memory_utilization,
            max_seq_length = max_seq_length,
            disable_log_stats = True,
            float8_kv_cache = float8_kv_cache,
            conservativeness = conservativeness,
            return_args = True,
            enable_lora = False,
            use_bitsandbytes = False,
            compilation_config = 3,
            **kwargs,
        )
        if "dtype" in engine_args:
            dtype_val = engine_args["dtype"]
            if dtype_val == torch.float16:
                dtype_val = "float16"
            elif dtype_val == torch.bfloat16:
                dtype_val = "bfloat16"
            elif dtype_val == torch.float32:
                dtype_val = "float32"
            engine_args["dtype"] = dtype_val
            # Convert torch.bfloat16, torch.float16, etc. to valid CLI string
            if hasattr(dtype_val, "name"):
                engine_args["dtype"] = dtype_val.name
            elif isinstance(dtype_val, str) and dtype_val.startswith("torch."):
                engine_args["dtype"] = dtype_val.split(".")[-1]
            # Only allow valid vLLM choices
            valid_dtypes = {"auto", "bfloat16", "float", "float16", "float32", "half"}
            if engine_args["dtype"] not in valid_dtypes:
                engine_args["dtype"] = "auto"
        # The model is passed positionally, and no `--device` flag is emitted.
        if "device" in engine_args:
            del engine_args["device"]
        if "model" in engine_args:
            del engine_args["model"]

        subprocess_commands = [
            "vllm",
            "serve",
            str(model_name),
        ]
        for key, value in engine_args.items():
            flag = key.replace("_", "-")
            if key == "compilation_config":
                # [TODO] Unsure why subprocess doesn't process json properly
                # Also -O3 breaks on T4!
                # subprocess_commands += ["-O3",]
                continue
            which = str(value).replace("torch.", "")
            if which == "True":
                # Ignore --enforce-eager True
                subprocess_commands += [
                    "--" + flag,
                ]
            elif which == "False":
                # Ignore flag
                pass
            elif which == "None":
                # Ignore flag
                pass
            else:
                subprocess_commands += [
                    "--" + flag,
                    which,
                ]
        logger.info(subprocess_commands)
        vllm_process = subprocess.Popen(
            subprocess_commands,
            stdout = subprocess.PIPE,
            stderr = subprocess.PIPE,
            start_new_session = True,
        )
        ready_re = re.compile(r"Starting vLLM API server(?:\s+\d+)?\s+on\b")
        self.vllm_process = vllm_process
        self.stdout_capture = PipeCapture(
            vllm_process.stdout,
            keep_lines = 1000,
            echo = True,
            name = "vLLM STDOUT",
            ready_regex = ready_re,
            text = False,
        )
        self.stderr_capture = PipeCapture(
            vllm_process.stderr,
            keep_lines = 2000,
            echo = False,
            name = "vLLM STDERR",
            ready_regex = None,
            text = False,
        )
        # we don't print stderr to console but self.stderr_capture.tail(200) will print the last 200 lines

        ready = self.stdout_capture.wait_for_ready(timeout = timeout)
        if not ready:
            if self.stdout_capture.has_closed() or self.vllm_process.poll() is not None:
                print("Stdout stream ended before readiness message detected.")
                print("\n--- stdout tail ---\n", self.stdout_capture.tail(50))
                print("\n--- stderr tail ---\n", self.stderr_capture.tail(50))
            else:
                print(f"Unsloth: vllm_process failed to load! (timeout={timeout})")
                print("\n--- stdout tail ---\n", self.stdout_capture.tail(50))
                print("\n--- stderr tail ---\n", self.stderr_capture.tail(50))
            terminate_tree(self.vllm_process)
            return
        else:
            print("vLLM Server Ready Detected")

        # The readiness line was seen; now poll the HTTP endpoint, giving up
        # after 100 failed attempts one second apart.
        trial = 0
        while not self.check_vllm_status():
            if trial >= 100:
                print("Unsloth: vllm_process failed to load!")
                print("\n--- stdout tail ---\n", self.stdout_capture.tail(50))
                print("\n--- stderr tail ---\n", self.stderr_capture.tail(50))
                terminate_tree(self.vllm_process)
                return
            trial += 1
            time.sleep(1)
        return

    @staticmethod
    def from_pretrained(
        model_name = "unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        max_seq_length = 2048,
        gpu_memory_utilization = 0.9,
        float8_kv_cache = False,
        conservativeness = 1.0,
        token = None,
        **kwargs,
    ):
        """Build a `SyntheticDataKit`; all arguments are forwarded to `__init__`.

        Note that the default `gpu_memory_utilization` here is 0.9, not the
        constructor's 0.98.
        """
        return SyntheticDataKit(
            model_name = model_name,
            max_seq_length = max_seq_length,
            gpu_memory_utilization = gpu_memory_utilization,
            float8_kv_cache = float8_kv_cache,
            conservativeness = conservativeness,
            token = token,
            **kwargs,
        )

    @staticmethod
    def check_vllm_status():
        """Return True if the local vLLM `/metrics` endpoint answers with HTTP 200.

        Any request failure (connection refused, timeout, ...) counts as
        "not ready" and yields False.
        """
        try:
            # Without a timeout a half-open socket could block the caller forever.
            response = requests.get("http://localhost:8000/metrics", timeout = 5)
        except requests.exceptions.RequestException:
            return False
        return response.status_code == 200

    def cleanup(self):
        """Stop the vLLM subprocess and release cached GPU memory.

        Safe to call more than once: only the first call on an instance that
        actually started a process does any work. Termination is attempted
        gracefully first, then forcefully after 10 seconds.
        """
        if not hasattr(self, "vllm_process"):
            return
        # `cleanup` is reached from explicit calls, `__exit__` and `__del__`;
        # run the teardown only once.
        if getattr(self, "_cleaned_up", False):
            return
        self._cleaned_up = True

        vllm_process = self.vllm_process
        print("Attempting to terminate the VLLM server gracefully...")
        try:
            vllm_process.terminate()
            vllm_process.wait(timeout = 10)
            print("Server terminated gracefully.")
        except subprocess.TimeoutExpired:
            print(
                "Server did not terminate gracefully after 10 seconds. Forcing kill..."
            )
            vllm_process.kill()
            vllm_process.wait()
            print("Server killed forcefully.")
        except Exception as e:
            print(f"An error occurred while trying to stop the process: {e}")
            try:
                if vllm_process.poll() is None:
                    print("Attempting forceful kill due to error...")
                    vllm_process.kill()
                    vllm_process.wait()
                    print("Server killed forcefully after error.")
            except Exception as kill_e:
                print(f"Error during forceful kill: {kill_e}")
        for _ in range(10):
            torch.cuda.empty_cache()
            gc.collect()

        # Delete vLLM module as well
        if hasattr(self, "_delete_vllm"):
            self._delete_vllm(llm = None)

    def __enter__(self):
        """Return the kit itself so it can be used in a `with` statement."""
        return self

    def __exit__(self, *exc):
        """Tear the server down when the `with` block ends."""
        self.cleanup()

    def __del__(self):
        """Best-effort teardown when the object is garbage collected."""
        try:
            self.cleanup()
        except Exception:
            # Finalizers may run during interpreter shutdown, when module
            # globals are already gone; nothing useful can be done then.
            pass

    def chunk_data(self, filename = None):
        """Split a text file into overlapping token windows written next to it.

        The file is tokenized, cut into windows of at most
        `max_seq_length - 2 * max_generation_tokens - 128` tokens where
        consecutive windows share `overlap` tokens, and every window is
        decoded and written to `<name>_<i><ext>`.

        Args:
            filename: Path of an existing UTF-8 text file.

        Returns:
            The list of chunk file paths, in order. Empty when the file
            tokenizes to zero tokens.

        Raises:
            RuntimeError: If the kit was not set up via `from_pretrained` and
                `prepare_qa_generation`, or if the generation length or the
                overlap leave no room for content.
        """
        # Chunks data by max tokens and generation length
        assert filename is not None
        assert os.path.exists(filename)
        assert hasattr(self, "tokenizer")
        if not hasattr(self, "max_seq_length"):
            raise RuntimeError(
                "Please use SyntheticDataKit.from_pretrained(...) first!"
            )
        if not hasattr(self, "overlap") or not hasattr(self, "max_generation_tokens"):
            raise RuntimeError("Please use prepare_qa_generation first!")

        with open(filename, "r", encoding = "utf-8") as f:
            text = f.read()

        max_tokens = (
            self.max_seq_length - self.max_generation_tokens * 2 - 128
        )  # -128 to reduce errors
        if max_tokens <= 5:
            raise RuntimeError("Generation length is way too long!")
        # Number of new tokens each window contributes beyond the shared overlap.
        stride = max_tokens - self.overlap
        if stride <= 0:
            raise RuntimeError("Overlap is too large for the chunk size!")
        input_ids = self.tokenizer(text, add_special_tokens = False).input_ids

        # Get left and right boundaries
        length = len(input_ids)
        n_chunks = int(np.ceil(length / stride))
        # n_chunks windows need n_chunks + 1 fence posts. Using only n_chunks
        # posts drops one window: short files would produce no chunk at all and
        # the remaining windows would exceed max_tokens.
        edges = np.ceil(
            np.linspace(0, max(length - self.overlap, 0), n_chunks + 1)
        ).astype(int)
        boundaries = np.stack((edges[:-1], (edges + self.overlap)[1:])).T
        boundaries = np.minimum(boundaries, length).tolist()

        # Get extension of filename like .txt
        filename, extension = os.path.splitext(filename)
        if filename.endswith("/"):
            filename = filename[:-1]

        all_filenames = []
        for i, (left, right) in enumerate(boundaries):
            chunked_text = self.tokenizer.decode(input_ids[left:right])
            new_filename = f"{filename}_{i}{extension}"
            all_filenames.append(new_filename)
            with open(new_filename, "w", encoding = "utf-8") as f:
                f.write(chunked_text)
        return all_filenames

    def prepare_qa_generation(
        self,
        output_folder = "data",
        max_generation_tokens = 512,
        temperature = 0.7,
        top_p = 0.95,
        overlap = 64,
        default_num_pairs = 25,
        cleanup_threshold = 1.0,
        cleanup_batch_size = 4,
        cleanup_temperature = 0.3,
    ):
        """Create the working folders and write `synthetic_data_kit_config.yaml`.

        The placeholders of the `synthetic_qa_config` template are filled with
        the given values and the result is written to the current working
        directory. `max_generation_tokens` and `overlap` are stored on the
        instance for later use by `chunk_data`.

        Args:
            output_folder: Root folder under which the per-stage sub-folders
                are created.
            max_generation_tokens: Must be smaller than `max_seq_length`.
            temperature, top_p, default_num_pairs: Generation settings written
                to the config.
            overlap: Token overlap between chunks.
            cleanup_threshold, cleanup_batch_size, cleanup_temperature:
                Cleanup settings written to the config.
        """
        assert hasattr(self, "model_name")
        assert hasattr(self, "max_seq_length")
        assert max_generation_tokens < self.max_seq_length

        locations = "pdf,html,youtube,docx,ppt,txt,output,generated,cleaned,final"
        locations = locations.split(",")
        for path in locations:
            os.makedirs(os.path.join(output_folder, path), exist_ok = True)

        self.max_generation_tokens = max_generation_tokens

        config = (
            synthetic_qa_config.replace("{data_output_location}", str(output_folder))
            .replace("{model_name}", str(self.model_name))
            .replace("{temperature}", str(temperature))
            .replace("{top_p}", str(top_p))
            .replace(
                "{chunk_size}", str(self.max_seq_length - max_generation_tokens * 2 - 2)
            )
            .replace("{overlap}", str(overlap))
            .replace("{max_tokens}", str(max_generation_tokens))
            .replace("{default_num_pairs}", str(default_num_pairs))
            .replace("{cleanup_threshold}", str(cleanup_threshold))
            .replace("{cleanup_batch_size}", str(cleanup_batch_size))
            .replace("{cleanup_temperature}", str(cleanup_temperature))
        )

        with open("synthetic_data_kit_config.yaml", "w", encoding = "utf-8") as f:
            f.write(config)

        self.overlap = overlap


================================================
FILE: unsloth/dataprep/synthetic_configs.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# YAML template for the Synthetic Data Kit configuration file.
# The single-brace placeholders {data_output_location}, {model_name},
# {temperature}, {top_p}, {chunk_size}, {overlap}, {max_tokens},
# {default_num_pairs}, {cleanup_threshold}, {cleanup_batch_size} and
# {cleanup_temperature} are substituted with plain `str.replace` calls in
# `SyntheticDataKit.prepare_qa_generation`.
# NOTE(review): {num_pairs}, {text}, {pairs} and the doubled braces in the
# prompts are not substituted there; presumably the consumer of the written
# config formats them later - confirm against that tool.
synthetic_qa_config = """\
# Master configuration file for Synthetic Data Kit

# Global paths configuration
paths:
  # Input data locations
  input:
    pdf: "{data_output_location}/pdf"
    html: "{data_output_location}/html"
    youtube: "{data_output_location}/youtube"
    docx: "{data_output_location}/docx"
    ppt: "{data_output_location}/ppt"
    txt: "{data_output_location}/txt"

  # Output locations
  output:
    parsed: "{data_output_location}/output"      # Where parsed text files are saved
    generated: "{data_output_location}/generated" # Where generated content is saved
    cleaned: "{data_output_location}/cleaned"     # Where cleaned content is saved
    final: "{data_output_location}/final"         # Where final formatted content is saved

# VLLM server configuration
vllm:
  api_base: "http://localhost:8000/v1" # Base URL for VLLM API
  port: 8000                           # Port for VLLM server
  model: "{model_name}"                # Default model to use
  max_retries: 3                       # Number of retries for API calls
  retry_delay: 1.0                     # Initial delay between retries (seconds)

# Ingest configuration
ingest:
  default_format: "txt"  # Default output format for parsed files
  youtube_captions: "auto"  # Options: "auto", "manual" - caption preference

# LLM generation parameters
generation:
  temperature: {temperature}     # Higher = more creative, lower = more deterministic
  top_p: {top_p}                 # Nucleus sampling parameter
  chunk_size: {chunk_size}       # Size of text chunks for processing
  overlap: {overlap}             # Overlap between chunks to maintain context
  max_tokens: {max_tokens}       # Maximum tokens in LLM responses
  num_pairs: {default_num_pairs} # Default number of QA pairs to generate

# Content cleanup parameters
cleanup:
  threshold: {cleanup_threshold}       # Default quality threshold (1-10)
  batch_size: {cleanup_batch_size}     # Number of items per batch for rating
  temperature: {cleanup_temperature}   # Temperature for rating (lower = more consistent)

# Format conversion parameters
format:
  default: "jsonl"   # Default output format
  include_metadata: true  # Include metadata in output files
  pretty_json: true  # Use indentation in JSON output

# Prompts for different tasks
prompts:
  # Summary generation prompt
  summary: |
    Summarize this document in 3-5 sentences, focusing on the main topic and key concepts.

  # QA pair generation prompt
  qa_generation: |
    Create {num_pairs} question-answer pairs from this text for LLM training.

    Rules:
    1. Questions must be about important facts in the text
    2. Answers must be directly supported by the text
    3. Return JSON format only:

    [
      {{
        "question": "Question 1?",
        "answer": "Answer 1."
      }},
      {{
        "question": "Question 2?",
        "answer": "Answer 2."
      }}
    ]

    Text:
    {text}

  # QA pair rating prompt
  qa_rating: |
    Rate each of these question-answer pairs for quality and return exactly this JSON format:

    [
      {{"question": "same question text", "answer": "same answer text", "rating": n}}
    ]

    Where n is a number from 1-10.

    DO NOT include any text outside of the JSON array, just return valid JSON:

    {pairs}"""


================================================
FILE: unsloth/device_type.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by `from <this module> import *`.
# `get_device_count` is defined in this module but is not listed here; its
# result is exported as `DEVICE_COUNT`.
__all__ = [
    "is_hip",
    "get_device_type",
    "DEVICE_TYPE",
    "DEVICE_TYPE_TORCH",
    "DEVICE_COUNT",
    "ALLOW_PREQUANTIZED_MODELS",
    "ALLOW_BITSANDBYTES",
]

import torch
import functools
import inspect
from unsloth_zoo.utils import Version


@functools.cache
def is_hip():
    """Return True when `torch.version.hip` exists and is truthy (cached)."""
    version_module = getattr(torch, "version", None)
    hip_version = getattr(version_module, "hip", None)
    return bool(hip_version)


@functools.cache
def get_device_type():
    """Return the accelerator family in use: "cuda", "hip" or "xpu" (cached).

    Raises:
        NotImplementedError: If no accelerator is available, or the available
            one is not one of the supported families.
        RuntimeError: If `torch.accelerator` reports a supported family even
            though the direct `torch.cuda` / `torch.xpu` checks failed.
    """
    cuda_available = hasattr(torch, "cuda") and torch.cuda.is_available()
    if cuda_available:
        # The CUDA API being available together with a HIP version means HIP.
        return "hip" if is_hip() else "cuda"

    xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
    if xpu_available:
        return "xpu"

    # Check torch.accelerator
    if hasattr(torch, "accelerator"):
        if not torch.accelerator.is_available():
            raise NotImplementedError(
                "Unsloth cannot find any torch accelerator? You need a GPU."
            )
        accelerator = str(torch.accelerator.current_accelerator())
        if accelerator in ("cuda", "xpu", "hip"):
            broken_install_message = (
                f"Unsloth: Weirdly `torch.cuda.is_available()`, `torch.xpu.is_available()` and `is_hip` all failed.\n"
                f"But `torch.accelerator.current_accelerator()` works with it being = `{accelerator}`\n"
                f"Please reinstall torch - it's most likely broken :("
            )
            raise RuntimeError(broken_install_message)

    raise NotImplementedError(
        "Unsloth currently only works on NVIDIA, AMD and Intel GPUs."
    )


# Resolved once at import time; `get_device_type` raises when no supported
# accelerator is found.
DEVICE_TYPE: str = get_device_type()
# HIP fails for autocast and other torch functions. Use CUDA instead
DEVICE_TYPE_TORCH = "cuda" if DEVICE_TYPE == "hip" else DEVICE_TYPE


@functools.cache
def get_device_count():
    """Return the number of devices for `DEVICE_TYPE` (cached).

    "cuda" and "hip" are both counted through `torch.cuda`, "xpu" through
    `torch.xpu`; any other device type yields 1.
    """
    if DEVICE_TYPE == "xpu":
        return torch.xpu.device_count()
    if DEVICE_TYPE in ("cuda", "hip"):
        return torch.cuda.device_count()
    return 1


# Number of devices of the detected type, resolved once at import time.
DEVICE_COUNT: int = get_device_count()

# 4-bit quantization requires a block size of 64
# | Device Type     | Warp Size | Block Size |
# |-----------------|-----------|------------|
# | CUDA            |    32     |     32     |
# | Radeon (Navi)   |    32     |     32     |
# | Instinct (MI)   |    64     |     32     |
#
# Since bitsandbytes 0.49.0, pre-quantized models with 64 blockwise now works
# on Radeon GPUs, but not Instinct MI300x for eg
# See https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1748
#
# Since bitsandbytes 0.49.2, blocksize=64 4-bit quantization is supported on
# CDNA (MI Instinct / gfx9xx) GPUs as well
# See https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1856

# Both flags stay True unless the checks below (HIP only) turn them off.
ALLOW_PREQUANTIZED_MODELS: bool = True
# HSA_STATUS_ERROR_EXCEPTION checks - sometimes AMD fails for BnB
ALLOW_BITSANDBYTES: bool = True
if DEVICE_TYPE == "hip":
    try:
        import bitsandbytes
    except Exception:
        # Any import failure disables 4bit support, but KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        print(
            "Unsloth: `bitsandbytes` is not installed - 4bit QLoRA unallowed, but 16bit and full finetuning works."
        )
        ALLOW_PREQUANTIZED_MODELS = False
        ALLOW_BITSANDBYTES = False
    if ALLOW_BITSANDBYTES:
        # Parse the installed version once and reuse it for every comparison.
        _bnb_version = Version(bitsandbytes.__version__)
        ALLOW_BITSANDBYTES = _bnb_version > Version("0.48.2.dev0")
        if _bnb_version >= Version("0.49.2"):
            # Blocksize 64 works everywhere from this release on.
            pass
        elif _bnb_version >= Version("0.49.0"):
            try:
                # Pre-quantized bitsandbytes models use blocksize 64, so we need to check the GPU
                from bitsandbytes.cextension import ROCM_WARP_SIZE_64

                ALLOW_PREQUANTIZED_MODELS = not ROCM_WARP_SIZE_64
            except Exception as e:
                print(
                    "Unsloth: Checking `from bitsandbytes.cextension import ROCM_WARP_SIZE_64` had error = \n"
                    f"{str(e)}\n"
                    "4bit QLoRA disabled for now, but 16bit and full finetuning works."
                )
                ALLOW_PREQUANTIZED_MODELS = False
                ALLOW_BITSANDBYTES = False
        elif ALLOW_BITSANDBYTES:
            from bitsandbytes.nn.modules import Params4bit

            # Older releases hard-code blocksize 128 on HIP; detect that by
            # inspecting the source of Params4bit.
            if "blocksize = 64 if not HIP_ENVIRONMENT else 128" in inspect.getsource(
                Params4bit
            ):
                ALLOW_PREQUANTIZED_MODELS = False


================================================
FILE: unsloth/import_fixes.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import importlib.abc
import importlib.machinery
import importlib.util
from pathlib import Path
from importlib.metadata import version as importlib_version
from packaging.version import Version as TrueVersion
import re
import logging
import textwrap
import warnings
import sys
import functools

# We cannot do from unsloth_zoo.log import logger since FBGEMM might cause seg faults.
# Logging is verbose (INFO) only when UNSLOTH_ENABLE_LOGGING is "1", "True" or
# "true"; otherwise both the root configuration and this module's logger use
# WARNING.
UNSLOTH_ENABLE_LOGGING = os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") in (
    "1",
    "True",
    "true",
)
logger = logging.getLogger(__name__)
logging.basicConfig(
    level = logging.INFO if UNSLOTH_ENABLE_LOGGING else logging.WARNING,
    format = "[%(name)s|%(levelname)s]%(message)s",
)
logger.setLevel(logging.INFO if UNSLOTH_ENABLE_LOGGING else logging.WARNING)

# Substring of the stderr message printed when the amdgpu.ids table is missing.
_AMDGPU_IDS_MISSING_TEXT = "amdgpu.ids: No such file or directory"


def Version(version):
    """Parse a possibly decorated version into a `packaging` Version.

    Only the leading run of digits and dots is kept. When anything had to be
    dropped (dev / alpha / beta / rc tags, local build suffixes, ...), an extra
    ".1" component is appended so the result sorts after the plain release.

    Args:
        version: Anything whose `str()` starts with a dotted number.

    Returns:
        The parsed `packaging.version.Version`.

    Raises:
        RuntimeError: If no version can be extracted; the message names the
            calling file and line.
    """
    try:
        raw_version = str(version)
        matched = re.match(r"[0-9\.]{1,}", raw_version)
        if matched is None:
            raise ValueError(f"No leading version number in `{raw_version}`")
        new_version = matched.group(0).rstrip(".")
        # Compare against the string form: a non-str argument would otherwise
        # always look "different" and get a spurious ".1".
        if new_version != raw_version:
            new_version += ".1"  # Add .1 for dev / alpha / beta / rc
        return TrueVersion(new_version)
    except Exception:
        from inspect import getframeinfo, stack

        # Frame 1 is whoever called Version(...).
        caller = getframeinfo(stack()[1][0])
        raise RuntimeError(
            f"Unsloth: Could not get version for `{version}`\n"
            f"File name = [{caller.filename}] Line number = [{caller.lineno}]"
        )


# Ignore logging messages
class HideLoggingMessage(logging.Filter):
    """Logging filter that rejects every record whose message contains `text`."""

    __slots__ = ("text",)

    def __init__(self, text):
        # Substring that marks a record as unwanted.
        self.text = text

    def filter(self, x):
        """Return False for records containing the hidden text, True otherwise."""
        rendered_message = x.getMessage()
        return self.text not in rendered_message


class HidePrintMessage:
    """Stream wrapper that drops writes containing any registered substring.

    Everything except `write`, `flush` and `add_filter` is delegated to the
    wrapped stream.
    """

    def __init__(self, original_stream):
        self._hidden_texts = []
        self._original_stream = original_stream

    def add_filter(self, text):
        """Register a substring; messages containing it are discarded."""
        self._hidden_texts.append(text)

    def write(self, message):
        """Forward `message` to the wrapped stream unless a filter matches."""
        for hidden in self._hidden_texts:
            if hidden in message:
                return
        self._original_stream.write(message)

    def flush(self):
        """Flush the wrapped stream."""
        self._original_stream.flush()

    def __getattr__(self, name):
        # Only reached for attributes not found on the wrapper itself.
        return getattr(self._original_stream, name)


import contextlib
import ctypes

# C library handle used by `suppress_cuda_printf` to flush C stdio buffers.
# Left as None when the library cannot be loaded this way.
_libc = None
try:
    _libc = ctypes.CDLL(None)
except Exception:
    pass


@contextlib.contextmanager
def suppress_cuda_printf():
    """Silence C-level output for the duration of the `with` block.

    CUDA device printf (eg CUTLASS "Arch conditional MMA" errors on Blackwell)
    writes straight to file descriptor 1, below Python's `sys.stdout`, so the
    `HidePrintMessage` wrapper on `sys.stderr` never sees it. This context
    manager therefore points fds 1 and 2 at the null device at the OS level.
    On exit it synchronizes CUDA (when torch and CUDA are available), flushes
    the C stdio buffers and then restores both descriptors.
    """
    sys.stdout.flush()
    sys.stderr.flush()
    # Maps each redirected fd to a duplicate of what it originally pointed at.
    backups = {}
    try:
        for target_fd in (1, 2):
            backups[target_fd] = os.dup(target_fd)
            null_fd = os.open(os.devnull, os.O_WRONLY)
            os.dup2(null_fd, target_fd)
            os.close(null_fd)
        yield
    finally:
        # Pending device output has to be emitted while still redirected.
        with contextlib.suppress(Exception):
            import torch

            if torch.cuda.is_available():
                torch.cuda.synchronize()
        if _libc is not None:
            with contextlib.suppress(Exception):
                _libc.fflush(None)
        for target_fd, backup_fd in backups.items():
            os.dup2(backup_fd, target_fd)
            os.close(backup_fd)

def _silence_known_noise():
    """Install the stderr, logging and warnings filters for known harmless noise.

    `sys.stderr` is wrapped in `HidePrintMessage`, the "torchao" logger is
    raised to ERROR, and a fixed list of "ignore" warning filters is
    registered.
    """
    import sys

    # Apply to stderr for FBGEMM and CUTLASS errors
    sys.stderr = HidePrintMessage(sys.stderr)
    hidden_stderr_texts = (
        # https://github.com/pytorch/FBGEMM/blob/d99cd96490ec4aabac2ee95b1e76ea4dcfcfa628/fbgemm_gpu/experimental/gemm/triton_gemm/utils.py#L43-L52
        "TMA benchmarks will be running",
        # CUTLASS/FBGEMM MMA instruction error on SM90 vs SM100 (Blackwell) GPUs
        # https://github.com/NVIDIA/cutlass/blob/main/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized.hpp
        "Arch conditional MMA instruction used without targeting",
        # CUTLASS arch conditional errors for various architectures
        "CUTE_INVALID_CONTROL_PATH",
        # CUTLASS TMA-related errors when not targeting correct architecture
        "Trying to use tma without CUTE_ARCH_TMA",
        # Also filter torchao print to stderr about cpp extensions
        "Skipping import of cpp extensions",
        # vllm "Level is deprecated" stderr noise
        "Level is deprecated",
        # Triton "df: No such file or directory" stderr noise
        "df: No such file",
        # ROCm/libdrm missing ids table stderr noise on some AMD setups
        _AMDGPU_IDS_MISSING_TEXT,
    )
    for hidden_text in hidden_stderr_texts:
        sys.stderr.add_filter(hidden_text)

    # Skipping import of cpp extensions due to incompatible torch version 2.9.0+cu128 for torchao version 0.15.0
    logging.getLogger("torchao").setLevel(logging.ERROR)

    ignored_warnings = (
        # SyntaxWarning: invalid escape sequence '\.'
        dict(message = "invalid escape sequence", category = SyntaxWarning),
        # PYTORCH_CUDA_ALLOC_CONF is deprecated warning from torch
        dict(message = "PYTORCH_CUDA_ALLOC_CONF is deprecated"),
        # TF32 precision deprecation warning from torch
        dict(message = "Please use the new API settings to control TF32"),
        # Deprecation warnings from torchao
        dict(message = "`int4_weight_only` is deprecated"),
        dict(message = "`int8_weight_only` is deprecated"),
        # TorchAO deprecated import paths (https://github.com/pytorch/ao/issues/2752)
        dict(
            message = r"Importing.*from torchao\.dtypes.*is deprecated",
            category = DeprecationWarning,
        ),
        dict(
            message = r"Importing BlockSparseLayout from torchao\.dtypes is deprecated",
            category = DeprecationWarning,
        ),
        # SWIG builtin type warnings (from bitsandbytes/triton SWIG bindings)
        dict(
            message = r"builtin type Swig.*has no __module__ attribute",
            category = DeprecationWarning,
        ),
        # Triton autotuner deprecation (https://github.com/triton-lang/triton/pull/4496)
        dict(
            message = r"warmup, rep, and use_cuda_graph parameters are deprecated",
            category = DeprecationWarning,
        ),
        # Python 3.12+ multiprocessing fork warning in multi-threaded processes
        dict(
            message = r".*multi-threaded.*use of fork\(\) may lead to deadlocks",
            category = DeprecationWarning,
        ),
        # Resource warnings from internal socket/file operations
        dict(message = r"unclosed.*socket", category = ResourceWarning),
        dict(message = r"unclosed file.*dev/null", category = ResourceWarning),
        # torch 2.9+ pin_memory/is_pinned device arg deprecation
        dict(
            message = r"The `device` argument is deprecated",
            category = DeprecationWarning,
        ),
        dict(
            message = r".*pin_memory.*device.*deprecated",
            category = DeprecationWarning,
        ),
        dict(
            message = r".*is_pinned.*device.*deprecated",
            category = DeprecationWarning,
        ),
        # PydanticSerializationUnexpectedValue warning
        dict(message = r".*PydanticSerializationUnexpectedValue"),
        dict(message = r"Expected.*but got.*with value.*is not.*subclass"),
        # Apex ROCm fused RoPE backend selection warning when Aiter is enabled.
        dict(
            message = r"^Aiter backend is selected for fused RoPE\.?",
            category = UserWarning,
            module = r"^apex\.transformer\.functional\.fused_rope$",
        ),
    )
    for filter_kwargs in ignored_warnings:
        warnings.filterwarnings("ignore", **filter_kwargs)


if not UNSLOTH_ENABLE_LOGGING:
    _silence_known_noise()


# Fix up AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'
# MUST do this at the start primarily due to tensorflow causing issues
def fix_message_factory_issue():
    """Make sure `google.protobuf.message_factory.MessageFactory.GetPrototype` exists.

    Three situations are patched:
      * `MessageFactory` is missing entirely: a stub class is installed.
      * `MessageFactory` exists without `GetPrototype` and the module has no
        `GetMessageClass`: the class is replaced by the stub.
      * `MessageFactory` exists without `GetPrototype` but `GetMessageClass`
        is available: `GetPrototype` is added as a thin wrapper around it.

    This is best effort: any failure (for example protobuf not being
    installed) is ignored. Returns None.
    """
    try:
        import google.protobuf.message_factory as message_factory

        class MessageFactory:
            """Stub whose methods accept anything and return None."""

            def CreatePrototype(self, *args, **kwargs):
                return

            def GetMessages(self, *args, **kwargs):
                return

            def GetPrototype(self, *args, **kwargs):
                return

        if not hasattr(message_factory, "MessageFactory"):
            logger.info("Unsloth: Patching protobuf.MessageFactory as it doesn't exist")
            message_factory.MessageFactory = MessageFactory
        elif hasattr(message_factory.MessageFactory, "GetPrototype"):
            # Already has what callers need; nothing to patch.
            pass
        elif not hasattr(message_factory, "GetMessageClass"):
            message_factory.MessageFactory = MessageFactory
            logger.info(
                "Unsloth: Replacing protobuf.MessageFactory as it has no GetPrototype"
            )
        else:
            GetMessageClass = message_factory.GetMessageClass

            def GetPrototype(self, descriptor):
                return GetMessageClass(descriptor)

            message_factory.MessageFactory.GetPrototype = GetPrototype
            logger.info("Unsloth: Patching protobuf.MessageFactory.GetPrototype")
    except Exception:
        # Deliberately ignored: the patch is optional and must never break import.
        pass


# Fix Xformers performance issues since 0.0.25
def fix_xformers_performance_issue():
    """Rewrite `num_splits_key=-1,` to `num_splits_key=None,` in old Xformers installs.

    Only acts when xformers is installed with a version below 0.0.29. The file
    `ops/fmha/cutlass.py` inside the installed package is edited in place.
    Errors raised while touching that file are logged and not re-raised.
    """
    xformers_spec = importlib.util.find_spec("xformers")
    if xformers_spec is None:
        return
    installed = importlib_version("xformers")
    if not (Version(installed) < Version("0.0.29")):
        return

    # Namespace-style specs have no origin; fall back to the first search location.
    origin = xformers_spec.origin
    if origin is not None:
        package_dir = os.path.split(origin)[0]
    else:
        package_dir = xformers_spec.submodule_search_locations[0]
    cutlass_file = Path(package_dir) / "ops" / "fmha" / "cutlass.py"

    # See https://github.com/facebookresearch/xformers/issues/1176#issuecomment-2545829591
    needle = "num_splits_key=-1,"
    try:
        if not cutlass_file.exists():
            return
        with open(cutlass_file, "r+", encoding = "utf-8") as handle:
            source = handle.read()
            if needle not in source:
                return
            source = source.replace(needle, "num_splits_key=None,")
            # Overwrite from the start and drop any leftover tail bytes.
            handle.seek(0)
            handle.write(source)
            handle.truncate()
            logger.info("Unsloth: Patching Xformers to fix some performance issues.")
    except Exception as error:
        logger.info(f"Unsloth: Failed patching Xformers with error = {str(error)}")


def patch_vllm_for_notebooks():
    """Give `sys.stdout` a usable `fileno()` when running inside a notebook.

    Does nothing unless an IPython shell is found whose class is named
    `ZMQInteractiveShell` or whose type mentions `google.colab`. If
    `sys.stdout.fileno()` then raises or returns something other than a
    non-negative int, it is replaced by a function returning 1.
    """
    import sys

    def _shell_from_ipython():
        # Preferred lookup: IPython's own accessor.
        try:
            from IPython import get_ipython

            return get_ipython()
        except Exception:
            return None

    def _shell_from_builtins():
        # Fallback: some environments expose get_ipython as a builtin.
        try:
            import builtins

            getter = getattr(builtins, "get_ipython", None)
            return getter() if callable(getter) else None
        except Exception:
            return None

    active_shell = _shell_from_ipython()
    if active_shell is None:
        active_shell = _shell_from_builtins()
    if active_shell is None:
        return

    try:
        in_notebook = (
            active_shell.__class__.__name__ == "ZMQInteractiveShell"
            or "google.colab" in str(type(active_shell))
        )
    except Exception:
        return

    if not in_notebook or not hasattr(sys.stdout, "fileno"):
        return

    try:
        descriptor = sys.stdout.fileno()
    except Exception:
        pass  # fileno() is broken -> fall through and patch it
    else:
        if isinstance(descriptor, int) and not descriptor < 0:
            return  # stdout already reports a valid descriptor

    logger.info(
        "Unsloth: Notebook detected - Patching sys.stdout.fileno for newer `vllm>=0.12.0` versions"
    )
    sys.stdout.fileno = lambda: 1


# ValueError: 'aimv2' is already used by a Transformers config, pick another name.
def fix_vllm_aimv2_issue():
    """Patch old vLLM installs whose Ovis config registers the `aimv2` config.

    Only acts for vLLM versions below 0.10.1. The installed file
    `transformers_utils/configs/ovis.py` is edited in place: the
    `AutoConfig.register("aimv2", AIMv2Config)` call is removed and the backbone
    config construction is rewritten to build `AIMv2Config` directly when the
    model type is `aimv2`. Errors while touching the file are logged, not raised.
    """
    vllm_spec = importlib.util.find_spec("vllm")
    if vllm_spec is None:
        return
    installed = importlib_version("vllm")
    if not (Version(installed) < Version("0.10.1")):
        return

    # Namespace-style specs have no origin; fall back to the first search location.
    origin = vllm_spec.origin
    if origin is not None:
        package_dir = os.path.split(origin)[0]
    else:
        package_dir = vllm_spec.submodule_search_locations[0]
    ovis_file = Path(package_dir) / "transformers_utils" / "configs" / "ovis.py"

    register_call = 'AutoConfig.register("aimv2", AIMv2Config)'
    # The whitespace inside both literals is part of the match / replacement text
    # and must stay exactly as written.
    old_backbone = """backbone_config.pop('model_type')
                backbone_config = AutoConfig.for_model(model_type,
                                                       **backbone_config)"""
    new_backbone = """if model_type != "aimv2":
                    backbone_config.pop('model_type')
                    backbone_config = AutoConfig.for_model(model_type, **backbone_config)
                else:
                    backbone_config = AIMv2Config(**backbone_config)"""

    try:
        if not ovis_file.exists():
            return
        with open(ovis_file, "r+", encoding = "utf-8") as handle:
            source = handle.read()
            # See https://github.com/vllm-project/vllm-ascend/issues/2046
            if register_call not in source:
                return
            source = source.replace(register_call, "")
            source = source.replace(old_backbone, new_backbone)
            # Overwrite from the start and drop any leftover tail bytes.
            handle.seek(0)
            handle.write(source)
            handle.truncate()
            logger.info(
                "Unsloth: Patching vLLM to fix `'aimv2' is already used by a Transformers config, pick another name.`"
            )
    except Exception as error:
        logger.info(f"Unsloth: Failed patching vLLM with error = {str(error)}")


def fix_vllm_guided_decoding_params():
    """Alias `GuidedDecodingParams` to `StructuredOutputsParams` on newer vLLM.

    GuidedDecodingParams was renamed to StructuredOutputsParams in vLLM
    (https://github.com/vllm-project/vllm/pull/22772/files) while trl still
    imports the old name, so this is a temporary patch until trl updates.

    Import failures are first checked for the vLLM / transformers>=5 mismatch
    signature (raising a RuntimeError with downgrade instructions), then handed
    to `disable_broken_vllm`; if that returns a falsy value the original error
    is re-raised.
    """

    def _raise_if_transformers_too_new(exc):
        # Turn the known incompatibility into an actionable RuntimeError.
        details = str(exc)
        markers = ("ALLOWED_LAYER_TYPES", "transformers.configuration_utils")
        if not any(marker in details for marker in markers):
            return
        try:
            installed_vllm = importlib_version("vllm")
        except Exception:
            installed_vllm = "unknown"
        raise RuntimeError(
            "Unsloth: vLLM with version "
            f"{installed_vllm} does not yet support transformers>=5.0.0. "
            "Please downgrade to transformers==4.57.3 via "
            'pip install --force-reinstall "transformers==4.57.3". '
            f"Original error: {exc}"
        ) from exc

    if importlib.util.find_spec("vllm") is None:
        return

    try:
        import vllm
    except (ImportError, OSError) as exc:
        _raise_if_transformers_too_new(exc)
        if not disable_broken_vllm(exc):
            raise
        return

    try:
        from vllm.sampling_params import GuidedDecodingParams
    except (ImportError, OSError) as exc:
        _raise_if_transformers_too_new(exc)
        if disable_broken_vllm(exc):
            return
        sampling_params = getattr(vllm, "sampling_params", None)
        if not hasattr(sampling_params, "StructuredOutputsParams"):
            # Neither the old nor the new name is available - surface the error.
            raise
        sampling_params.GuidedDecodingParams = sampling_params.StructuredOutputsParams


def ignore_logger_messages():
    """Attach a filter for "`HF_TOKEN`" messages to huggingface_hub's login logger.

    Adds `HideLoggingMessage("`HF_TOKEN`")` to the logger exported by
    `huggingface_hub._login` (used for the "Environment variable `HF_TOKEN` is
    set" notice). Best-effort: if the import or the filter setup fails with an
    ordinary exception, nothing is changed.
    """
    try:
        from huggingface_hub._login import logger as huggingface_hub_logger

        huggingface_hub_logger.addFilter(HideLoggingMessage("`HF_TOKEN`"))
    except Exception:
        # Private module path may move between releases; ignore, but let
        # KeyboardInterrupt / SystemExit propagate.
        pass


def patch_ipykernel_hf_xet():
    """Disable Hugging Face progress bars for a known-bad hf_xet / ipykernel pair.

    hf_xet == 1.1.10 together with ipykernel == 7.0.0 or 7.0.1 fails with
    `LookupError(<ContextVar name='shell_parent' ...>)` while updating progress.
    See https://github.com/huggingface/xet-core/issues/526

    Does nothing unless hf_xet, ipykernel and huggingface_hub are all installed
    and the versions match that combination.
    """
    for required in ("hf_xet", "ipykernel", "huggingface_hub"):
        if importlib.util.find_spec(required) is None:
            return

    ipykernel_version = Version(importlib_version("ipykernel"))
    hf_xet_version = Version(importlib_version("hf_xet"))
    if not (hf_xet_version == Version("1.1.10")):
        return

    # 7.0.1 seems to also break with the same LookupError as 7.0.0.
    affected_kernel = (ipykernel_version == Version("7.0.0")) or (
        ipykernel_version == Version("7.0.1")
    )
    if not affected_kernel:
        return

    print(
        "#### Unsloth: `hf_xet==1.1.10` and `ipykernel==7.0.0` or `ipykernel==7.0.1` breaks progress bars. Using ASCII progress bars.\n"
        "#### Unsloth: To re-enable progress bars, please upgrade to `ipykernel>=7.1.0` or wait for a fix to\n"
        "https://github.com/huggingface/xet-core/issues/526"
    )
    from huggingface_hub.utils import disable_progress_bars

    disable_progress_bars()


def patch_trackio():
    """Export the environment variables that brand the Trackio dashboard.

    Sets the light / dark logo URLs and the plot order used for experiment
    tracking. See https://github.com/unslothai/notebooks/pull/110
    """
    trackio_settings = {
        "TRACKIO_LOGO_LIGHT_URL": "https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20logo%20black%20text.png",
        "TRACKIO_LOGO_DARK_URL": "https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20logo%20white%20text.png",
        "TRACKIO_PLOT_ORDER": "train/reward",
    }
    for variable, value in trackio_settings.items():
        os.environ[variable] = value


def patch_datasets():
    """Refuse to run with `datasets` releases known to cause recursion errors.

    Versions from 4.4.0 up to and including 4.5.0 are rejected (they hit
    `_thread.RLock_recursion_count` issues). Does nothing when `datasets` is
    not installed.

    Raises:
        NotImplementedError: if the installed `datasets` version lies in the
            rejected range.
    """
    if importlib.util.find_spec("datasets") is None:
        return

    datasets_version = Version(importlib_version("datasets"))
    if Version("4.4.0") <= datasets_version <= Version("4.5.0"):
        raise NotImplementedError(
            f"#### Unsloth: Using `datasets = {str(datasets_version)}` will cause recursion errors.\n"
            # Closing backtick was missing, which broke the rendered hint.
            "Please downgrade datasets to `datasets==4.3.0`"
        )


def check_fbgemm_gpu_version():
    """Disable FBGEMM when the installed `fbgemm_gpu_genai` is older than 1.4.0.

    Does nothing if the `fbgemm_gpu` module cannot be found or if the version
    of the `fbgemm_gpu_genai` distribution cannot be read. For old versions the
    environment variable `UNSLOTH_HAS_FBGEMM` is set to "0" instead of raising,
    since lower versions were seen to cause SegFault / bad alloc errors.
    """
    if importlib.util.find_spec("fbgemm_gpu") is None:
        return
    try:
        fbgemm_gpu_version = importlib_version("fbgemm_gpu_genai")
    except Exception:
        # Module present but distribution metadata unavailable - skip the check.
        return

    if Version(fbgemm_gpu_version) < Version("1.4.0"):
        # Fall back to Triton kernels rather than failing the import.
        os.environ["UNSLOTH_HAS_FBGEMM"] = "0"
        logger.info(
            f"Unsloth: fbgemm_gpu_genai=={fbgemm_gpu_version} is old and may cause issues. "
            "Disabling FBGEMM - using Triton kernels instead."
        )
        return

    logger.info(f"Unsloth: fbgemm_gpu_genai=={fbgemm_gpu_version} detected.")


def patch_enable_input_require_grads():
    """
    Patch transformers PreTrainedModel.enable_input_require_grads to handle vision models
    that raise NotImplementedError from get_input_embeddings().

    The patch is only installed when the current implementation's source
    contains `for module in self.modules()`; otherwise, or when the source
    cannot be inspected, transformers is left untouched.

    The replacement registers one forward hook per distinct input-embedding
    module, stores all hooks on `self._require_grads_hooks` and, when at least
    one exists, the first one on `self._require_grads_hook`.
    """
    import inspect
    from transformers import PreTrainedModel

    # Check if the original function iterates over self.modules() instead of just returning the enable_input_require_grads
    # Ref: https://github.com/huggingface/transformers/pull/41993/files#diff-6b72b98c4c2dcfc6cc606843917733f5d858374fbc22a735ff483bbc0c1e63eaL1979-R1996
    try:
        original_source = inspect.getsource(PreTrainedModel.enable_input_require_grads)
    except Exception:
        # Source unavailable (e.g. OSError / TypeError from inspect) - do not patch.
        return

    # Only patch if the new pattern exists (iterating over self.modules())
    if "for module in self.modules()" not in original_source:
        return

    def _patched_enable_input_require_grads(self):
        def make_inputs_require_grads(module, inputs, output):
            output.requires_grad_(True)

        hooks = []
        # ids of embedding modules already hooked, so shared embeddings get one hook
        seen_modules = set()

        for module in self.modules():
            if not (
                isinstance(module, PreTrainedModel)
                and hasattr(module, "get_input_embeddings")
            ):
                continue

            try:
                input_embeddings = module.get_input_embeddings()
            except NotImplementedError:
                # Vision models may not implement get_input_embeddings - skip them
                # For GLM V4.6 for example, this skips only `self.visual`
                continue

            if input_embeddings is None:
                continue

            embedding_id = id(input_embeddings)
            if embedding_id in seen_modules:
                continue

            seen_modules.add(embedding_id)
            hooks.append(
                input_embeddings.register_forward_hook(make_inputs_require_grads)
            )

        self._require_grads_hooks = hooks
        if hooks:
            self._require_grads_hook = hooks[0]

    PreTrainedModel.enable_input_require_grads = _patched_enable_input_require_grads

    logger.info(
        "Unsloth: Patched enable_input_require_grads for vision model compatibility"
    )


def _is_custom_torch_build(raw_version_str):
    """Check if a raw version string indicates a custom or source build.
    Must operate on the raw string from importlib_version(), not the parsed
    Version object, since our custom Version() strips local identifiers.

    Standard PyTorch releases use: +cu124, +rocm6.3, +cpu, +xpu
    Source/custom builds use: +gitXXXXXXX, +HEXHASH, or other suffixes.
    """
    if "+" not in raw_version_str:
        return False
    local = raw_version_str.split("+", 1)[1]
    if not local:
        return False
    # Use fullmatch so the entire local identifier must match, not just a prefix.
    # cu/rocm require a trailing digit (e.g. cu124, rocm6.3). cpu/xpu are exact.
    # Case-insensitive since some builds may use uppercase.
    return not re.fullmatch(r"cu\d[\d.]*|rocm\d[\d.]*|cpu|xpu", local, re.IGNORECASE)


def _infer_required_torchvision(torch_major, torch_minor):
    """Infer the minimum required torchvision minor version from torch version.

    The torch -> torchvision minor version mapping follows a consistent formula:
      torch 1.x  ->  torchvision 0.(x + 1)   (verified: torch 1.7 through 1.13)
      torch 2.x  ->  torchvision 0.(x + 15)  (verified: torch 2.0 through 2.9)

    Returns (tv_major, tv_minor) or None if the major version is unrecognized.
    """
    if torch_major == 1 and torch_minor >= 7:
        return (0, torch_minor + 1)
    if torch_major == 2:
        return (0, torch_minor + 15)
    return None


def torchvision_compatibility_check():
    """Verify that the installed torchvision is new enough for the installed torch.

    Skipped entirely when `UNSLOTH_SKIP_TORCHVISION_CHECK` is "1" or "true"
    (case-insensitive), when torchvision is not installed, or when versions
    cannot be read or parsed. The required torchvision version comes from a
    table of known pairs, falling back to `_infer_required_torchvision`.

    Raises:
        ImportError: if torch is not installed, or if a stable torch /
            torchvision pair is mismatched. Custom/source and pre-release
            builds only produce a warning.
    """
    # Allow skipping via environment variable for custom environments
    skip_flag = os.environ.get("UNSLOTH_SKIP_TORCHVISION_CHECK", "0")
    if skip_flag.lower() in ("1", "true"):
        return

    if importlib.util.find_spec("torch") is None:
        raise ImportError("Unsloth: torch not found. Please install torch first.")
    if importlib.util.find_spec("torchvision") is None:
        return

    # Any failure while reading or parsing versions silently skips the check.
    try:
        torch_version_raw = importlib_version("torch")
        torchvision_version_raw = importlib_version("torchvision")
        torch_v = Version(torch_version_raw)
        tv_v = Version(torchvision_version_raw)
    except Exception:
        return

    # Need at least major.minor to look anything up.
    release = torch_v.release
    if len(release) < 2:
        return
    torch_major, torch_minor = release[:2]

    # Known compatibility table (ground truth, takes precedence over formula).
    # See https://pytorch.org/get-started/previous-versions/
    known_pairs = {
        (2, 9): (0, 24),
        (2, 8): (0, 23),
        (2, 7): (0, 22),
        (2, 6): (0, 21),
        (2, 5): (0, 20),
        (2, 4): (0, 19),
    }
    required = known_pairs.get((torch_major, torch_minor))
    if required is None:
        # Formula fallback keeps the check working for versions not in the table.
        required = _infer_required_torchvision(torch_major, torch_minor)
    if required is None:
        return

    tv_major, tv_minor = required
    required_tv_str = f"{tv_major}.{tv_minor}.0"

    if tv_v >= Version(required_tv_str):
        logger.info(
            f"Unsloth: torch=={torch_version_raw} and "
            f"torchvision=={torchvision_version_raw} are compatible."
        )
        return

    raw_versions = (torch_version_raw, torchvision_version_raw)
    is_custom = any(_is_custom_torch_build(raw) for raw in raw_versions)

    # Nightly/dev/alpha/beta/rc builds are detected from the raw version strings;
    # mismatches are often expected for them.
    _pre_tags = (".dev", "a0", "b0", "rc", "alpha", "beta", "nightly")
    is_prerelease = any(tag in raw for raw in raw_versions for tag in _pre_tags)

    message = (
        f"Unsloth: torch=={torch_version_raw} requires "
        f"torchvision>={required_tv_str}, "
        f"but found torchvision=={torchvision_version_raw}. "
        f'Try updating torchvision via `pip install --upgrade "torchvision>={required_tv_str}"`. '
        f"Please refer to https://pytorch.org/get-started/previous-versions/ "
        f"for more information."
    )

    # Stable mismatches fail fast to prevent runtime operator errors.
    if not (is_custom or is_prerelease):
        raise ImportError(message)

    reason = "custom/source build" if is_custom else "pre-release build"
    logger.warning(
        f"{message}\n"
        f"Detected a {reason}. "
        f"Continuing with a warning. "
        f"Set UNSLOTH_SKIP_TORCHVISION_CHECK=1 to silence this."
    )


# Fix TRL OpenEnv 0.26 NameError: name 'SamplingParams' is not defined
def fix_openenv_no_vllm():
    """Patch TRL's OpenEnv utils so they import cleanly without vLLM.

    Edits `experimental/openenv/utils.py` of the installed `trl` package in
    place, adding an `else:` branch after the `if is_vllm_available():` imports
    that binds `SamplingParams` and `GuidedDecodingParams` to `typing.Any`.
    Does nothing if trl or the file is missing, or if the file is already
    patched. Errors while touching the file are logged, not raised.
    """
    trl_spec = importlib.util.find_spec("trl")
    if trl_spec is None:
        return

    # Namespace-style specs have no origin; fall back to the first search location.
    origin = trl_spec.origin
    if origin is not None:
        package_dir = os.path.split(origin)[0]
    else:
        package_dir = trl_spec.submodule_search_locations[0]
    utils_file = Path(package_dir) / "experimental" / "openenv" / "utils.py"
    if not utils_file.exists():
        return

    guarded_import = (
        "if is_vllm_available():\n"
        "    from vllm import SamplingParams\n"
        "    from vllm.sampling_params import GuidedDecodingParams\n"
    )
    fallback_branch = (
        "else:\n"
        "    from typing import Any\n"
        "    SamplingParams = Any\n"
        "    GuidedDecodingParams = Any\n"
        "\n"
    )
    # The unpatched file has the guarded import followed by two blank lines.
    anchor = guarded_import + "\n" + "\n"
    patched_block = guarded_import + fallback_branch

    try:
        with open(utils_file, "r+", encoding = "utf-8") as handle:
            source = handle.read()
            if anchor not in source or patched_block in source:
                return
            source = source.replace(anchor, patched_block)
            # Overwrite from the start and drop any leftover tail bytes.
            handle.seek(0)
            handle.write(source)
            handle.truncate()
            logger.info(
                "Unsloth: Patching TRL OpenEnv to fix SamplingParams not defined"
            )
    except Exception as error:
        logger.info(f"Unsloth: Failed patching TRL OpenEnv with error = {str(error)}")


# Fix Executorch needing get_mapped_key
def fix_executorch():
    """Inject a `torchtune.models.convert_weights.get_mapped_key` shim into Executorch.

    Edits `examples/models/__init__.py` of the installed `executorch` package in
    place: right after the `from enum import Enum` line (and the blank line
    following it) a snippet is inserted that defines `get_mapped_key` and
    registers stub `torchtune`, `torchtune.models` and
    `torchtune.models.convert_weights` modules in `sys.modules`.

    Does nothing if executorch or the file is missing, if the anchor lines are
    not present, or if the snippet is already there. Errors while touching the
    file are logged, not raised.
    """
    spec = importlib.util.find_spec("executorch")
    if spec is None:
        return
    executorch_location = spec.origin
    if executorch_location is None:
        executorch_location = spec.submodule_search_locations[0]
    else:
        executorch_location = os.path.split(executorch_location)[0]
    executorch = Path(executorch_location) / "examples" / "models" / "__init__.py"
    if not executorch.exists():
        return

    try:
        # Snippet written into the target file; dedented below before insertion.
        what = r"""
        import sys
        import types
        import re
        from typing import Any, Optional
        def get_mapped_key(key: str, mapping_dict: dict[str, str]) -> str:
            try:
                # Checks if there is a layer # in the key
                if any(k.isdigit() for k in key.split(".")):
                    # Replace layer number with "{}" to create key for lookup
                    abstract_key = re.sub(r"(\.\d+)", ".{}", key)
                    layer_num = re.search(r"\d+", key).group(0)
                    new_key = mapping_dict[abstract_key]
                    new_key = new_key.format(layer_num)
                else:
                    new_key = mapping_dict[key]
            except KeyError as e:
                raise Exception(
                    f'Error converting the state dict. Found unexpected key: "{key}". '
                    "Please make sure you're loading a checkpoint with the right format. "
                ) from e

            return new_key

        torchtune = types.ModuleType("torchtune")
        torchtune.__path__ = []
        models = types.ModuleType("torchtune.models")
        models.__path__ = []
        convert_weights = types.ModuleType("torchtune.models.convert_weights")
        convert_weights.get_mapped_key = get_mapped_key
        torchtune.models = models
        models.convert_weights = convert_weights
        sys.modules["torchtune"] = torchtune
        sys.modules["torchtune.models"] = models
        sys.modules["torchtune.models.convert_weights"] = convert_weights
        """
        what = textwrap.dedent(what)

        with open(executorch, "r+", encoding = "utf-8") as f:
            text = f.read()
            bad = "from enum import Enum\n"
            # Guard on the exact text that gets replaced (import line plus the
            # blank line after it); otherwise the file would be rewritten
            # unchanged and a patch reported that never happened.
            anchor = bad + "\n"
            if anchor in text and what not in text:
                text = text.replace(anchor, anchor + what)
                f.seek(0)
                f.write(text)
                f.truncate()
                logger.info("Unsloth: Patching Executorch to fix get_mapped_key")
    except Exception as e:
        logger.info(f"Unsloth: Failed patching Executorch with error = {str(e)}")


def fix_diffusers_warnings():
    """Set `DIFFUSERS_VERBOSITY` to "error" in the process environment.

    Silences the "Flax classes are deprecated and will be removed in
    Diffusers v1.0.0" warning.
    """
    variable, level = "DIFFUSERS_VERBOSITY", "error"
    os.environ[variable] = level


def fix_huggingface_hub():
    """Add `huggingface_hub.is_offline_mode` back when the package lacks it.

    The installed callable returns `huggingface_hub.constants.HF_HUB_OFFLINE`,
    read at call time. An existing `is_offline_mode` is left untouched.
    """
    import huggingface_hub

    if hasattr(huggingface_hub, "is_offline_mode"):
        return

    def _is_offline_mode():
        return huggingface_hub.constants.HF_HUB_OFFLINE

    huggingface_hub.is_offline_mode = _is_offline_mode


def fix_triton_compiled_kernel_missing_attrs():
    """
    Triton 3.6.0+ removed direct `num_ctas` and `cluster_dims` attributes from
    CompiledKernel, but torch 2.9.x Inductor still expects them in
    torch/_inductor/runtime/triton_heuristics.py make_launcher() (line ~1757).

    The scope dict eagerly evaluates:
        binary.metadata.num_ctas, *binary.metadata.cluster_dims
    when hasattr(binary, "metadata") is True, but metadata lacks cluster_dims.
    This crashes before reaching the new launch path that doesn't need cta_args.

    Upstream fix: pytorch/pytorch@97bd4db added hasattr guards.
    We monkey-patch CompiledKernel.__init__ to inject the missing attributes
    so the older hasattr(binary, "num_ctas") branch succeeds instead.

    Does nothing when torch or triton cannot be imported, when
    `CompiledKernel` is not found, when the class already exposes `num_ctas`,
    or when `__init__` was already wrapped by an earlier call.
    """
    try:
        import torch  # noqa: F401 - only confirms torch is importable
    except ImportError:
        return

    try:
        import triton  # noqa: F401
        import triton.compiler.compiler as triton_compiler
    except ImportError:
        return

    # Only needed when the CompiledKernel class lacks num_ctas as a direct attr
    # but has metadata (triton >= 3.6.0 with torch < 2.10)
    _ck_cls = getattr(triton_compiler, "CompiledKernel", None)
    if _ck_cls is None:
        return  # Unexpected triton layout -- nothing to patch
    if hasattr(_ck_cls, "num_ctas"):
        return  # Old triton with direct attrs -- no patch needed

    _orig_init = _ck_cls.__init__
    if getattr(_orig_init, "_unsloth_patched", False):
        return  # Already wrapped -- avoid stacking wrappers on repeated calls

    def _patched_init(self, *args, **kwargs):
        _orig_init(self, *args, **kwargs)
        if not hasattr(self, "num_ctas"):
            # Tolerate a missing `metadata` attribute instead of raising
            # AttributeError from every kernel construction.
            metadata = getattr(self, "metadata", None)
            self.num_ctas = getattr(metadata, "num_ctas", 1)
        if not hasattr(self, "cluster_dims") and not hasattr(self, "clusterDims"):
            self.cluster_dims = (1, 1, 1)

    _patched_init._unsloth_patched = True
    _ck_cls.__init__ = _patched_init
    logger.info(
        "Unsloth: Patched triton CompiledKernel with num_ctas/cluster_dims "
        "for torch.compile compatibility."
    )


def patch_trunc_normal_precision_issue():
    """
    Replace torch.nn.init.trunc_normal_ so fp16/bf16 tensors are initialized in fp32.

    torch.nn.init.trunc_normal_ can saturate at truncation bounds in fp16/bf16 on
    some versions/backends. This was observed in TorchTitan investigations where
    low-precision truncation produced boundary-heavy initialization behavior:
    https://github.com/pytorch/torchtitan/pull/2342

    The replacement fills a temporary fp32 copy with the original function and
    copies the result back into the low-precision tensor. Other dtypes go
    straight to the original function. Calling this more than once is a no-op.
    """
    try:
        import torch
    except (ImportError, ModuleNotFoundError):
        return

    init_module = torch.nn.init
    if getattr(init_module, "_unsloth_trunc_normal_patched", False):
        return

    original_trunc_normal = init_module.trunc_normal_
    if getattr(original_trunc_normal, "__unsloth_trunc_normal_patched__", False):
        # The function is already our wrapper; just record the module-level flag.
        init_module._unsloth_trunc_normal_patched = True
        return

    low_precision_dtypes = {torch.float16, torch.bfloat16}

    def _call_original(target, mean, std, a, b, generator):
        bounds = dict(mean = mean, std = std, a = a, b = b)
        if generator is None:
            return original_trunc_normal(target, **bounds)
        try:
            return original_trunc_normal(target, generator = generator, **bounds)
        except TypeError as exc:
            # Older torch versions may not accept a generator keyword argument.
            complaint = str(exc).lower()
            if "unexpected keyword argument" not in complaint:
                raise
            if "generator" not in complaint:
                raise
            return original_trunc_normal(target, **bounds)

    def _fill_through_fp32(target, mean, std, a, b, generator):
        # Initialize an fp32 copy, then write it back in the target's dtype.
        staging = target.float()
        _call_original(staging, mean, std, a, b, generator)
        target.copy_(staging.to(dtype = target.dtype))

    try:
        from torch.distributed._tensor import DTensor
    except Exception:
        DTensor = None

    @torch.no_grad()
    def _patched_trunc_normal_(
        tensor,
        mean: float = 0.0,
        std: float = 1.0,
        a: float = -2.0,
        b: float = 2.0,
        generator = None,
    ):
        if DTensor is not None and isinstance(tensor, DTensor):
            # For DTensors the dtype check and fp32 round-trip use `_local_tensor`.
            shard = getattr(tensor, "_local_tensor", None)
            if shard is not None and shard.dtype in low_precision_dtypes:
                _fill_through_fp32(shard, mean, std, a, b, generator)
                return tensor
            return _call_original(tensor, mean, std, a, b, generator)

        if tensor.dtype not in low_precision_dtypes:
            return _call_original(tensor, mean, std, a, b, generator)

        _fill_through_fp32(tensor, mean, std, a, b, generator)
        return tensor

    # Markers used by the idempotence checks above, plus handles to the original.
    _patched_trunc_normal_.__unsloth_trunc_normal_patched__ = True
    _patched_trunc_normal_._unsloth_original = original_trunc_normal
    init_module._unsloth_trunc_normal_original = original_trunc_normal
    init_module.trunc_normal_ = _patched_trunc_normal_
    init_module._unsloth_trunc_normal_patched = True
    logger.info("Unsloth: Patched torch.nn.init.trunc_normal_ for fp16/bf16 stability.")


def check_vllm_torch_sm100_compatibility():
    """
    Check for incompatible vLLM + torch < 2.9.0 + SM100 (Blackwell) combination.

    vLLM's distributed module (device_communicators) crashes with std::bad_alloc
    when imported on SM100 GPUs (B200/B100) with torch < 2.9.0. This is due to
    C++ code in vLLM's NCCL/distributed layer being incompatible with older
    torch versions on the newer Blackwell architecture.

    This check runs early (before vLLM import) to provide a helpful error message
    instead of a cryptic std::bad_alloc crash.

    The check is skipped when vLLM is not installed, when torch >= 2.9.0, when
    the torch version or GPU capabilities cannot be determined, or when no CUDA
    device reports compute capability major version 10.

    Raises:
        RuntimeError: if vLLM is installed, torch < 2.9.0 and an SM100 GPU is present.
    """
    # Check if vLLM is installed (without importing it)
    if importlib.util.find_spec("vllm") is None:
        return

    # Check torch version
    try:
        torch_version = Version(importlib_version("torch"))
        if torch_version >= Version("2.9.0"):
            return  # torch >= 2.9.0 is compatible
    except Exception:
        return  # Can't determine torch version, skip check

    # Check if any CUDA GPU is SM100 (Blackwell)
    try:
        import torch

        if not torch.cuda.is_available():
            return

        sm100_gpu_name = None
        for i in range(torch.cuda.device_count()):
            major, _ = torch.cuda.get_device_capability(i)
            if major == 10:
                sm100_gpu_name = torch.cuda.get_device_name(i)
                break
        else:
            return  # Loop finished without finding an SM100 device
    except Exception:
        return

    # Get vLLM version for the error message
    try:
        vllm_version = importlib_version("vllm")
    except Exception:
        vllm_version = "unknown"

    # Incompatible combination detected - raise helpful error
    raise RuntimeError(
        "Unsloth: Incompatible configuration detected.\n\n"
        f"  GPU: {sm100_gpu_name} (SM100 / Blackwell architecture)\n"
        f"  torch version: {torch_version}\n"
        f"  vLLM version: {vllm_version}\n\n"
        "vLLM's distributed module crashes with std::bad_alloc on SM100 GPUs "
        "(B200/B100/Blackwell) when using torch < 2.9.0.\n\n"
        "To fix this, please upgrade torch:\n"
        # Quoted so a shell does not treat `>` as an output redirect.
        '  pip install --upgrade "torch>=2.9.0"\n\n'
        "Alternatively, if you don't need vLLM:\n"
        "  pip uninstall vllm"
    )


def fix_vllm_pdl_blackwell():
    """
    Fix vLLM PDL (Programmatic Dependent Launch) bug on Blackwell GPUs (SM100).

    The issue: vLLM's LoRA Triton kernels use tl.extra.cuda.gdc_wait() for PDL
    optimization on SM90+ GPUs. This fails on SM100 (B200/B100) during CUDA graph
    capture because Triton's pipeliner can't handle gdc_wait in complex kernels.

    See: https://github.com/vllm-project/vllm/issues/30872

    What this function does, in order:
      1. Returns early if vLLM is not installed, CUDA is unavailable, no device
         reports compute capability major version 10, none of the LoRA Triton
         op modules can be found, or the vLLM version is >= 0.15.0.
      2. Sets the environment variable `TRITON_DISABLE_PDL` to "1".
      3. Replaces `supports_pdl` with a function that always returns False in
         `vllm.lora.ops.triton_ops.utils`, in the known consumer modules, and in
         any other already-imported `vllm.lora.ops.triton_ops.*` module that
         exposes that name.
    """
    if importlib.util.find_spec("vllm") is None:
        return

    # Check if any CUDA GPU is SM100 (Blackwell)
    try:
        import torch

        if not torch.cuda.is_available():
            return

        # Scan all GPUs for SM100 - fix applies globally via env var and monkey-patch
        has_sm100 = False
        sm100_gpu_name = None
        for i in range(torch.cuda.device_count()):
            major, minor = torch.cuda.get_device_capability(i)
            if major == 10:
                has_sm100 = True
                sm100_gpu_name = torch.cuda.get_device_name(i)
                break

        if not has_sm100:
            return
    except Exception:
        # Any failure while probing devices means the workaround is skipped.
        return

    # Helper to check if module spec exists; lookup errors count as "not found"
    def _spec_exists(name):
        try:
            return importlib.util.find_spec(name) is not None
        except (ImportError, OSError, ModuleNotFoundError, ValueError):
            return False

    # Check if vLLM has the PDL-related modules before doing the version check
    has_utils = _spec_exists("vllm.lora.ops.triton_ops.utils")
    has_expand_op = _spec_exists("vllm.lora.ops.triton_ops.lora_expand_op")
    has_shrink_op = _spec_exists("vllm.lora.ops.triton_ops.lora_shrink_op")

    if not has_utils and not has_expand_op and not has_shrink_op:
        # Old vLLM version without PDL support - nothing to patch
        return

    # Check if vLLM version includes the fix
    VLLM_PDL_FIX_VERSION = "0.15.0"
    try:
        vllm_version = Version(importlib_version("vllm"))
        if vllm_version >= Version(VLLM_PDL_FIX_VERSION):
            logger.info(
                f"Unsloth: SM100 ({sm100_gpu_name}) detected but vLLM {vllm_version} "
                f"should include PDL fix - skipping workaround"
            )
            return
    except Exception as e:
        # Unknown version: err on the side of applying the workaround.
        logger.debug(
            f"Unsloth: vLLM version check failed ({e}), applying PDL workaround."
        )

    # Apply the PDL fix
    os.environ["TRITON_DISABLE_PDL"] = "1"

    # Replacement for supports_pdl: accepts any arguments and reports "no PDL".
    def fake_supports_pdl(*args, **kwargs):
        return False

    # `patched` keeps insertion order for the log message; `patched_names`
    # deduplicates names recorded by more than one of the passes below.
    patched = []
    patched_names = set()

    def _record_patch(name):
        if name not in patched_names:
            patched.append(name)
            patched_names.add(name)

    # First, patch the source module (utils.py) where supports_pdl is defined.
    # This is critical because supports_pdl uses @lru_cache - we must clear the
    # cache to prevent stale cached results from the original function.
    try:
        utils_module = importlib.import_module("vllm.lora.ops.triton_ops.utils")
        if hasattr(utils_module, "supports_pdl"):
            original_fn = utils_module.supports_pdl
            if hasattr(original_fn, "cache_clear"):
                original_fn.cache_clear()
            utils_module.supports_pdl = fake_supports_pdl
            _record_patch("utils")
    except (ImportError, ModuleNotFoundError, AttributeError):
        pass

    # Also patch the consumer modules that import supports_pdl from utils.
    # This ensures the patched function is used even if the module was already
    # imported before this fix runs.
    consumer_modules = {
        "lora_expand_op": "vllm.lora.ops.triton_ops.lora_expand_op",
        "lora_shrink_op": "vllm.lora.ops.triton_ops.lora_shrink_op",
        "fused_moe_lora_op": "vllm.lora.ops.triton_ops.fused_moe_lora_op",
    }
    for name, path in consumer_modules.items():
        try:
            module = importlib.import_module(path)
            if hasattr(module, "supports_pdl"):
                module.supports_pdl = fake_supports_pdl
                _record_patch(name)
        except (ImportError, ModuleNotFoundError, AttributeError):
            # A missing consumer module is fine; the remaining ones are still tried.
            pass

    # Patch any additional already-loaded triton ops consumers that expose supports_pdl.
    # Iterate over a snapshot (tuple) of sys.modules rather than the live dict.
    for module_name, module in tuple(sys.modules.items()):
        if not module_name.startswith("vllm.lora.ops.triton_ops."):
            continue
        if module is None or not hasattr(module, "supports_pdl"):
            continue
        module.supports_pdl = fake_supports_pdl
        # Record only the last dotted component, matching the names used above.
        _record_patch(module_name.rsplit(".", 1)[-1])

    if patched:
        logger.info(
            f"Unsloth: Applied PDL fix for SM100 ({sm100_gpu_name}) - "
            f"patched: {', '.join(patched)}"
        )
    else:
        # Just set the env var - vLLM might be an older version without supports_pdl
        logger.info(f"Unsloth: Set TRITON_DISABLE_PDL=1 for SM100 ({sm100_gpu_name})")


def patch_openspiel_env_async():
    """Make nested event loops usable for OpenEnv's async EnvClient.

    EnvClient exposes coroutine methods (reset/step). Notebooks can rely on
    top-level await, while plain scripts wrap the calls in
    asyncio.get_event_loop().run_until_complete(). nest_asyncio lets those
    nested loop calls work everywhere, and the coroutine methods themselves are
    left untouched so scripts that already ship their own sync wrappers keep
    working.

    Does nothing when openenv is not installed or when EnvClient.reset is
    already a plain (non-coroutine) function.
    """
    try:
        import inspect
        from openenv.core.env_client import EnvClient

        # Only an async `reset` needs nested-loop support.
        if inspect.iscoroutinefunction(EnvClient.reset):
            try:
                import nest_asyncio

                nest_asyncio.apply()
                logger.info(
                    "Unsloth: Applied nest_asyncio for OpenEnv EnvClient async compatibility"
                )
            except ImportError:
                logger.info(
                    "Unsloth: nest_asyncio not installed, OpenEnv async methods may need manual wrapping"
                )
    except (ImportError, AttributeError):
        # openenv is missing or does not expose the expected API.
        return


def patch_torchcodec_audio_decoder():
    """Apply the AudioDecoder patch provided by unsloth_zoo, if it is available.

    Best effort: import, attribute and runtime errors are swallowed silently.
    """
    ignorable = (ImportError, AttributeError, RuntimeError)

    try:
        from unsloth_zoo.dataset_utils import (
            patch_torchcodec_audio_decoder as apply_audio_decoder_patch,
        )
    except ignorable:
        return

    try:
        apply_audio_decoder_patch()
    except ignorable:
        return


def disable_torchcodec_if_broken():
    """Tell transformers that torchcodec is unavailable when it cannot load.

    transformers decides whether torchcodec exists via
    importlib.util.find_spec(), which succeeds even if torchcodec's native
    libraries cannot be loaded (for example when FFmpeg is missing). That leads
    to runtime failures once transformers tries to decode audio with it.

    Here torchcodec is actually imported; if that fails, the
    `_torchcodec_available` flag in transformers.utils.import_utils is set to
    False so transformers falls back to librosa.
    """
    try:
        import importlib.util

        torchcodec_spec = importlib.util.find_spec("torchcodec")
        if torchcodec_spec is not None:
            # Importing the decoder is the real load test for the native libs.
            from torchcodec.decoders import AudioDecoder  # noqa: F401
        usable_or_absent = True
    except (ImportError, RuntimeError, OSError):
        usable_or_absent = False

    if usable_or_absent:
        return

    # torchcodec is installed but cannot load: hide it from transformers.
    try:
        import transformers.utils.import_utils as tf_import_utils

        setattr(tf_import_utils, "_torchcodec_available", False)
    except (ImportError, AttributeError):
        pass


def disable_broken_wandb():
    """Disable wandb if it's installed but cannot actually import.

    wandb can fail to import when there's a protobuf version mismatch
    (e.g., wandb < 0.19.11 with protobuf >= 6.0). This causes cascading
    import failures through trl -> transformers/accelerate -> wandb that
    crash unsloth's import chain.

    There are two separate is_wandb_available() functions used by trl:
      - transformers.integrations.integration_utils.is_wandb_available
        (used by most trl trainers)
      - accelerate.utils.imports.is_wandb_available
        (used by trl/trainer/callbacks.py)

    Both must be patched to fully prevent broken wandb imports. When wandb is
    broken, the WANDB_DISABLED environment variable is also set to "true".

    Returns:
        None. The function only has side effects.
    """
    try:
        if importlib.util.find_spec("wandb") is None:
            return  # wandb not installed, nothing to do
    except Exception:
        # find_spec itself can raise (e.g. ValueError when `wandb` is already in
        # sys.modules without a usable __spec__). Treat that like the other
        # disable_broken_* helpers do: leave everything untouched.
        return

    try:
        import wandb  # noqa: F401

        return
    except Exception:
        pass

    # wandb is installed but broken - patch all checkers to skip it
    logger.info(
        "Unsloth: wandb is installed but broken (likely a protobuf version mismatch). "
        "Disabling wandb to prevent import errors. To fix, run: pip install --upgrade wandb"
    )

    def _wandb_false():
        return False

    # transformers' checker is used by most trl trainers; accelerate's checker is
    # used by trl/trainer/callbacks.py. For accelerate both the source module AND
    # the re-export namespace must be patched, since
    # `from accelerate.utils import is_wandb_available` reads from
    # accelerate.utils, not accelerate.utils.imports.
    for module_path in (
        "transformers.integrations.integration_utils",
        "accelerate.utils.imports",
        "accelerate.utils",
    ):
        try:
            importlib.import_module(module_path).is_wandb_available = _wandb_false
        except (ImportError, AttributeError):
            pass

    # Set env var as additional fallback
    os.environ["WANDB_DISABLED"] = "true"


# --- causal_conv1d import blocking -------------------------------------------
# Flag set to True by disable_broken_causal_conv1d() once a broken binary is seen.
CAUSAL_CONV1D_BROKEN = False
_CAUSAL_CONV1D_PREFIX = "causal_conv1d"
# Attribute name used to recognise an already-installed meta-path blocker.
_CAUSAL_CONV1D_BLOCKER_SENTINEL = "_unsloth_causal_conv1d_blocker"

# --- vLLM import blocking ----------------------------------------------------
# Flag set to True by disable_broken_vllm() once a broken binary is seen.
VLLM_BROKEN = False
_VLLM_PREFIX = "vllm"
_VLLM_BLOCKER_SENTINEL = "_unsloth_vllm_blocker"

# --- ROCm detection hints ----------------------------------------------------
# A non-blank value for any of these environment variables counts as a ROCm hint.
_ROCM_ENV_HINT_KEYS = (
    "ROCM_PATH",
    "ROCM_HOME",
    "HIP_PATH",
    "HSA_PATH",
    "HIP_VISIBLE_DEVICES",
    "ROCR_VISIBLE_DEVICES",
)
# Existence of any of these paths counts as a ROCm hint.
_ROCM_PATH_HINTS = tuple(
    Path(hint)
    for hint in (
        "/opt/rocm",
        "/dev/kfd",
        "/sys/module/amdgpu",
    )
)

# --- amdgpu.ids lookup -------------------------------------------------------
_AMDGPU_ASIC_ID_TABLE_PATH_ENV = "AMDGPU_ASIC_ID_TABLE_PATH"
# System-wide fallback locations, tried in this order.
_AMDGPU_ASIC_ID_CANDIDATE_PATHS = tuple(
    Path(prefix) / "share" / "libdrm" / "amdgpu.ids"
    for prefix in (
        "/usr",
        "/usr/local",
        "/opt/rocm",
        "/opt/amdgpu",
    )
)


def _log_rocm_detection(message):
    """Log a ROCm-detection diagnostic, but only when Unsloth logging is enabled."""
    if not UNSLOTH_ENABLE_LOGGING:
        return
    logger.info(message)


@functools.lru_cache(1)
def _is_rocm_torch_build() -> bool:
    """Guess whether this looks like a ROCm environment; the answer is cached.

    Checks are tried in order and the first hit wins:
      1. the installed torch version string contains "rocm" (official ROCm
         wheels usually carry a +rocmX.Y local version suffix),
      2. one of the ROCm-related environment variables has a non-blank value,
      3. one of the known ROCm filesystem / driver paths exists.

    Custom or source builds may lack the version suffix, which is why the
    runtime hints in steps 2 and 3 exist.
    """
    # 1. Version tag of the installed torch distribution.
    try:
        installed_torch_version = str(importlib_version("torch")).lower()
        if "rocm" in installed_torch_version:
            _log_rocm_detection(
                "Unsloth: ROCm detection matched torch version tag (+rocm)."
            )
            return True
    except Exception:
        pass

    # 2. Environment variables commonly present on ROCm runtimes.
    for env_key in _ROCM_ENV_HINT_KEYS:
        env_value = os.environ.get(env_key, "")
        if not (isinstance(env_value, str) and env_value.strip()):
            continue
        _log_rocm_detection(
            f"Unsloth: ROCm detection matched environment key `{env_key}`."
        )
        return True

    # 3. Filesystem / driver markers of a ROCm stack.
    for hint_path in _ROCM_PATH_HINTS:
        try:
            if hint_path.exists():
                _log_rocm_detection(
                    f"Unsloth: ROCm detection matched filesystem hint `{hint_path}`."
                )
                return True
        except Exception:
            # An unreadable hint is simply skipped.
            continue

    _log_rocm_detection("Unsloth: ROCm detection did not match any known hints.")
    return False


def _iter_amdgpu_asic_id_table_candidates():
    """Yield candidate locations of `amdgpu.ids`, most specific first.

    Paths next to the torch installation come first (located through the
    import spec, so torch itself is never imported), followed by the
    system-wide fallback locations. Each distinct path is yielded once.
    """
    try:
        spec = importlib.util.find_spec("torch")
    except Exception:
        spec = None

    # Directories derived from where torch lives on disk.
    search_roots = []
    if spec is not None:
        if spec.origin:
            search_roots.append(Path(spec.origin).resolve().parent)
        search_roots.extend(
            Path(location).resolve()
            for location in (spec.submodule_search_locations or ())
        )

    # For every root, look in the root itself and up to two levels above it.
    ordered_candidates = [
        base / "share" / "libdrm" / "amdgpu.ids"
        for root in search_roots
        for base in (root, root.parent, root.parent.parent)
    ]
    ordered_candidates.extend(_AMDGPU_ASIC_ID_CANDIDATE_PATHS)

    already_yielded = set()
    for candidate in ordered_candidates:
        key = str(candidate)
        if key not in already_yielded:
            already_yielded.add(key)
            yield candidate


def configure_amdgpu_asic_id_table_path():
    """Point AMDGPU_ASIC_ID_TABLE_PATH at an existing `amdgpu.ids` file.

    Returns:
        The path as a string when the variable already names an existing file
        or a candidate file was found (in which case the environment variable
        is set to it); None when the environment does not look like ROCm or no
        candidate file exists.
    """
    # A valid path supplied by the user always wins and is left untouched.
    user_value = os.environ.get(_AMDGPU_ASIC_ID_TABLE_PATH_ENV, "").strip()
    if user_value:
        user_path = Path(user_value)
        try:
            user_path_is_valid = user_path.is_file()
        except Exception:
            user_path_is_valid = False
        if user_path_is_valid:
            return str(user_path)

    # Searching only makes sense on ROCm-like environments.
    if not _is_rocm_torch_build():
        return None

    for candidate in _iter_amdgpu_asic_id_table_candidates():
        try:
            if not candidate.is_file():
                continue
            chosen = str(candidate)
            os.environ[_AMDGPU_ASIC_ID_TABLE_PATH_ENV] = chosen
            if UNSLOTH_ENABLE_LOGGING:
                logger.info(
                    f"Unsloth: Set {_AMDGPU_ASIC_ID_TABLE_PATH_ENV}={candidate}"
                )
            return chosen
        except Exception:
            continue

    return None


def _is_causal_conv1d_name(module_name: str) -> bool:
    """Return True for `causal_conv1d` itself or any dotted submodule of it."""
    if module_name == _CAUSAL_CONV1D_PREFIX:
        return True
    return module_name.startswith(_CAUSAL_CONV1D_PREFIX + ".")


def _is_vllm_name(module_name: str) -> bool:
    """Return True for `vllm` itself or any dotted submodule of it."""
    if module_name == _VLLM_PREFIX:
        return True
    return module_name.startswith(_VLLM_PREFIX + ".")


def _resolve_module_name(module_name, package):
    """Turn a relative module name into an absolute one when possible.

    Non-string values and absolute names are returned unchanged. A relative
    name (leading dot) is resolved against `package`; if resolution fails the
    original name is returned as-is.
    """
    is_relative = isinstance(module_name, str) and module_name.startswith(".")
    if not is_relative:
        return module_name
    try:
        return importlib.util.resolve_name(module_name, package)
    except Exception:
        return module_name


def _is_broken_causal_conv1d_error(error) -> bool:
    """Report whether an exception chain shows a binary-broken causal_conv1d.

    The error and every exception reachable through `__cause__` (preferred) or
    `__context__` is inspected, case-insensitively, for either the known HIP
    check symbol or an "undefined symbol" message mentioning causal_conv1d.
    Exceptions already visited are not revisited, so cyclic chains terminate.
    """
    visited_ids = set()
    exc = error
    while exc is not None and id(exc) not in visited_ids:
        visited_ids.add(id(exc))
        text = str(exc).lower()
        if "_zn3c103hip28c10_hip_check_implementation" in text:
            return True
        # Also covers messages that name `causal_conv1d_cuda`.
        if "undefined symbol" in text and "causal_conv1d" in text:
            return True
        exc = getattr(exc, "__cause__", None) or getattr(exc, "__context__", None)
    return False


def _is_broken_vllm_error(error) -> bool:
    """Report whether an exception chain shows a binary-broken vLLM install.

    The error and every exception reachable through `__cause__` (preferred) or
    `__context__` is inspected case-insensitively. A link counts as broken if:
      - it mentions the vLLM `_C` extension together with a loader failure
        ("undefined symbol", "cannot open shared object file" or ".so:"),
      - it mentions vllm together with "undefined symbol", or
      - a CUDA runtime library (libcudart / libcublas / libnvrtc) could not be
        opened, e.g. "libcudart.so.12: cannot open shared object file".
    """
    visited_ids = set()
    exc = error
    while exc is not None and id(exc) not in visited_ids:
        visited_ids.add(id(exc))
        text = str(exc).lower()

        has_undefined_symbol = "undefined symbol" in text
        cannot_open_lib = "cannot open shared object file" in text

        mentions_extension = "vllm/_c" in text or "vllm._c" in text
        extension_failed = mentions_extension and (
            has_undefined_symbol or cannot_open_lib or ".so:" in text
        )
        if extension_failed or ("vllm" in text and has_undefined_symbol):
            return True

        # CUDA shared-library mismatch surfaced while importing vllm.
        names_cuda_lib = any(
            lib in text for lib in ("libcudart", "libcublas", "libnvrtc")
        )
        if names_cuda_lib and cannot_open_lib:
            return True

        exc = getattr(exc, "__cause__", None) or getattr(exc, "__context__", None)
    return False


def _get_vllm_cuda_mismatch_message(error):
    """Build a reinstall hint when vLLM targets a different CUDA than torch.

    The exception chain (`__cause__` first, then `__context__`) is searched for
    a `libcudart.so.<N>` reference, which names the CUDA major version vLLM was
    built for. That is compared against `torch.version.cuda`.

    Returns:
        A multi-line message containing a suggested wheel URL, or None when no
        libcudart reference is found, torch's CUDA version cannot be
        determined, or the versions do not look mismatched.
    """
    import re as _re

    libcudart_pattern = _re.compile(r"libcudart\.so\.(\d+)")

    # e.g. "12" extracted from "libcudart.so.12"
    wanted_cuda = None
    visited_ids = set()
    exc = error
    while wanted_cuda is None and exc is not None and id(exc) not in visited_ids:
        visited_ids.add(id(exc))
        found = libcudart_pattern.search(str(exc))
        if found:
            wanted_cuda = found.group(1)
        else:
            exc = getattr(exc, "__cause__", None) or getattr(
                exc, "__context__", None
            )
    if wanted_cuda is None:
        return None

    # CUDA version reported by torch.
    system_cuda_display = None  # human readable, e.g. "13.0"
    system_cuda_tag = None  # wheel tag, e.g. "130"
    try:
        import torch

        reported = torch.version.cuda  # e.g. "13.0" or "12.8"
        if reported:
            system_cuda_display = reported
            system_cuda_tag = reported.replace(".", "")[:3]  # "130" or "128"
    except Exception:
        pass

    # Unknown system CUDA, or same major version: nothing to report.
    if system_cuda_tag is None:
        return None
    if system_cuda_tag.startswith(wanted_cuda):
        return None

    try:
        vllm_version = importlib_version("vllm").split("+")[0]
    except Exception:
        vllm_version = "VLLM_VERSION"

    cpu_arch = "x86_64"
    try:
        import platform

        cpu_arch = platform.machine()
    except Exception:
        pass

    wheel_url = (
        "https://github.com/vllm-project/vllm/releases/download/"
        f"v{vllm_version}/vllm-{vllm_version}+cu{system_cuda_tag}-cp38-abi3-"
        f"manylinux_2_35_{cpu_arch}.whl"
    )
    return (
        f"Unsloth: vLLM was built for CUDA {wanted_cuda} but this system has "
        f"CUDA {system_cuda_display}. Please reinstall vLLM with the correct CUDA version:\n"
        "\n"
        f"  uv pip install {wheel_url}"
    )


class _CausalConv1dImportBlockerLoader(importlib.abc.Loader):
    """Loader that always fails, so a blocked module looks uninstalled."""

    __slots__ = ("module_name",)

    def __init__(self, module_name):
        # Name reported in the ModuleNotFoundError raised by exec_module.
        self.module_name = module_name

    def create_module(self, spec):
        """Return None, i.e. no custom module object is supplied."""
        return None

    def exec_module(self, module):
        """Abort the import with the same error a missing module produces."""
        blocked_name = self.module_name
        raise ModuleNotFoundError(f"No module named '{blocked_name}'")


class _CausalConv1dImportBlockerFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that hijacks causal_conv1d imports while it is flagged broken."""

    __slots__ = (_CAUSAL_CONV1D_BLOCKER_SENTINEL,)

    def __init__(self):
        # The sentinel attribute lets the installer spot an existing blocker.
        setattr(self, _CAUSAL_CONV1D_BLOCKER_SENTINEL, True)

    def find_spec(self, fullname, path = None, target = None):
        """Return a failing spec for causal_conv1d names, or None to pass through."""
        should_block = CAUSAL_CONV1D_BROKEN and _is_causal_conv1d_name(fullname)
        if not should_block:
            return None
        failing_loader = _CausalConv1dImportBlockerLoader(fullname)
        return importlib.machinery.ModuleSpec(
            name = fullname,
            loader = failing_loader,
            is_package = fullname == _CAUSAL_CONV1D_PREFIX,
        )


class _VllmImportBlockerLoader(importlib.abc.Loader):
    """Loader that always fails, so a blocked module looks uninstalled."""

    __slots__ = ("module_name",)

    def __init__(self, module_name):
        # Name reported in the ModuleNotFoundError raised by exec_module.
        self.module_name = module_name

    def create_module(self, spec):
        """Return None, i.e. no custom module object is supplied."""
        return None

    def exec_module(self, module):
        """Abort the import with the same error a missing module produces."""
        blocked_name = self.module_name
        raise ModuleNotFoundError(f"No module named '{blocked_name}'")


class _VllmImportBlockerFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that hijacks vllm imports while vLLM is flagged broken."""

    __slots__ = (_VLLM_BLOCKER_SENTINEL,)

    def __init__(self):
        # The sentinel attribute lets the installer spot an existing blocker.
        setattr(self, _VLLM_BLOCKER_SENTINEL, True)

    def find_spec(self, fullname, path = None, target = None):
        """Return a failing spec for vllm names, or None to pass through."""
        should_block = VLLM_BROKEN and _is_vllm_name(fullname)
        if not should_block:
            return None
        failing_loader = _VllmImportBlockerLoader(fullname)
        return importlib.machinery.ModuleSpec(
            name = fullname,
            loader = failing_loader,
            is_package = fullname == _VLLM_PREFIX,
        )


def _patch_find_spec_for_causal_conv1d():
    """Wrap importlib.util.find_spec so causal_conv1d looks uninstalled when broken.

    The wrapper returns None for causal_conv1d names while CAUSAL_CONV1D_BROKEN
    is set and otherwise defers to the previously installed find_spec.

    The call is idempotent even when other Unsloth wrappers have been stacked
    on top afterwards: the whole chain of `_unsloth_original_find_spec` links
    is searched for this patch's marker, not just the outermost function.
    Checking only the outermost function would add a new layer on every call
    once a different wrapper (e.g. the vLLM one) sits on top.
    """
    current_find_spec = importlib.util.find_spec

    # Look for our marker anywhere in the wrapper chain (guarding against cycles).
    seen_layers = set()
    layer = current_find_spec
    while layer is not None and id(layer) not in seen_layers:
        if getattr(layer, "_unsloth_causal_conv1d_find_spec_patch", False):
            return
        seen_layers.add(id(layer))
        layer = getattr(layer, "_unsloth_original_find_spec", None)

    def _blocked_find_spec(name, package = None):
        # Relative names must be resolved before they can be matched.
        resolved_name = _resolve_module_name(name, package)
        if CAUSAL_CONV1D_BROKEN and isinstance(resolved_name, str):
            if _is_causal_conv1d_name(resolved_name):
                return None
        return current_find_spec(name, package)

    _blocked_find_spec._unsloth_causal_conv1d_find_spec_patch = True
    _blocked_find_spec._unsloth_original_find_spec = current_find_spec
    importlib.util.find_spec = _blocked_find_spec


def _patch_find_spec_for_vllm():
    """Wrap importlib.util.find_spec so vllm looks uninstalled when broken.

    The wrapper returns None for vllm names while VLLM_BROKEN is set and
    otherwise defers to the previously installed find_spec.

    The call is idempotent even when other Unsloth wrappers have been stacked
    on top afterwards: the whole chain of `_unsloth_original_find_spec` links
    is searched for this patch's marker, not just the outermost function.
    Checking only the outermost function would add a new layer on every call
    once a different wrapper (e.g. the causal_conv1d one) sits on top.
    """
    current_find_spec = importlib.util.find_spec

    # Look for our marker anywhere in the wrapper chain (guarding against cycles).
    seen_layers = set()
    layer = current_find_spec
    while layer is not None and id(layer) not in seen_layers:
        if getattr(layer, "_unsloth_vllm_find_spec_patch", False):
            return
        seen_layers.add(id(layer))
        layer = getattr(layer, "_unsloth_original_find_spec", None)

    def _blocked_find_spec(name, package = None):
        # Relative names must be resolved before they can be matched.
        resolved_name = _resolve_module_name(name, package)
        if VLLM_BROKEN and isinstance(resolved_name, str):
            if _is_vllm_name(resolved_name):
                return None
        return current_find_spec(name, package)

    _blocked_find_spec._unsloth_vllm_find_spec_patch = True
    _blocked_find_spec._unsloth_original_find_spec = current_find_spec
    importlib.util.find_spec = _blocked_find_spec


def _install_causal_conv1d_blocker():
    """Patch find_spec and ensure one causal_conv1d blocker leads sys.meta_path."""
    _patch_find_spec_for_causal_conv1d()
    # An existing blocker is recognised by its sentinel attribute.
    already_installed = any(
        getattr(finder, _CAUSAL_CONV1D_BLOCKER_SENTINEL, False)
        for finder in sys.meta_path
    )
    if not already_installed:
        sys.meta_path.insert(0, _CausalConv1dImportBlockerFinder())


def _install_vllm_blocker():
    """Patch find_spec and ensure one vllm blocker leads sys.meta_path."""
    _patch_find_spec_for_vllm()
    # An existing blocker is recognised by its sentinel attribute.
    already_installed = any(
        getattr(finder, _VLLM_BLOCKER_SENTINEL, False) for finder in sys.meta_path
    )
    if not already_installed:
        sys.meta_path.insert(0, _VllmImportBlockerFinder())


def _clear_causal_conv1d_modules():
    """Drop every causal_conv1d module (and submodule) from sys.modules."""
    # Snapshot the keys first so the dict is not mutated while being iterated.
    loaded_names = tuple(sys.modules)
    stale_names = [name for name in loaded_names if _is_causal_conv1d_name(name)]
    for name in stale_names:
        sys.modules.pop(name, None)


def _clear_vllm_modules():
    """Drop every vllm module (and submodule) from sys.modules."""
    # Snapshot the keys first so the dict is not mutated while being iterated.
    loaded_names = tuple(sys.modules)
    stale_names = [name for name in loaded_names if _is_vllm_name(name)]
    for name in stale_names:
        sys.modules.pop(name, None)


def disable_broken_vllm(error = None):
    """Block vLLM imports when its binary extension is ABI-broken.

    Args:
        error: Optional exception already raised by a vLLM import. When omitted,
            vLLM is probed by importing it here.

    Returns:
        True if vLLM is (or already was) marked broken and the import blocker
        is installed; False if vLLM is absent, imports fine, or failed for a
        reason that is not recognised as a broken binary.
    """
    global VLLM_BROKEN

    # Already flagged earlier: just make sure the blocker is still in place.
    if VLLM_BROKEN:
        _install_vllm_blocker()
        return True

    failure = error
    if failure is None:
        try:
            vllm_installed = importlib.util.find_spec("vllm") is not None
        except Exception:
            vllm_installed = False
        if not vllm_installed:
            return False

        try:
            import vllm  # noqa: F401
        except Exception as import_error:
            failure = import_error
        else:
            # Imports cleanly, nothing to disable.
            return False

    if not _is_broken_vllm_error(failure):
        return False

    VLLM_BROKEN = True
    _clear_vllm_modules()
    _install_vllm_blocker()

    # Prefer the targeted CUDA-mismatch hint over the generic warning.
    cuda_msg = _get_vllm_cuda_mismatch_message(failure)
    if not cuda_msg:
        logger.warning(
            "Unsloth: Detected broken vLLM binary extension; "
            "disabling vLLM imports and continuing import.\n"
            "Please reinstall via `uv pip install unsloth vllm torchvision torchaudio "
            "--torch-backend=auto`."
        )
    else:
        logger.warning(cuda_msg)
    return True


def _disable_transformers_causal_conv1d():
    """Make transformers report causal_conv1d as unavailable.

    Replaces `is_causal_conv1d_available` with a function returning False and
    sets the availability flags to False, touching only the attributes that
    already exist in transformers.utils.import_utils. Does nothing if that
    module cannot be imported.
    """
    try:
        import transformers.utils.import_utils as tf_import_utils
    except Exception:
        return

    if hasattr(tf_import_utils, "is_causal_conv1d_available"):
        tf_import_utils.is_causal_conv1d_available = lambda: False

    availability_flags = (
        "_causal_conv1d_available",
        "_is_causal_conv1d_available",
    )
    for flag_name in availability_flags:
        if not hasattr(tf_import_utils, flag_name):
            continue
        setattr(tf_import_utils, flag_name, False)


def disable_broken_causal_conv1d():
    """Turn off causal_conv1d at startup when its shared library is ABI-broken.

    Same idea as Unsloth's FlashAttention fallback: when importing
    causal_conv1d fails with a known binary symbol error, the package is
    blocked from being imported and transformers is told it is unavailable, so
    model imports do not hard-fail. Nothing happens when causal_conv1d is
    absent, imports cleanly, or fails for an unrecognised reason.
    """
    global CAUSAL_CONV1D_BROKEN

    newly_detected = False
    if not CAUSAL_CONV1D_BROKEN:
        try:
            installed = importlib.util.find_spec("causal_conv1d") is not None
        except Exception:
            installed = False
        if not installed:
            return

        try:
            import causal_conv1d  # noqa: F401
        except Exception as error:
            if not _is_broken_causal_conv1d_error(error):
                return
        else:
            # Imports cleanly, nothing to disable.
            return

        CAUSAL_CONV1D_BROKEN = True
        _clear_causal_conv1d_modules()
        newly_detected = True

    # Runs for a fresh detection and when the flag was already set, so the
    # blocker and the transformers overrides are (re)applied either way.
    _install_causal_conv1d_blocker()
    _disable_transformers_causal_conv1d()

    if newly_detected:
        print(
            "Unsloth: Detected broken causal_conv1d binary; "
            "disabling causal_conv1d fast path and continuing import."
        )


================================================
FILE: unsloth/kernels/__init__.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .cross_entropy_loss import (
    fast_cross_entropy_loss,
    post_patch_loss_function,
    patch_loss_functions,
)
from .rms_layernorm import (
    fast_rms_layernorm,
    patch_rms_layernorm,
    unpatch_rms_layernorm,
)
from .layernorm import (
    fast_layernorm,
    patch_layernorm,
)
from .rope_embedding import fast_rope_embedding, inplace_rope_embedding
from .swiglu import swiglu_fg_kernel, swiglu_DWf_DW_dfg_kernel
from .geglu import (
    geglu_exact_forward_kernel,
    geglu_exact_backward_kernel,
    geglu_approx_forward_kernel,
    geglu_approx_backward_kernel,
)
from .fast_lora import (
    get_lora_parameters,
    get_lora_parameters_bias,
    apply_lora_mlp_swiglu,
    apply_lora_mlp_geglu_exact,
    apply_lora_mlp_geglu_approx,
    apply_lora_qkv,
    apply_lora_o,
    fast_lora_forward,
)
from .fp8 import *  # This step is to ensure that we patch the FbgmemFP8Linear and FP8Linear's forward functions before the execution of model creation so that this applies to compiled non fast inference models as well
from .utils import (
    fast_dequantize,
    fast_gemv,
    QUANT_STATE,
    fast_linear_forward,
    matmul_lora,
)

from .flex_attention import (
    HAS_FLEX_ATTENTION,
    slow_attention_softcapping,
    slow_inference_attention_softcapping,
    create_flex_attention_causal_mask,
    create_flex_attention_sliding_window_mask,
)

import os

# Print the start-up banner unless the UNSLOTH_ZOO_IS_PRESENT environment
# variable is set.
if "UNSLOTH_ZOO_IS_PRESENT" not in os.environ:
    try:
        print(
            "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning."
        )
    except Exception:
        # Printing the emoji can fail (e.g. on consoles whose encoding cannot
        # represent it); fall back to plain text. `except Exception` rather than a
        # bare `except` so KeyboardInterrupt / SystemExit are not swallowed.
        print("Unsloth: Will patch your computer to enable 2x faster free finetuning.")
# Keep `os` out of the package namespace.
del os


================================================
FILE: unsloth/kernels/cross_entropy_loss.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import triton
import triton.language as tl
import torch
from .utils import (
    calculate_settings,
    MAX_FUSED_SIZE,
    triton_tanh,
    triton_cast,
    torch_gpu_device,
    is_cdna,
)
from transformers.models.llama.modeling_llama import logger
from unsloth_zoo.utils import Version

from unsloth_zoo.loss_utils import (
    patch_loss_functions as _patch_loss_functions,
    post_patch_loss_function,
)


def _cross_entropy_forward(
    logits_ptr,
    logits_row_stride,
    loss_ptr,
    logsumexp_ptr,
    labels_ptr,
    VOCAB_SIZE: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
    DO_SOFTCAPPING: tl.constexpr,
    SOFTCAP: tl.constexpr,
    DO_LOGIT_SCALING: tl.constexpr,
    LOGIT_SCALE: tl.constexpr,
):
    """
    Triton kernel: per-row cross entropy loss and logsumexp for vocabularies
    that fit in one block. Each program instance handles one row and writes
    one value to loss_ptr and one to logsumexp_ptr. Rows whose label is -100
    get a loss of 0.

    Cross Entropy Loss = 1/n sum [ -yi log(Pi) ]
    Pi = exp(xi) / sum(exp(xi))
    CE_i = -y log(p) = -y log[ exp(x) / sum(exp(x)) ]
         = -y [ x - log[sum(exp(x))] ]
         = y * (log[sum(exp(x))] - x)
    If y == 0: CE_i = 0
    If y == 1: CE_i = logsumexp - x

    logsumexp is also stable
    Take    y =         log[sum(exp(x))]
       exp(y) =             sum(exp(x))
       exp(y) =             sum(exp(x - c)*exp(c)) Since e^(x-c)*e^c = e^x
       exp(y) =      exp(c)*sum(exp(x - c))
           y  = log(exp(c)*sum(exp(x - c)))
           y  = c + log[sum(exp(x - c))]
    This means we can set c = max(x) to make sure
    exp(x - c) always is exp(x - max(x)).
    This ensures exp(x - max(x))'s maximum is 1 as exp(0) = 1.
    """
    # One program per row: advance every pointer to this row's data.
    row_idx = tl.program_id(0)
    logits_ptr += row_idx * triton_cast(logits_row_stride, tl.int64)
    loss_ptr += row_idx
    logsumexp_ptr += row_idx
    labels_ptr += row_idx

    col_offsets = tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < VOCAB_SIZE

    label_idx = tl.load(labels_ptr).to(tl.int32)
    # Columns past VOCAB_SIZE load as -inf, so they add exp(-inf) = 0 to the sum.
    logits = tl.load(logits_ptr + col_offsets, mask = mask, other = -float("inf")).to(
        tl.float32
    )

    # Do logit scaling for Cohere: t * x
    if DO_LOGIT_SCALING:
        logits = LOGIT_SCALE * logits
    # Do logit softcapping for Gemma 2: t * tanh(1/t * x)
    if DO_SOFTCAPPING:
        logits = SOFTCAP * triton_tanh(logits / SOFTCAP)

    # Numerically stable logsumexp: subtract the row maximum before exp.
    c = tl.max(logits, 0)
    logsumexp = c + tl.log(tl.sum(tl.exp(logits - c), 0))

    if label_idx != -100:
        # The label's logit is re-read raw from memory, so the same scaling and
        # softcapping must be applied to it again.
        x = tl.load(logits_ptr + label_idx).to(tl.float32)
        # Do logit scaling for Cohere: t * x
        if DO_LOGIT_SCALING:
            x = LOGIT_SCALE * x
        # Do logit softcapping for Gemma 2: t * tanh(1/t * x)
        if DO_SOFTCAPPING:
            x = SOFTCAP * triton_tanh(x / SOFTCAP)
        loss = logsumexp - x
    else:
        # -100 marks an ignored label: it contributes no loss.
        loss = 0.0
    tl.store(logsumexp_ptr, logsumexp)
    tl.store(loss_ptr, loss)


# Compile the kernel, then wrap it so the two feature flags are coerced to bool
# from whatever value the caller passes.
_cross_entropy_forward = triton.jit(_cross_entropy_forward)
_cross_entropy_forward = triton.heuristics(
    {
        "DO_SOFTCAPPING": lambda args: bool(args["DO_SOFTCAPPING"]),
        "DO_LOGIT_SCALING": lambda args: bool(args["DO_LOGIT_SCALING"]),
    }
)(_cross_entropy_forward)


def _chunked_cross_entropy_forward(
    logits_ptr,
    logits_row_stride: tl.constexpr,
    loss_ptr,
    logsumexp_ptr,
    labels_ptr,
    VOCAB_SIZE: tl.constexpr,
    N_CHUNKS: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
    DO_SOFTCAPPING: tl.constexpr,
    SOFTCAP: tl.constexpr,
    DO_LOGIT_SCALING: tl.constexpr,
    LOGIT_SCALE: tl.constexpr,
):
    """
    Triton kernel: chunked variant of the cross entropy forward pass. Each
    program instance handles one (row, chunk) pair and writes that chunk's
    logsumexp to logsumexp_ptr[row * N_CHUNKS + chunk]. Only chunk 0 writes to
    loss_ptr, and it stores just the -x term (0 for a -100 label).

    256K vocab divided in 4 chunks

    |-65536-| |-65536-| |-65536-| |-65536-|
    |-------| |-------| |-------| |-------|
    |-------| |-------| |-------| |-------|

    If y == 0: CE_i = 0
    If y == 1: CE_i = logsumexp - x

    Notice we can do logsumexp for each chunk and then
    logsumexp[chunk_sum(logsumexp)] == logsumexp

    chunk_sum = log[chunk_sum(logsumexp)]
              = log[exp(logsumexp(a)) + ... + exp(logsumexp(z))]
              = log[exp(log[sum(exp(a))]) + ... + exp(log[sum(exp(z))])]
              = log[sum(exp(a)) + ... + sum(exp(z))]
              = logsumexp(x)

    This means we can perform a logsumexp for each chunk, then do a
    final logsumexp reduction!

    Ie do: logsumexp(chunked_logsumexp) - x
    """
    # Grid axis 0 selects the row, axis 1 selects the vocabulary chunk.
    row_idx = tl.program_id(0)
    chunk_idx = tl.program_id(1)
    logits_ptr += row_idx * triton_cast(logits_row_stride, tl.int64)
    loss_ptr += row_idx
    # One logsumexp slot per (row, chunk), N_CHUNKS slots per row.
    logsumexp_ptr += row_idx * N_CHUNKS + chunk_idx
    labels_ptr += row_idx

    col_offsets = chunk_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < VOCAB_SIZE

    label_idx = tl.load(labels_ptr).to(tl.int32)
    # Columns past VOCAB_SIZE load as -inf, so they add exp(-inf) = 0 to the sum.
    logits = tl.load(logits_ptr + col_offsets, mask = mask, other = -float("inf")).to(
        tl.float32
    )

    # Do logit scaling for Cohere: t * x
    if DO_LOGIT_SCALING:
        logits = LOGIT_SCALE * logits
    # Do logit softcapping for Gemma 2: t * tanh(1/t * x)
    if DO_SOFTCAPPING:
        logits = SOFTCAP * triton_tanh(logits / SOFTCAP)

    # Numerically stable logsumexp over this chunk only.
    c = tl.max(logits, 0)
    logsumexp = c + tl.log(tl.sum(tl.exp(logits - c), 0))

    if chunk_idx == 0:
        # logsumexp(chunked_logsumexp) - x
        # Do the -x separately
        # NOTE(review): presumably the caller adds the reduced logsumexp to this
        # partial loss afterwards - confirm against the launching code.
        if label_idx != -100:
            # The label's logit is re-read raw from memory, so the same scaling
            # and softcapping must be applied to it again.
            x = tl.load(logits_ptr + label_idx).to(tl.float32)
            # Do logit scaling for Cohere: t * x
            if DO_LOGIT_SCALING:
                x = LOGIT_SCALE * x
            # Do logit softcapping for Gemma 2: t * tanh(1/t * x)
            if DO_SOFTCAPPING:
                x = SOFTCAP * triton_tanh(x / SOFTCAP)
            loss = -1.0 * x
        else:
            # -100 marks an ignored label: it contributes no loss.
            loss = 0.0
        tl.store(loss_ptr, loss)
    tl.store(logsumexp_ptr, logsumexp)


# Compile the kernel, then wrap it so the two feature flags are coerced to bool
# from whatever value the caller passes.
_chunked_cross_entropy_forward = triton.jit(_chunked_cross_entropy_forward)
_chunked_cross_entropy_forward = triton.heuristics(
    {
        "DO_SOFTCAPPING": lambda args: bool(args["DO_SOFTCAPPING"]),
        "DO_LOGIT_SCALING": lambda args: bool(args["DO_LOGIT_SCALING"]),
    }
)(_chunked_cross_entropy_forward)


def _cross_entropy_backward(
    logits_ptr,
    logits_row_stride: tl.constexpr,
    dloss_ptr,
    dloss_row_stride: tl.constexpr,
    logsumexp_ptr,
    labels_ptr,
    VOCAB_SIZE: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
    DO_SOFTCAPPING: tl.constexpr,
    SOFTCAP: tl.constexpr,
    DO_LOGIT_SCALING: tl.constexpr,
    LOGIT_SCALE: tl.constexpr,
):
    """
    Triton kernel: gradient of the cross entropy loss w.r.t. the logits. Each
    program instance handles one (row, column block) pair and overwrites the
    corresponding logits in place with dloss * dC/dx. Rows whose label is -100
    receive a gradient of 0.

    CE_i = -y log(P) = y * (log[sum(exp(x))] - x)
    dC/dx = d/dx (y * log[sum(exp(x))] - x * y)

    From https://en.wikipedia.org/wiki/LogSumExp
    d/dx logsumexp = exp(x) / sum(exp(x)) = softmax(x)

    dC/dx = y * exp(x) / sum(exp(x)) - d/dx (x * y)
    dC/dx = y * exp[ log[exp(x) / sum(exp(x))] ] - d/dx (x * y) using x = exp(log(x)) trick
    dC/dx = y * exp[x - logsumexp] - d/dx (x * y)

    If y == 0: dC/dx = 0
    If y == 1 and x == label: dC/dlabel = exp[x - logsumexp] - 1
    If y == 1 and x != label: dC/dx     = exp[x - logsumexp]
    """
    # Grid axis 0 selects the row, axis 1 selects the block of columns.
    row_idx = tl.program_id(0)
    block_idx = tl.program_id(1)

    logits_ptr += row_idx * triton_cast(logits_row_stride, tl.int64)
    dloss_ptr += row_idx * dloss_row_stride
    col_offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < VOCAB_SIZE
    label_idx = tl.load(labels_ptr + row_idx).to(tl.int32)

    # Ignored rows (label -100) use an upstream gradient of 0, which zeroes the
    # whole row when multiplied in at the final store.
    if label_idx != -100:
        dloss = tl.load(dloss_ptr)
    else:
        dloss = 0.0

    x = tl.load(logits_ptr + col_offsets, mask = mask, other = -float("inf")).to(tl.float32)

    # Do logit scaling for Cohere
    if DO_LOGIT_SCALING:
        # d/dx [s * x] = s
        x = x * LOGIT_SCALE

    # Do logit softcapping for Gemma 2: t * tanh(1/t * x)
    # `partial` keeps tanh(x / t) so its derivative can be applied further down.
    partial = x
    if DO_SOFTCAPPING:
        # d/dx [t * tanh(1/t * x)] = 1 - tanh^2(1/t * x)
        partial = triton_tanh(x / SOFTCAP)
        x = SOFTCAP * partial

    # softmax(x) = exp(x - logsumexp), using the logsumexp saved by the forward pass.
    logsumexp = tl.load(logsumexp_ptr + row_idx)
    y = tl.exp(x - logsumexp)
    y = tl.where(
        col_offsets == label_idx,
        y - 1.0,  # exp(x - logsumexp) - 1
        y,  # exp(x - logsumexp)
    )

    # Chain rule through the transformations applied to the logits above.
    if DO_LOGIT_SCALING:
        # d/dx [s * x] = s
        y = y * LOGIT_SCALE

    if DO_SOFTCAPPING:
        # d/dx [t * tanh(1/t * x)] = 1 - tanh^2(1/t * x)
        y = y * (1.0 - partial * partial)

    # If y == 0: dC/dx = 0 ==> we already masked it to be = 0, so dloss = 0.
    # The gradient is written over the logits themselves (in place).
    tl.store(logits_ptr + col_offsets, dloss * y, mask = mask)


# Compile the kernel, then wrap it so the two feature flags are coerced to bool
# from whatever value the caller passes.
_cross_entropy_backward = triton.jit(_cross_entropy_backward)
_cross_entropy_backward = triton.heuristics(
    {
        "DO_SOFTCAPPING": lambda args: bool(args["DO_SOFTCAPPING"]),
        "DO_LOGIT_SCALING": lambda args: bool(args["DO_LOGIT_SCALING"]),
    }
)(_cross_entropy_backward)


# Rebinds the MAX_FUSED_SIZE name imported from .utils at the top of this module.
MAX_FUSED_SIZE = 2**16  # 65536


class Fast_CrossEntropyLoss(torch.autograd.Function):
    """
    Autograd wrapper around the Triton cross entropy kernels.

    `forward` produces one float32 loss per row of a 2D `logits` tensor.
    `backward` launches `_cross_entropy_backward`, which stores the gradient
    into the saved `logits` buffer itself, and that same tensor is returned as
    the gradient. The caller's logits are therefore overwritten by backward.
    """

    @staticmethod
    def forward(
        ctx, logits, labels, logit_softcapping: float = 0, logit_scaling: float = 0
    ):
        """
        Arguments:
            logits: 2D tensor, unpacked as (n_rows, vocab_size)
            labels: one label per row; moved to `logits.device`
            logit_softcapping: softcap value; 0 disables softcapping
            logit_scaling: logit scale; 0 disables scaling
        Returns:
            losses: float32 tensor of shape (n_rows,)
        """
        n_rows: int
        vocab_size: int
        n_rows, vocab_size = logits.shape
        device = logits.device
        labels = labels.to(device)

        # n_chunks = ceil(vocab_size / MAX_FUSED_SIZE)
        div, mod = divmod(vocab_size, MAX_FUSED_SIZE)
        n_chunks: int = div + (mod != 0)
        losses = torch.empty(n_rows, dtype = torch.float32, device = device)

        DO_SOFTCAPPING: bool = bool(logit_softcapping != 0)
        DO_LOGIT_SCALING: bool = bool(logit_scaling != 0)

        BLOCK_SIZE: int
        num_warps: int
        if n_chunks == 1:
            # For small vocabs <= 65536 like Llama, Mistral
            BLOCK_SIZE, num_warps = calculate_settings(vocab_size)
            if is_cdna():
                # Halve the warp count when is_cdna() is true
                num_warps = num_warps // 2
            logsumexp = torch.empty(n_rows, dtype = torch.float32, device = device)

            # One program per row
            with torch_gpu_device(device):
                _cross_entropy_forward[(n_rows,)](
                    logits,
                    logits.stride(0),
                    losses,
                    logsumexp,
                    labels,
                    VOCAB_SIZE = vocab_size,
                    BLOCK_SIZE = BLOCK_SIZE,
                    DO_SOFTCAPPING = DO_SOFTCAPPING,
                    SOFTCAP = logit_softcapping,
                    DO_LOGIT_SCALING = DO_LOGIT_SCALING,
                    LOGIT_SCALE = logit_scaling,
                    num_warps = num_warps,
                )
        else:
            # For large vocabs > 65536 like Gemma 256K
            # One partial logsumexp per (row, chunk)
            logsumexp = torch.empty(
                (
                    n_rows,
                    n_chunks,
                ),
                dtype = torch.float32,
                device = device,
            )

            # One program per (row, chunk)
            with torch_gpu_device(device):
                _chunked_cross_entropy_forward[
                    (
                        n_rows,
                        n_chunks,
                    )
                ](
                    logits,
                    logits.stride(0),
                    losses,
                    logsumexp,
                    labels,
                    VOCAB_SIZE = vocab_size,
                    N_CHUNKS = n_chunks,
                    BLOCK_SIZE = MAX_FUSED_SIZE,
                    DO_SOFTCAPPING = DO_SOFTCAPPING,
                    SOFTCAP = logit_softcapping,
                    DO_LOGIT_SCALING = DO_LOGIT_SCALING,
                    LOGIT_SCALE = logit_scaling,
                    num_warps = 32 if not is_cdna() else 16,
                )
            # logsumexp(chunked_logsumexp) - x
            # Do the -x separately
            # NOTE(review): this presumes the chunked kernel left -x in `losses`
            # - confirm against _chunked_cross_entropy_forward.
            logsumexp = torch.logsumexp(logsumexp, dim = 1)  # Row sum
            losses += logsumexp
            losses.masked_fill_(labels == -100, 0)  # Don't forget to mask padding out!

        # Backward needs the logits, the per-row logsumexp and the labels,
        # plus the softcapping / scaling configuration.
        ctx.save_for_backward(logits, logsumexp, labels)
        ctx.DO_SOFTCAPPING = DO_SOFTCAPPING
        ctx.logit_softcapping = logit_softcapping
        ctx.DO_LOGIT_SCALING = DO_LOGIT_SCALING
        ctx.logit_scaling = logit_scaling
        return losses

    @staticmethod
    def backward(ctx, dlosses):
        """
        Arguments:
            dlosses: upstream gradient w.r.t. the per-row losses
        Returns:
            One entry per forward input: the saved `logits` tensor (overwritten
            with the gradient by the kernel), then None for labels,
            logit_softcapping and logit_scaling.
        """
        logits, logsumexp, labels = ctx.saved_tensors
        n_rows: int
        vocab_size: int
        n_rows, vocab_size = logits.shape

        # Fixed tile width; n_blocks = ceil(vocab_size / BLOCK_SIZE)
        BLOCK_SIZE: int = 4096
        div: int
        mod: int
        div, mod = divmod(vocab_size, BLOCK_SIZE)
        n_blocks: int = div + (mod != 0)

        # One program per (row, vocab block)
        with torch_gpu_device(dlosses.device):
            _cross_entropy_backward[
                (
                    n_rows,
                    n_blocks,
                )
            ](
                logits,
                logits.stride(0),
                dlosses,
                dlosses.stride(0),
                logsumexp,
                labels,
                VOCAB_SIZE = vocab_size,
                BLOCK_SIZE = BLOCK_SIZE,
                DO_SOFTCAPPING = ctx.DO_SOFTCAPPING,
                SOFTCAP = ctx.logit_softcapping,
                DO_LOGIT_SCALING = ctx.DO_LOGIT_SCALING,
                LOGIT_SCALE = ctx.logit_scaling,
                num_warps = 8,
            )
        # `logits` now holds dloss * dC/dx, written in place by the kernel.
        return (
            logits,
            None,
            None,
            None,
        )


def fast_cross_entropy_loss(
    logits,
    labels,
    logit_softcapping = 0,
    logit_scaling = 0,
    n_items = None,
):
    """
    Cross entropy through Fast_CrossEntropyLoss, summed over all tokens and
    divided by `n_items`.

    Arguments:
        logits: (batch, seq_len, vocab_size)
        labels: (batch, seq_len,)
        logit_softcapping: forwarded to Fast_CrossEntropyLoss (0 disables it)
        logit_scaling: forwarded to Fast_CrossEntropyLoss (0 disables it)
        n_items: divisor for the summed loss. When None, the number of labels
            different from -100 is used.
    Returns:
        losses: scalar loss
    """
    batch, seq_len, vocab = logits.shape
    assert labels.shape == (batch, seq_len)

    # The autograd function works on one row per token.
    flat_logits = logits.view(batch * seq_len, vocab)
    flat_labels = labels.view(-1)
    per_token = Fast_CrossEntropyLoss.apply(
        flat_logits,
        flat_labels,
        logit_softcapping,
        logit_scaling,
    )

    if n_items is None:
        n_items = torch.count_nonzero(labels != -100)
    # A tensor divisor has to live next to the loss.
    if torch.is_tensor(n_items):
        n_items = n_items.to(logits.device)
    return per_token.sum() / n_items


# On torch < 2.4.0, wrap the loss with torch._disable_dynamo. The
# `__wrapped__` check skips the wrap when the function already carries that
# attribute (presumably because it has been wrapped before - confirm).
if (Version(torch.__version__) < Version("2.4.0")) and not hasattr(
    fast_cross_entropy_loss, "__wrapped__"
):
    fast_cross_entropy_loss = torch._disable_dynamo(fast_cross_entropy_loss)


# Patch CE Losses in transformers
def patch_loss_functions(torch_compile = True):
    """
    Call `_patch_loss_functions` with `fast_cross_entropy_loss` as the loss
    implementation, forwarding `torch_compile` unchanged.
    """
    _patch_loss_functions(fast_cross_entropy_loss, torch_compile = torch_compile)


================================================
FILE: unsloth/kernels/fast_lora.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from .utils import (
    _maybe_fake_quantize_activations,
    fast_dequantize,
    QUANT_STATE,
    get_lora_parameters,
    get_lora_parameters_bias,
    matmul_lora,
    torch_amp_custom_fwd,
    torch_amp_custom_bwd,
)


class LoRA_MLP(torch.autograd.Function):
    """
    ### LoRA weights
    G = G + Ag @ Bg
    U = U + Au @ Bu
    W = W + Aw @ Bw

    ### SwiGLU(X)
    e = X @ G
    f = e * sigmoid(e)
    g = X @ U
    h = f * g
    i = h @ W

    ### Backpropagation chain rule
    See our blog post for more details

    df = sigmoid(e) * (1 - f) + f
    dC/dW = h.T @ dY
    dC/dU = X.T @ (D @ W.T * f)
    dC/dG = X.T @ (D @ W.T * df * g)

    ### Down projection LoRA weights
    dC/dAw = dC/dW @ B.T
    dC/dBw = A.T @ dC/dW
    dC/dAw =       h.T @ dY @ B.T
    dC/dBw = A.T @ h.T @ dY

    ### Up projection LoRA weights
    dC/dAu =       X.T @ (D @ W.T * f) @ B.T
    dC/dBu = A.T @ X.T @ (D @ W.T * f)

    ### Gate projection LoRA weights
    dC/dAg =       X.T @ (D @ W.T * df * g) @ B.T
    dC/dBg = A.T @ X.T @ (D @ W.T * df * g)

    Don't forget to see our blog post for more details!
    """

    @staticmethod
    @torch_amp_custom_fwd
    def forward(
        ctx,
        X: torch.Tensor,
        gateW,
        gateW_quant,
        gateA,
        gateB,
        gateS,
        upW,
        upW_quant,
        upA,
        upB,
        upS,
        downW,
        downW_quant,
        downA,
        downB,
        downS,
        _forward_function,
        _backward_function,
        inplace = True,
    ):
        """
        Gated MLP forward: down(_forward_function(gate(X), up(X))), where each
        projection goes through `matmul_lora`.

        `_forward_function` / `_backward_function` are the activation kernels
        (the callers in this file pass the SwiGLU / GEGLU pairs). `inplace`
        lets backward write dX into X's storage.
        """
        dtype = X.dtype

        e = matmul_lora(X, gateW, gateW_quant, gateA, gateB, gateS)
        g = matmul_lora(X, upW, upW_quant, upA, upB, upS)
        h = _forward_function(e, g)
        i = matmul_lora(h, downW, downW_quant, downA, downB, downS)

        # Base weights, quant states, scales and the backward kernel are kept
        # as a plain attribute; only the LoRA matrices and the activations go
        # through save_for_backward.
        ctx.custom_saved_tensors = (
            gateW,
            gateW_quant,
            gateS,
            upW,
            upW_quant,
            upS,
            downW,
            downW_quant,
            downS,
            _backward_function,
        )
        ctx.save_for_backward(gateA, gateB, upA, upB, downA, downB, X, e, g)
        ctx.inplace = inplace
        return i

    @staticmethod
    @torch_amp_custom_bwd
    def backward(ctx, dY: torch.Tensor):
        """
        Return gradients for X and the six LoRA matrices, and None for every
        other forward input (19 entries, in forward-argument order).
        """
        (
            gateW,
            gateW_quant,
            gateS,
            upW,
            upW_quant,
            upS,
            downW,
            downW_quant,
            downS,
            _backward_function,
        ) = ctx.custom_saved_tensors
        gateA, gateB, upA, upB, downA, downB, X, e, g = ctx.saved_tensors

        # X must be 3D here; everything is flattened to 2D for the matmuls.
        batch, seq_len, hd = X.shape
        dY = dY.view(-1, dY.shape[-1])
        X = X.view(-1, X.shape[-1])
        e = e.view(-1, e.shape[-1])
        g = g.view(-1, g.shape[-1])
        dtype = X.dtype

        # Bring the LoRA matrices to the activation dtype ...
        gateA, gateB, upA, upB, downA, downB = (
            gateA.to(dtype),
            gateB.to(dtype),
            upA.to(dtype),
            upB.to(dtype),
            downA.to(dtype),
            downB.to(dtype),
        )

        # ... and work on their transposes from here on. The gradients are
        # computed in this transposed layout and transposed back on return.
        gateA, gateB, upA, upB, downA, downB = (
            gateA.t(),
            gateB.t(),
            upA.t(),
            upB.t(),
            downA.t(),
            downB.t(),
        )

        # dY pushed back through the down projection (A and B swap roles
        # because both are transposed).
        DW = matmul_lora(dY, downW.t(), downW_quant, downB, downA, downS)
        # The activation backward kernel returns three tensors that are used
        # below as h, df and de (names as in the class docstring).
        # NOTE(review): presumably it overwrites the buffers passed in -
        # confirm in the kernel implementation.
        DW, e, g = _backward_function(DW, e, g)
        h, df, de = DW, e, g

        d_downA = torch.empty_like(downA)
        d_downB = torch.empty_like(downB)
        d_gateA = torch.empty_like(gateA)
        d_gateB = torch.empty_like(gateB)
        d_upA = torch.empty_like(upA)
        d_upB = torch.empty_like(upB)

        # All addmm_ calls below use beta = 0, so the uninitialised contents of
        # the empty_like buffers are ignored and alpha applies the LoRA scale.

        # Down projection LoRA weights
        # d_downA = h.t() @ (dY @ downB.t())
        # d_downB = (downA.t() @ h.t()) @ dY
        # d_downA *= downS
        # d_downB *= downS
        d_downA.addmm_(h.t(), dY @ downB.t(), alpha = downS, beta = 0)
        d_downB.addmm_(downA.t() @ h.t(), dY, alpha = downS, beta = 0)

        # Up projection LoRA weights
        # d_upA   = X.t() @ (df @ upB.t())
        # d_upB   = (upA.t() @ X.t()) @ df
        # d_upA  *= upS
        # d_upB  *= upS
        d_upA.addmm_(X.t(), df @ upB.t(), alpha = upS, beta = 0)
        d_upB.addmm_(upA.t() @ X.t(), df, alpha = upS, beta = 0)

        # Gate projection LoRA weights
        # d_gateA = X.t() @ (de @ gateB.t())
        # d_gateB = (gateA.t() @ X.t()) @ de
        # d_gateA *= gateS
        # d_gateB *= gateS
        d_gateA.addmm_(X.t(), de @ gateB.t(), alpha = gateS, beta = 0)
        d_gateB.addmm_(gateA.t() @ X.t(), de, alpha = gateS, beta = 0)

        # dX  = matmul_lora(df, upW.t(), upW_quant, upB, upA, upS)
        # dX += matmul_lora(de, gateW.t(), gateW_quant, gateB, gateA, gateS)
        upW = fast_dequantize(upW.t(), upW_quant)
        # With inplace set, the product is written into X's storage. X is no
        # longer read after this point, so the saved input is reused for dX.
        dX = torch.matmul(df, upW.t(), out = X if ctx.inplace else None)
        del upW
        # dX += df @ upB.to(dtype).t() @ (upS * upA.to(dtype).t())
        dX.addmm_(df @ upB.t(), upA.t(), alpha = upS)

        gateW = fast_dequantize(gateW.t(), gateW_quant)
        # dX += de @ gateW.t()
        dX.addmm_(de, gateW.t())
        del gateW
        # dX += de @ gateB.to(dtype).t() @ (gateS * gateA.to(dtype).t())
        dX.addmm_(de @ gateB.t(), gateA.t(), alpha = gateS)

        # gateW, gateW_quant, gateA, gateB, gateS,
        #  upW,    upW_quant,   upA,   upB,   upS,
        # downW, downW_quant, downA, downB, downS,
        return (
            dX.view(batch, seq_len, hd),
            None,
            None,
            d_gateA.t(),
            d_gateB.t(),
            None,
            None,
            None,
            d_upA.t(),
            d_upB.t(),
            None,
            None,
            None,
            d_downA.t(),
            d_downB.t(),
            None,
            None,
            None,
            None,
        )  # _backward and _forward and inplace


from .swiglu import swiglu_fg_kernel, swiglu_DWf_DW_dfg_kernel


def apply_lora_mlp_swiglu(self, X, inplace = True):
    """
    Run the fused LoRA MLP with the SwiGLU kernels on `X`.

    Each of `self.gate_proj`, `self.up_proj` and `self.down_proj` contributes
    the five values returned by `get_lora_parameters` (weight, quant state,
    A, B, scale), passed to `LoRA_MLP.apply` in that order.
    """
    X = _maybe_fake_quantize_activations(X, self.gate_proj)
    gate_params = get_lora_parameters(self.gate_proj)
    up_params = get_lora_parameters(self.up_proj)
    down_params = get_lora_parameters(self.down_proj)
    return LoRA_MLP.apply(
        X,
        *gate_params,
        *up_params,
        *down_params,
        swiglu_fg_kernel,
        swiglu_DWf_DW_dfg_kernel,
        inplace,
    )


from .geglu import geglu_exact_forward_kernel, geglu_exact_backward_kernel


def apply_lora_mlp_geglu_exact(self, X, inplace = True):
    """
    Run the fused LoRA MLP with the exact GEGLU kernels on `X`.

    Each of `self.gate_proj`, `self.up_proj` and `self.down_proj` contributes
    the five values returned by `get_lora_parameters` (weight, quant state,
    A, B, scale), passed to `LoRA_MLP.apply` in that order.
    """
    X = _maybe_fake_quantize_activations(X, self.gate_proj)
    gate_params = get_lora_parameters(self.gate_proj)
    up_params = get_lora_parameters(self.up_proj)
    down_params = get_lora_parameters(self.down_proj)
    return LoRA_MLP.apply(
        X,
        *gate_params,
        *up_params,
        *down_params,
        geglu_exact_forward_kernel,
        geglu_exact_backward_kernel,
        inplace,
    )


from .geglu import geglu_approx_forward_kernel, geglu_approx_backward_kernel


def apply_lora_mlp_geglu_approx(self, X, inplace = True):
    """
    Run the fused LoRA MLP with the approximate GEGLU kernels on `X`.

    Arguments:
        X: input activations, passed through `_maybe_fake_quantize_activations`
            with `self.gate_proj` first
        inplace: forwarded to `LoRA_MLP`, where it allows backward to write dX
            into X's storage. Defaults to True, which is also `LoRA_MLP`'s own
            default, so existing two-argument callers behave as before. The
            parameter mirrors `apply_lora_mlp_swiglu` and
            `apply_lora_mlp_geglu_exact`.
    Returns:
        The output of `LoRA_MLP.apply`.
    """
    X = _maybe_fake_quantize_activations(X, self.gate_proj)
    gateW, gateW_quant, gateA, gateB, gateS = get_lora_parameters(self.gate_proj)
    upW, upW_quant, upA, upB, upS = get_lora_parameters(self.up_proj)
    downW, downW_quant, downA, downB, downS = get_lora_parameters(self.down_proj)
    out = LoRA_MLP.apply(
        X,
        gateW,
        gateW_quant,
        gateA,
        gateB,
        gateS,
        upW,
        upW_quant,
        upA,
        upB,
        upS,
        downW,
        downW_quant,
        downA,
        downB,
        downS,
        geglu_approx_forward_kernel,
        geglu_approx_backward_kernel,
        inplace,
    )
    return out


class LoRA_QKV(torch.autograd.Function):
    """
    ### LoRA weights
    Wq = Wq + Aq @ Bq
    Wk = Wk + Ak @ Bk
    Wv = Wv + Av @ Bv
    Q = X @ Wq = X @ Wq + X @ Aq @ Bq
    K = X @ Wk = X @ Wk + X @ Ak @ Bk
    V = X @ Wv = X @ Wv + X @ Av @ Bv

    ### Backpropagation chain rule
    See our blogpost for more details.

    dC/dWq = X.T @ D(Wq)
    dC/dWk = X.T @ D(Wk)
    dC/dWv = X.T @ D(Wv)
    We then sum them all to find dC/dX

    ### Q projection LoRA weights
    dC/dAq =       X.T @ D(Wq) @ B.T
    dC/dBq = A.T @ X.T @ D(Wq)

    ### K projection LoRA weights
    dC/dAk =       X.T @ D(Wk) @ B.T
    dC/dBk = A.T @ X.T @ D(Wk)

    ### V projection LoRA weights
    dC/dAv =       X.T @ D(Wv) @ B.T
    dC/dBv = A.T @ X.T @ D(Wv)
    """

    @staticmethod
    @torch_amp_custom_fwd
    def forward(
        ctx,
        X: torch.Tensor,
        QW,
        QW_quant,
        QA,
        QB,
        QS,
        KW,
        KW_quant,
        KA,
        KB,
        KS,
        VW,
        VW_quant,
        VA,
        VB,
        VS,
        inplace = True,
    ):
        """
        Compute Q, K and V from the same input through `matmul_lora`.

        A 3D `X` is flattened to 2D for the matmuls and the outputs are
        reshaped back to (X.shape[0], X.shape[1], -1). `inplace` lets backward
        write dX into X's storage.
        """
        dtype = X.dtype

        # bitsandbytes 8-bit matmul expects 2D inputs.
        # TorchInductor/AOTAutograd fails on 3D tensors during backward,
        # so we explicitly flatten the sequence dimension.
        orig_shape = X.shape
        X_for_matmul = X
        if X.dim() == 3:
            X_for_matmul = X.view(-1, X.shape[-1])
        Q = matmul_lora(X_for_matmul, QW, QW_quant, QA, QB, QS)
        K = matmul_lora(X_for_matmul, KW, KW_quant, KA, KB, KS)
        V = matmul_lora(X_for_matmul, VW, VW_quant, VA, VB, VS)

        # Restore original shape after matmul
        if len(orig_shape) == 3:
            Q = Q.view(orig_shape[0], orig_shape[1], -1)
            K = K.view(orig_shape[0], orig_shape[1], -1)
            V = V.view(orig_shape[0], orig_shape[1], -1)

        # Base weights, quant states and scales are kept as a plain attribute;
        # X (in its original shape) and the LoRA matrices go through
        # save_for_backward.
        ctx.custom_saved_tensors = (
            QW,
            QW_quant,
            QS,
            KW,
            KW_quant,
            KS,
            VW,
            VW_quant,
            VS,
        )
        ctx.save_for_backward(
            X,
            QA,
            QB,
            KA,
            KB,
            VA,
            VB,
        )
        ctx.inplace = inplace
        return Q, K, V

    @staticmethod
    @torch_amp_custom_bwd
    def backward(ctx, dQ, dK, dV):
        """
        Return gradients for X and the six LoRA matrices, and None for every
        other forward input (17 entries, in forward-argument order).
        """
        QW, QW_quant, QS, KW, KW_quant, KS, VW, VW_quant, VS = ctx.custom_saved_tensors
        (
            X,
            QA,
            QB,
            KA,
            KB,
            VA,
            VB,
        ) = ctx.saved_tensors

        # NOTE(review): this unpacking requires a 3D X, although forward also
        # has a code path for non-3D inputs - confirm callers always pass 3D.
        batch, seq_len, hd = X.shape
        dQ = dQ.view(-1, dQ.shape[-1])
        dK = dK.reshape(-1, dK.shape[-1])  # view doesn't work on K.T
        dV = dV.view(-1, dV.shape[-1])
        X = X.view(-1, X.shape[-1])
        dtype = X.dtype

        # Bring the LoRA matrices to the activation dtype ...
        QA, QB, KA, KB, VA, VB = (
            QA.to(dtype),
            QB.to(dtype),
            KA.to(dtype),
            KB.to(dtype),
            VA.to(dtype),
            VB.to(dtype),
        )

        # ... and work on their transposes; gradients are transposed back on
        # return.
        QA, QB, KA, KB, VA, VB = QA.t(), QB.t(), KA.t(), KB.t(), VA.t(), VB.t()

        ### Weight projection LoRA weights
        # See our blogpost for more details.
        d_QA = torch.empty_like(QA)
        d_QB = torch.empty_like(QB)
        d_KA = torch.empty_like(KA)
        d_KB = torch.empty_like(KB)
        d_VA = torch.empty_like(VA)
        d_VB = torch.empty_like(VB)

        # All addmm_ calls on the d_* buffers use beta = 0, so their
        # uninitialised contents are ignored and alpha applies the LoRA scale.

        # Q Projection
        # d_QA = X.t() @ (dQ @ QB.t())
        # d_QB = (QA.t() @ X.t()) @ dQ
        # d_QA *= QS
        # d_QB *= QS
        d_QA.addmm_(X.t(), dQ @ QB.t(), alpha = QS, beta = 0)
        d_QB.addmm_(QA.t() @ X.t(), dQ, alpha = QS, beta = 0)

        # K Projection
        # d_KA = X.t() @ (dK @ KB.t())
        # d_KB = (KA.t() @ X.t()) @ dK
        # d_KA *= KS
        # d_KB *= KS
        d_KA.addmm_(X.t(), dK @ KB.t(), alpha = KS, beta = 0)
        d_KB.addmm_(KA.t() @ X.t(), dK, alpha = KS, beta = 0)

        # V Projection
        # d_VA = X.t() @ (dV @ VB.t())
        # d_VB = (VA.t() @ X.t()) @ dV
        # d_VA *= VS
        # d_VB *= VS
        d_VA.addmm_(X.t(), dV @ VB.t(), alpha = VS, beta = 0)
        d_VB.addmm_(VA.t() @ X.t(), dV, alpha = VS, beta = 0)

        # Combine derivatives to find dX
        # dQ
        QW = fast_dequantize(QW.t(), QW_quant)
        # With inplace set, the product is written into X's storage. X is no
        # longer read after this point, so the saved input is reused for dX.
        dX = torch.matmul(dQ, QW.t(), out = X if ctx.inplace else None)
        del QW
        # dX += (dQ @ QB.to(dtype).t() @ (QS * QA.to(dtype).t()))
        dX.addmm_(dQ @ QB.t(), QA.t(), alpha = QS)

        # dK
        KW = fast_dequantize(KW.t(), KW_quant)
        # dX += dK @ KW.t()
        dX.addmm_(dK, KW.t())
        del KW
        # dX += dK @ KB.to(dtype).t() @ (KS * KA.to(dtype).t())
        dX.addmm_(dK @ KB.t(), KA.t(), alpha = KS)

        # dV
        VW = fast_dequantize(VW.t(), VW_quant)
        # dX += dV @ VW.t()
        dX.addmm_(dV, VW.t())
        del VW
        # dX += dV @ VB.to(dtype).t() @ (VS * VA.to(dtype).t())
        dX.addmm_(dV @ VB.t(), VA.t(), alpha = VS)

        # QW, QW_quant, QA, QB, QS,
        # KW, KW_quant, KA, KB, KS,
        # VW, VW_quant, VA, VB, VS,
        return (
            dX.view(batch, seq_len, hd),
            None,
            None,
            d_QA.t(),
            d_QB.t(),
            None,
            None,
            None,
            d_KA.t(),
            d_KB.t(),
            None,
            None,
            None,
            d_VA.t(),
            d_VB.t(),
            None,
            None,
        )


def apply_lora_qkv(self, X, inplace = True):
    """
    Compute Q, K and V for `X` with the fused LoRA_QKV autograd function.

    Each of `self.q_proj`, `self.k_proj` and `self.v_proj` contributes the
    five values returned by `get_lora_parameters` (weight, quant state, A, B,
    scale), passed to `LoRA_QKV.apply` in that order.
    """
    X = _maybe_fake_quantize_activations(X, self.q_proj)
    q_params = get_lora_parameters(self.q_proj)
    k_params = get_lora_parameters(self.k_proj)
    v_params = get_lora_parameters(self.v_proj)
    Q, K, V = LoRA_QKV.apply(X, *q_params, *k_params, *v_params, inplace)
    return Q, K, V


class LoRA_W(torch.autograd.Function):
    """
    Single LoRA projection (used by `apply_lora_o` for the output projection).

    ### LoRA weights
    W = W + A @ B
    Y = X @ W = X @ W + X @ A @ B

    ### Backpropagation chain rule
    dC/dW = X.T @ dY

    ### LoRA weights
    dC/dA =       X.T @ dY @ B.T
    dC/dB = A.T @ X.T @ dY
    """

    @staticmethod
    @torch_amp_custom_fwd
    def forward(ctx, X: torch.Tensor, W, W_quant, A, B, S):
        """Return `matmul_lora(X, W, W_quant, A, B, S)` and stash what backward needs."""
        dtype = X.dtype
        XW = matmul_lora(X, W, W_quant, A, B, S)
        # Base weight, quant state and scale are kept as a plain attribute;
        # the LoRA matrices and X go through save_for_backward.
        ctx.custom_saved_tensors = (
            W,
            W_quant,
            S,
        )
        ctx.save_for_backward(A, B, X)
        return XW

    @staticmethod
    @torch_amp_custom_bwd
    def backward(ctx, dY: torch.Tensor):
        """
        Return gradients for X, A and B, and None for W, W_quant and S
        (6 entries, in forward-argument order).
        """
        W, W_quant, S = ctx.custom_saved_tensors
        A, B, X = ctx.saved_tensors

        # X must be 3D here; both tensors are flattened to 2D for the matmuls.
        batch, seq_len, hd = X.shape
        dY = dY.reshape(-1, dY.shape[-1])  # Must be reshape
        X = X.reshape(-1, X.shape[-1])  # Must be reshape
        dtype = X.dtype

        A, B = A.to(dtype), B.to(dtype)

        # Work on the transposes; gradients are transposed back on return.
        A, B = A.t(), B.t()

        d_A = torch.empty_like(A)
        d_B = torch.empty_like(B)

        ### Weight projection LoRA weights
        # Weight projection
        # d_A = X.t() @ (dY @ B.t())
        # d_B = (A.t() @ X.t()) @ dY
        # d_A *= S
        # d_B *= S
        # beta = 0 makes addmm_ ignore the uninitialised empty_like contents.
        d_A.addmm_(X.t(), dY @ B.t(), alpha = S, beta = 0)
        d_B.addmm_(A.t() @ X.t(), dY, alpha = S, beta = 0)

        # Get derivative for dX
        W = fast_dequantize(W.t(), W_quant)
        # Unlike LoRA_MLP / LoRA_QKV, dX gets a fresh buffer here; X is not
        # overwritten.
        dX = dY @ W.t()
        del W
        # dX += dY @ B.to(dtype).t() @ (S * A.to(dtype).t())
        dX.addmm_(dY @ B.t(), A.t(), alpha = S)

        # W, W_quant, A, B, S
        return dX.view(batch, seq_len, hd), None, None, d_A.t(), d_B.t(), None


def apply_lora_o(self, X):
    """
    Apply `self.o_proj` to `X` through the fused LoRA_W autograd function,
    feeding it the five values returned by `get_lora_parameters`.
    """
    o_proj = self.o_proj
    X = _maybe_fake_quantize_activations(X, o_proj)
    return LoRA_W.apply(X, *get_lora_parameters(o_proj))


# Module class that fast_lora_forward treats as "no dropout": its fast path is
# only taken when the adapter's dropout is an instance of this class.
IDENTITY_DROPOUT = torch.nn.Identity


@torch._disable_dynamo
def fast_lora_forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
    """
    Replacement forward for a LoRA linear layer. Currently disabled.

    The function raises NotImplementedError unconditionally, so everything
    after the `raise` is unreachable and is kept only as the intended
    implementation: a single-adapter fast path through `LoRA_W` (no dropout,
    no DoRA), with a general per-adapter fallback otherwise.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError(
        "Unsloth: Currently not supported yet - reshaping done incorrectly"
    )
    # ---- Unreachable from here on (see docstring) ----
    self._check_forward_args(x, *args, **kwargs)
    adapter_names = kwargs.pop("adapter_names", None)

    if self.disable_adapters:
        # Adapters off: undo any merge and use the base layer only.
        if self.merged:
            self.unmerge()
        result = self.base_layer(x, *args, **kwargs)
    elif adapter_names is not None:
        result = self._mixed_batch_forward(
            x, *args, adapter_names = adapter_names, **kwargs
        )
    elif self.merged:
        # Already merged into the base layer, so that is all that is called.
        result = self.base_layer(x, *args, **kwargs)
    else:
        # Fastpath
        if len(self.active_adapters) == 1:
            active_adapter = self.active_adapters[0]
            if active_adapter not in self.lora_A.keys():
                return self.base_layer(x, *args, **kwargs)

            dropout = self.lora_dropout[active_adapter]
            # The fused kernel is used only without dropout and without DoRA.
            if (
                isinstance(dropout, IDENTITY_DROPOUT)
                and not self.use_dora[active_adapter]
            ):
                lora_A = self.lora_A[active_adapter].weight
                lora_B = self.lora_B[active_adapter].weight
                scaling = self.scaling[active_adapter]
                W = self.base_layer.weight
                return LoRA_W.apply(x, W, QUANT_STATE(W), lora_A, lora_B, scaling)
            pass
        pass

        result = self.base_layer(x, *args, **kwargs)
        # As per Tim Dettmers, for 4bit, we need to defensively clone here.
        # The reason is that in some cases, an error can occur that backprop
        # does not work on a manipulated view. This issue may be solved with
        # newer PyTorch versions but this would need extensive testing to be
        # sure.
        result = result.clone()

        # General path: add each active adapter's contribution in turn.
        for active_adapter in self.active_adapters:
            if active_adapter not in self.lora_A.keys():
                continue
            lora_A = self.lora_A[active_adapter]
            lora_B = self.lora_B[active_adapter]
            dropout = self.lora_dropout[active_adapter]
            scaling = self.scaling[active_adapter]

            # Outside autocast, cast x to the adapter dtype and cast the
            # result back to its original dtype afterwards.
            requires_conversion = not torch.is_autocast_enabled()
            if requires_conversion:
                expected_dtype = result.dtype
                x = x.to(lora_A.weight.dtype)

            if not self.use_dora[active_adapter]:
                result = result + lora_B(lora_A(dropout(x))) * scaling
            else:
                # DoRA: the base result is only passed on when dropout is a
                # no-op (identity module or eval mode).
                if isinstance(dropout, torch.nn.Identity) or not self.training:
                    base_result = result
                else:
                    x = dropout(x)
                    base_result = None

                result = result + self.lora_magnitude_vector[active_adapter](
                    x,
                    lora_A = lora_A,
                    lora_B = lora_B,
                    scaling = scaling,
                    base_layer = self.get_base_layer(),
                    base_result = base_result,
                )
            if requires_conversion:
                result = result.to(expected_dtype)

    return result


================================================
FILE: unsloth/kernels/flex_attention.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from functools import lru_cache
from transformers.models.llama.modeling_llama import logger
import os

# Options handed to torch.compile for the attention functions in this module.
# Tracing ("trace.enabled") is switched on by setting the environment variable
# UNSLOTH_COMPILE_DEBUG to "1".
torch_compile_options = {
    "epilogue_fusion": True,
    "max_autotune": True,
    "shape_padding": True,
    "trace.enabled": os.environ.get("UNSLOTH_COMPILE_DEBUG", "0") == "1",
    "triton.cudagraphs": False,
}

# Flex Attention supported from torch 2.5 onwards only
try:
    from torch.nn.attention.flex_attention import (
        flex_attention as _flex_attention,
        create_block_mask as _create_block_mask,
    )

    _flex_attention = torch.compile(
        _flex_attention, dynamic = True, options = torch_compile_options
    )
    # NOTE(review): the flag stays False even when the import and the compile
    # succeed, so the non-flex softcapping implementation is always selected.
    # This looks like a deliberate kill switch - confirm before enabling.
    HAS_FLEX_ATTENTION = False
except Exception:
    # Torch builds without the module raise ImportError, and torch.compile may
    # fail on unsupported setups; both fall back to the non-flex path.
    # `Exception` is caught instead of using a bare `except` so that
    # KeyboardInterrupt and SystemExit still propagate.
    HAS_FLEX_ATTENTION = False


# Two alternative definitions of `slow_attention_softcapping` and the mask
# factories, selected by HAS_FLEX_ATTENTION.
if not HAS_FLEX_ATTENTION:
    # Logit softcapping
    @torch.compile(fullgraph = True, dynamic = True, options = torch_compile_options)
    def slow_attention_softcapping(Q, K, V, causal_mask, self, bsz, q_len):
        """
        Compiled attention with tanh logit softcapping.

        K and V are expanded from n_kv_heads to n_heads, `causal_mask` is
        added to the capped scores, and the result is returned with shape
        (bsz, q_len, n_heads * head_dim). `self` supplies the head counts,
        head_dim and the softcapping config values.
        """
        n_heads = self.config.num_attention_heads
        head_dim = self.head_dim
        n_kv_heads = self.config.num_key_value_heads
        n_groups = self.num_key_value_groups

        # Grouped query attention
        # Insert a group axis, broadcast it, then fold it into the head axis.
        K = K[:, :, None, :, :].expand(bsz, n_kv_heads, n_groups, q_len, head_dim)
        V = V[:, :, None, :, :].expand(bsz, n_kv_heads, n_groups, q_len, head_dim)
        K = K.reshape(bsz, n_heads, q_len, head_dim)
        V = V.reshape(bsz, n_heads, q_len, head_dim)

        # See https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
        # Gemma 9b should use 256 and not 224 (hs / nah). 27b uses the below
        # We default to using the config file itself
        # s = self.config.hidden_size // self.config.num_attention_heads
        s = self.config.query_pre_attn_scalar
        t = self.config.attn_logit_softcapping

        Q = Q * torch.tensor(s**-0.5, dtype = Q.dtype)  # Follow Keras exactly
        A = torch.matmul(Q, K.transpose(2, 3))
        A = t * torch.tanh(A / t)  # Logit softcapping
        # The mask is additive (added to the scores before softmax).
        A += causal_mask[:q_len, :q_len]
        # Much slower in torch compile!
        # A.masked_fill_(causal_mask[:q_len, :q_len], -float("inf"))
        # Softmax in float32, then back to the query dtype.
        A = torch.nn.functional.softmax(A, dim = -1, dtype = torch.float32).to(Q.dtype)
        A = torch.matmul(A, V)
        A = A.transpose(1, 2).contiguous()
        A = A.reshape(bsz, q_len, n_heads * head_dim)
        return A

    # No flex-attention block masks are available on this path.
    create_flex_attention_causal_mask = None
    create_flex_attention_sliding_window_mask = None
else:
    # NOTE(review): HAS_FLEX_ATTENTION is assigned False on both outcomes of
    # the import guard earlier in this module, so this branch appears to be
    # inactive at present - confirm before relying on it.
    # See https://github.com/pytorch-labs/attention-gym/blob/main/examples/flex_attn.ipynb
    # for more examples
    # BSD 3-Clause License Copyright (c) 2023, Driss Guessous, Horace He et al
    import functools, math

    def generate_tanh_softcap(t):
        """Return a score_mod that maps a score x to t * tanh(x / t)."""
        def tanh_softcap(x, b, h, q_idx, kv_idx):
            return t * torch.tanh(x / t)

        return tanh_softcap

    def causal_masker(b, h, q_idx, kv_idx):
        """Mask predicate: a query may attend to keys at or before its index."""
        return q_idx >= kv_idx

    @functools.lru_cache
    def sliding_window_masker(size = 4096):
        """Return (cached per `size`) a causal mask predicate limited to q_idx - kv_idx <= size."""
        def sliding_window(b, h, q_idx, kv_idx):
            causal_mask = q_idx >= kv_idx
            window_mask = q_idx - kv_idx <= size
            return causal_mask & window_mask

        return sliding_window

    @functools.lru_cache
    def create_block_mask(mask, n = 128):
        """
        Build (and cache per (mask, n)) a block mask via torch's
        `create_block_mask`.

        NOTE(review): the positional arguments are presumably B = 1, H = 1,
        Q_LEN = n, KV_LEN = n - confirm against the torch API.
        """
        return _create_block_mask(
            mask,
            1,
            1,
            n,
            n,
            BLOCK_SIZE = 128,
            _compile = True,
        )

    def create_flex_attention_causal_mask(max_seq_length = 8192):
        """Block mask for plain causal attention, sized by `max_seq_length`."""
        causal_mask = create_block_mask(causal_masker, max_seq_length)
        return causal_mask

    def create_flex_attention_sliding_window_mask(
        max_seq_length = 8192, sliding_window = 4096
    ):
        """Block mask for causal sliding-window attention, sized by `max_seq_length`."""
        sliding_masker = sliding_window_masker(sliding_window)
        causal_mask = create_block_mask(sliding_masker, max_seq_length)
        return causal_mask

    @functools.lru_cache
    def flex_attention(s, t):
        """
        Return (cached per (s, t)) the compiled flex attention bound to
        scale = 1 / sqrt(s), a tanh softcap of `t`, and enable_gqa = True.
        """
        scale = 1.0 / math.sqrt(s)
        score_mod = generate_tanh_softcap(t)
        return functools.partial(
            _flex_attention,
            score_mod = score_mod,
            scale = scale,
            enable_gqa = True,
        )

    def slow_attention_softcapping(Q, K, V, causal_mask, self, bsz, q_len):
        """
        Flex-attention version of softcapped attention.

        Here `causal_mask` is passed as `block_mask`, and the output is
        reshaped to (bsz, q_len, n_heads * head_dim).
        """
        n_heads = self.config.num_attention_heads
        head_dim = self.head_dim
        s = self.config.query_pre_attn_scalar
        t = self.config.attn_logit_softcapping
        fx = flex_attention(s, t)
        A = fx(query = Q, key = K, value = V, block_mask = causal_mask)
        A = A.transpose(1, 2).contiguous()
        A = A.reshape(bsz, q_len, n_heads * head_dim)
        return A


# Module-level aliases for the torch functions used by
# slow_inference_attention_softcapping.
torch_matmul = torch.matmul
torch_tanh = torch.tanh
torch_nn_functional_softmax = torch.nn.functional.softmax


def slow_inference_attention_softcapping(Q, K, V, causal_mask, self, bsz, q_len):
    """Plain-torch softcapped attention with grouped-query key/value expansion.

    K and V are broadcast from `num_key_value_heads` to `num_attention_heads`,
    the logits are scaled, tanh-softcapped in place, masked additively with
    `causal_mask[:q_len, :q_len]`, softmaxed in float32, and the result is
    returned as `(bsz, q_len, n_heads * head_dim)`.
    """
    config = self.config
    n_heads = config.num_attention_heads
    head_dim = self.head_dim
    n_kv_heads = config.num_key_value_heads
    n_groups = self.num_key_value_groups

    # Grouped query attention: repeat every KV head across its query group
    grouped_shape = (bsz, n_kv_heads, n_groups, q_len, head_dim)
    merged_shape = (bsz, n_heads, q_len, head_dim)
    K = K[:, :, None, :, :].expand(grouped_shape).reshape(merged_shape)
    V = V[:, :, None, :, :].expand(grouped_shape).reshape(merged_shape)

    # See https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
    # Gemma 9b should use 256 and not 224 (hs / nah). 27b uses the below
    # We default to using the config file itself
    # s = self.config.hidden_size // self.config.num_attention_heads
    pre_attn_scalar = config.query_pre_attn_scalar
    softcap = config.attn_logit_softcapping

    # Follow Keras exactly: the scale is rounded to Q's dtype before multiplying
    inv_sqrt_scale = torch.tensor(pre_attn_scalar**-0.5, dtype = Q.dtype)
    Q = Q * inv_sqrt_scale
    scores = torch_matmul(Q, K.transpose(2, 3))

    # Logit softcapping, all in place on the score buffer
    scores /= softcap
    torch_tanh(scores, out = scores)
    scores *= softcap
    # Additive mask; masked_fill_ is much slower in torch compile!
    scores += causal_mask[:q_len, :q_len]
    probs = torch_nn_functional_softmax(scores, dim = -1, dtype = torch.float32)
    probs = probs.to(Q.dtype)

    out = torch_matmul(probs, V)
    out = out.transpose(1, 2).contiguous()
    return out.reshape(bsz, q_len, n_heads * head_dim)


================================================
FILE: unsloth/kernels/fp8.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import torch
import torch.nn as nn
import triton
import triton.language as tl
from torch.nn import functional as F
import math
from unsloth_zoo.utils import Version
from unsloth_zoo.log import logger
from unsloth_zoo.temporary_patches.common import torch_compile

torch_matmul = torch.matmul

# Optional FP8 backends. Each import is best effort: when it fails the name is
# set to None so later code can test for availability. `except Exception` is used
# (rather than a bare `except`) so that KeyboardInterrupt / SystemExit raised
# while importing are not swallowed.
try:
    from transformers.integrations.finegrained_fp8 import FP8Linear
except Exception:
    FP8Linear = None
    logger.info(
        "Unsloth: FP8 models need importing FP8Linear from `transformers.integrations.finegrained_fp8` but we don't see it."
    )

try:
    from transformers.integrations.fbgemm_fp8 import FbgemmFp8Linear
except Exception:
    FbgemmFp8Linear = None
    logger.info(
        "Unsloth: FP8 models need importing FbgemmFP8Linear from `transformers.integrations.fbgemm_fp8` but we don't see it."
    )

try:
    from fbgemm_gpu.experimental.gemm.triton_gemm.fp8_gemm import (
        triton_quantize_fp8_block,
    )
except Exception:
    triton_quantize_fp8_block = None
    logger.info(
        "Unsloth: Could not find fbgemm_gpu.experimental.gemm.triton_gemm.fp8_gemm.triton_quantize_fp8_block"
    )

try:
    from torchao.prototype.blockwise_fp8_inference.blockwise_quantization import (
        blockwise_fp8_gemm as torchao_blockwise_gemm,
    )
except Exception:
    torchao_blockwise_gemm = None
    logger.info(
        "Unsloth: Could not find torchao.prototype.blockwise_fp8_inference.blockwise_quantization.blockwise_fp8_gemm"
    )


@triton.jit
def weight_dequant_kernel(x_ptr, s_ptr, y_ptr, M, N, BLOCK_SIZE: tl.constexpr):
    """Dequantize one BLOCK_SIZE x BLOCK_SIZE tile of an (M, N) row-major matrix.

    Each program loads its tile from `x_ptr`, multiplies it by the single
    scale stored for that tile in `s_ptr`, and writes the result to `y_ptr`.
    """
    tile_row = tl.program_id(axis = 0)
    tile_col = tl.program_id(axis = 1)
    tiles_per_row = tl.cdiv(N, BLOCK_SIZE)

    rows = tile_row * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    cols = tile_col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    flat_offsets = rows[:, None] * N + cols[None, :]
    # Tiles on the bottom/right edge may extend past the matrix
    in_bounds = (rows[:, None] < M) & (cols[None, :] < N)

    values = tl.load(x_ptr + flat_offsets, mask = in_bounds).to(tl.float32)
    scale = tl.load(s_ptr + tile_row * tiles_per_row + tile_col)
    dequantized = values * scale
    tl.store(y_ptr + flat_offsets, dequantized, mask = in_bounds)


def weight_dequant_block(
    x: torch.Tensor, s: torch.Tensor, block_size: int = 128, dtype = torch.bfloat16
) -> torch.Tensor:
    """Dequantize a block-quantized 2D weight `x` with per-block scales `s`.

    Both tensors are made contiguous if needed, then one Triton program per
    `block_size` x `block_size` tile writes `x * scale` into a new tensor of
    the requested `dtype`.
    """
    x = x if x.is_contiguous() else x.contiguous()
    s = s if s.is_contiguous() else s.contiguous()
    assert x.dim() == 2 and s.dim() == 2
    n_rows, n_cols = x.size()
    out = torch.empty_like(x, dtype = dtype)

    def grid(meta):
        tile = meta["BLOCK_SIZE"]
        return (triton.cdiv(n_rows, tile), triton.cdiv(n_cols, tile))

    weight_dequant_kernel[grid](x, s, out, n_rows, n_cols, BLOCK_SIZE = block_size)
    return out


def weight_dequant(x: torch.Tensor, s: torch.Tensor, dtype = torch.bfloat16):
    """Dequantize weight `x` with scale `s`, dispatching on the scale's layout.

    - one element: per-tensor scale, broadcast over the whole matrix;
    - 2D with one column: per-row scale, matched against either dim of `x`
      (the weight is sometimes passed transposed);
    - anything else: block-wise scales, handled by `weight_dequant_block`.

    Raises ValueError when a per-row scale matches neither dimension of `x`.
    """
    # Per-tensor scale: single value for entire weight matrix
    if s.numel() == 1:
        return x.to(dtype) * s.view(1, 1).to(dtype)

    has_row_scale = s.ndim == 2 and s.shape[1] == 1
    if not has_row_scale:
        # Block quantized weight: scale shape is (ceil(m/block_m), ceil(n/block_n))
        return weight_dequant_block(x, s, dtype = dtype)

    # Row quantized weight: scale shape is (m, 1) or (n, 1)
    n_scales = s.shape[0]
    if x.shape[0] == n_scales:
        return x.to(dtype) * s.to(dtype)
    if x.shape[1] == n_scales:
        # Called with the transpose of the weight: scale in the original
        # orientation, then transpose back.
        return (x.t().to(dtype) * s.to(dtype)).t()
    raise ValueError(f"Incompatible shapes {x.shape = }, {s.shape = }")


# Copied from https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/main/inference/kernel.py
@triton.jit
def act_quant_kernel(x_ptr, y_ptr, s_ptr, BLOCK_SIZE: tl.constexpr):
    """Quantize one contiguous group of BLOCK_SIZE values with a shared scale.

    The scale is `max(|x|) / 448`; the scaled values are cast to the element
    type of `y_ptr` and the scale is stored at index `pid` of `s_ptr`.
    Loads and stores are unmasked.
    """
    pid = tl.program_id(axis = 0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    values = tl.load(x_ptr + offsets).to(tl.float32)
    scale = tl.max(tl.abs(values)) / 448.0
    # For a group of all zeros, return zeros as is.
    # For LoRA there are cases where dY has 0 in it and it must not become NaN;
    # this is a deviation from the original implementation.
    scale = 1.0 if scale == 0 else scale
    quantized = values / scale
    quantized = quantized.to(y_ptr.dtype.element_ty)
    tl.store(y_ptr + offsets, quantized)
    tl.store(s_ptr + pid, scale)


def act_quant(
    x: torch.Tensor, block_size: int = 128
) -> tuple[torch.Tensor, torch.Tensor]:
    """Quantize `x` to float8_e4m3fn in groups of `block_size` along the last dim.

    Returns `(y, s)`: `y` has the shape of `x` in FP8, and `s` holds one
    float32 scale per group with shape `(*x.shape[:-1], x.shape[-1] // block_size)`.
    The last dimension of `x` must be a multiple of `block_size`.
    """
    x = x if x.is_contiguous() else x.contiguous()
    assert x.shape[-1] % block_size == 0

    quantized = torch.empty_like(x, dtype = torch.float8_e4m3fn)
    groups_per_row = x.size(-1) // block_size
    scales = x.new_empty(*x.size()[:-1], groups_per_row, dtype = torch.float32)

    # One program per group of `block_size` contiguous values
    grid = lambda meta: (triton.cdiv(x.numel(), meta["BLOCK_SIZE"]),)
    act_quant_kernel[grid](x, quantized, scales, BLOCK_SIZE = block_size)
    return quantized, scales


# Adapted from https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/layers/quantization/fp8_kernel.py
@triton.jit
def _w8a8_block_fp8_matmul(
    # Pointers to inputs and output
    A,
    B,
    C,
    As,
    Bs,
    # Shape for matmul
    M,
    N,
    K,
    # Block size for block-wise quantization
    group_n,
    group_k,
    # Stride for inputs and output
    stride_am,
    stride_ak,
    stride_bk,
    stride_bn,
    stride_cm,
    stride_cn,
    stride_As_m,
    stride_As_k,
    stride_Bs_k,
    stride_Bs_n,
    # Meta-parameters
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
    GROUP_SIZE_M: tl.constexpr,
):
    """Triton-accelerated function used to perform linear operations (dot
    product) on input tensors `A` and `B` with block-wise quantization, and
    store the result in output tensor `C`.

    Each program computes one (BLOCK_SIZE_M, BLOCK_SIZE_N) tile of `C`. For
    every K step the partial dot product is multiplied by the matching scales
    from `As` (one per row of the tile) and `Bs` (one per column of the tile)
    before being added to a float32 accumulator.
    """

    # Map the 1D program id to a (pid_m, pid_n) tile; programs are assigned in
    # groups of GROUP_SIZE_M tile rows.
    pid = tl.program_id(axis = 0)
    num_pid_m = tl.cdiv(M, BLOCK_SIZE_M)
    num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
    num_pid_in_group = GROUP_SIZE_M * num_pid_n
    group_id = pid // num_pid_in_group
    first_pid_m = group_id * GROUP_SIZE_M
    group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
    pid_m = first_pid_m + (pid % group_size_m)
    pid_n = (pid % num_pid_in_group) // group_size_m

    # Row/column offsets wrap modulo M / N so the unmasked-in-M/N loads below
    # stay inside the tensors; the final store is masked instead.
    offs_am = (pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)) % M
    offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)) % N
    offs_k = tl.arange(0, BLOCK_SIZE_K)
    a_ptrs = A + (offs_am[:, None] * stride_am + offs_k[None, :] * stride_ak)
    b_ptrs = B + (offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn)

    # Scale pointers: one activation scale per row, one weight scale per
    # group of `group_n` output columns.
    As_ptrs = As + offs_am * stride_As_m
    offs_bsn = offs_bn // group_n
    Bs_ptrs = Bs + offs_bsn * stride_Bs_n

    accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype = tl.float32)
    for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
        # Only the K dimension is masked; out-of-range elements read as 0
        a = tl.load(a_ptrs, mask = offs_k[None, :] < K - k * BLOCK_SIZE_K, other = 0.0)
        b = tl.load(b_ptrs, mask = offs_k[:, None] < K - k * BLOCK_SIZE_K, other = 0.0)

        # Index of the quantization group along K that this step starts in
        k_start = k * BLOCK_SIZE_K
        offs_ks = k_start // group_k
        a_s = tl.load(As_ptrs + offs_ks * stride_As_k)
        b_s = tl.load(Bs_ptrs + offs_ks * stride_Bs_k)

        accumulator += tl.dot(a, b) * a_s[:, None] * b_s[None, :]
        a_ptrs += BLOCK_SIZE_K * stride_ak
        b_ptrs += BLOCK_SIZE_K * stride_bk

    # Cast the float32 accumulator to the element type of C
    if C.dtype.element_ty == tl.bfloat16:
        c = accumulator.to(tl.bfloat16)
    elif C.dtype.element_ty == tl.float16:
        c = accumulator.to(tl.float16)
    else:
        c = accumulator.to(tl.float32)

    # Un-wrapped offsets for the store, masked to the valid (M, N) region
    offs_cm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    c_ptrs = C + stride_cm * offs_cm[:, None] + stride_cn * offs_cn[None, :]
    c_mask = (offs_cm[:, None] < M) & (offs_cn[None, :] < N)
    tl.store(c_ptrs, c, mask = c_mask)


def w8a8_block_fp8_matmul_triton(
    A: torch.Tensor,
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
    block_size: list[int],
    output_dtype: torch.dtype = torch.float32,
) -> torch.Tensor:
    """Block-wise FP8 matmul.

    Computes `A @ B.T` for a quantized activation `A` (..., K) with scales
    `As` (..., ceil(K / block_k)) and a 2D quantized weight `B` (N, K) with
    scales `Bs` (ceil(N / block_n), ceil(K / block_k)). `block_size` is
    `[block_n, block_k]`; `None` selects `[128, 128]`. The result has shape
    (..., N) and dtype `output_dtype`.
    """
    if block_size is not None:
        assert len(block_size) == 2
        block_n, block_k = block_size[0], block_size[1]
    else:
        block_n, block_k = 128, 128

    N, K = B.shape
    assert A.shape[-1] == B.shape[-1]
    assert A.shape[:-1] == As.shape[:-1] and A.is_contiguous()
    assert triton.cdiv(A.shape[-1], block_k) == As.shape[-1]
    assert B.ndim == 2 and B.is_contiguous() and Bs.ndim == 2
    assert triton.cdiv(N, block_n) == Bs.shape[0]
    assert triton.cdiv(K, block_k) == Bs.shape[1]

    # All leading dims of A are flattened into the row count M
    M = A.numel() // A.shape[-1]
    C = A.new_empty(A.shape[:-1] + (N,), dtype = output_dtype)

    # Row tile: 128, or for small M the next power of two with a floor of 16
    tile_m = 128 if M >= 128 else max(triton.next_power_of_2(M), 16)
    tile_n, tile_k = block_n, block_k

    grid = lambda META: (
        triton.cdiv(M, META["BLOCK_SIZE_M"]) * triton.cdiv(N, META["BLOCK_SIZE_N"]),
    )

    # B and Bs are stored (N, K)-major, so their K stride is stride(1) and
    # their N stride is stride(0).
    _w8a8_block_fp8_matmul[grid](
        A,
        B,
        C,
        As,
        Bs,
        M,
        N,
        K,
        block_n,
        block_k,
        A.stride(-2),
        A.stride(-1),
        B.stride(1),
        B.stride(0),
        C.stride(-2),
        C.stride(-1),
        As.stride(-2),
        As.stride(-1),
        Bs.stride(1),
        Bs.stride(0),
        BLOCK_SIZE_M = tile_m,
        BLOCK_SIZE_N = tile_n,
        BLOCK_SIZE_K = tile_k,
        GROUP_SIZE_M = 8,
    )
    return C


def torchao_block_matmul(
    act_q: torch.Tensor,
    weight_q: torch.Tensor,
    act_scale: torch.Tensor,
    weight_scale: torch.Tensor,
    block_size: tuple[int, int],
    output_dtype: torch.dtype = torch.bfloat16,
):
    """Block-wise FP8 matmul through torchao's `blockwise_fp8_gemm`.

    Takes the same argument order as `w8a8_block_fp8_matmul_triton`. All
    tensors are passed contiguous, only `block_size[1]` is forwarded, and the
    result is cast to `output_dtype`.
    """
    k_block = block_size[1]
    result = torchao_blockwise_gemm(
        act_q.contiguous(),
        act_scale.contiguous(),
        weight_q.contiguous(),
        weight_scale.contiguous(),
        block_size = k_block,
    )
    return result.to(output_dtype)


# Note that older versions of fbgemm (<=1.3.0) cause numerical imprecisions resulting in NaNs especially when X has high values in it.
# So our preference order is fbgemm (>=1.4.0) > torchao > triton. All of these have similar outputs/losses. Never use fbgemm (<=1.3.0) for block quantized FP8 matmul.
# This torchao FP8 matmul seems to be ~3x faster than the w8a8_block_fp8_matmul_triton. Though torchao is 15-30% slower than fbgemm implementation (on H100 GPUs).
if torchao_blockwise_gemm is None:
    # torchao kernel unavailable: fall back to the Triton implementation
    fp8_block_matmul = w8a8_block_fp8_matmul_triton
else:
    fp8_block_matmul = torchao_block_matmul


class FP8BlockQuantLinear(torch.autograd.Function):
    """Linear layer `X @ W.T` for an FP8 weight with block-wise or per-tensor scales.

    Forward quantizes the activation on the fly and runs an FP8 block matmul.
    Backward only produces the gradient for `X`, obtained by dequantizing
    the weight; the weight and its scale receive no gradient.
    """

    @staticmethod
    def forward(ctx, X, weight, weight_scale):
        """Return `X @ W.T` in `X.dtype`.

        `weight_scale` is either a single-element tensor (per-tensor scale) or
        a 2D tensor of block scales, possibly transposed relative to `weight`.
        Raises ValueError when the block scales fit neither orientation.
        """
        m, n = weight.shape

        # Save original scale for backward (before any transformation)
        original_weight_scale = weight_scale

        # Handle per-tensor quantization: expand scalar to block scale shape
        if weight_scale.numel() == 1:
            block_size = [128, 128]
            # Expand scalar to (ceil(m/128), ceil(n/128)) - same value for all blocks
            num_blocks_m = triton.cdiv(m, block_size[0])
            num_blocks_n = triton.cdiv(n, block_size[1])
            weight_scale = weight_scale.expand(num_blocks_m, num_blocks_n).contiguous()
        else:
            # Block quantization path
            p, q = weight_scale.shape
            block_size = getattr(weight, "block_size", None) or getattr(
                weight_scale, "block_size", [128, 128]
            )
            assert block_size is not None, "block_size is not set"
            if triton.cdiv(m, block_size[0]) != p or triton.cdiv(n, block_size[1]) != q:
                if (
                    triton.cdiv(m, block_size[0]) == q
                    and triton.cdiv(n, block_size[1]) == p
                ):
                    weight_scale = weight_scale.T
                    original_weight_scale = weight_scale  # Update for transposed case
                else:
                    raise ValueError(
                        f"Weight shape {weight.shape} and scales shape {weight_scale.shape} is not compatible with block size {block_size}"
                    )

        if not weight.is_contiguous():
            weight = weight.contiguous()

        # Quantize input and run FP8 matmul
        qinput, scale = act_quant(X, block_size[1])
        output = fp8_block_matmul(
            qinput,
            weight,
            scale,
            weight_scale,
            block_size,
            output_dtype = X.dtype,
        )
        ctx.weight = weight
        ctx.weight_scale = original_weight_scale  # Save original for backward
        return output.to(X.dtype)

    @staticmethod
    def backward(ctx, grad_output):
        """Return `(grad_X, None, None)` where `grad_X = grad_output @ W`."""
        # Dequantize to the gradient's dtype: a fixed bfloat16 weight would make
        # the matmul fail with a dtype mismatch for float16 / float32 gradients.
        W_deq = weight_dequant(ctx.weight, ctx.weight_scale, dtype = grad_output.dtype)
        grad_X = torch_matmul(grad_output, W_deq)
        del W_deq
        return grad_X, None, None


@torch_compile
def fp8_torch_block_quant_forward(X, weight, weight_scale):
    """Compiled entry point for `FP8BlockQuantLinear`."""
    output = FP8BlockQuantLinear.apply(X, weight, weight_scale)
    return output


class FbgemmFp8Linear_matmul(torch.autograd.Function):
    """Linear layer for a row-wise quantized FP8 weight (scale of shape (rows, 1)).

    Forward uses FBGEMM's `f8f8bf16_rowwise` kernel when the weight layout
    allows it and otherwise dequantizes the weight and runs a regular matmul.
    Backward only produces the gradient for the input.
    """

    @staticmethod
    def forward(ctx, x, weight, weight_scale, bias = None):
        """Return `x @ W.T (+ bias)`.

        Raises ValueError when the weight dimensions are both divisible by 8
        but the scale matches neither dimension of the weight.
        """
        dims_divisible_by_8 = weight.shape[0] % 8 == 0 and weight.shape[1] % 8 == 0

        if weight.shape[0] == weight_scale.shape[0] and dims_divisible_by_8:
            # Edit: The kernel seems to expect that the weight has dimensions divisible by 8. Otherwise it throws `RuntimeError: cutlass cannot implement`
            # One thing we can do is to pad the weight and weight scale to multiple of 8 and perform a F8F8BF16 operation.
            # I tried benchmarking that for speed but observed that dequantize+bf16 matmul is significantly faster than padding+f8f8bf16 matmul. So we'll go that route.
            # So essentially, f8f8bf16_rowwise only happens when shapes are proper (no transposes) and divisible by 8.

            # quantize_fp8_per_row will squash the leading dimensions, so save the desired shape here
            output_shape = (*x.shape[:-1], -1)
            # x_quantized and x_scale are not necessarily on the same device as x, this is an issue.
            # https://github.com/pytorch/FBGEMM/blob/e08af8539c391437f447173863df0f3f6f6f1855/fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cu#L1237C3-L1237C45
            x_quantized, x_scale = torch.ops.fbgemm.quantize_fp8_per_row(
                x.view(-1, x.shape[-1]).contiguous(),
                scale_ub = getattr(weight, "input_scale_ub", None),
            )
            # Moving x_quantized, x_scale here creates gibberish output ... However, if we move the output, it works
            # x_quantized, x_scale = x_quantized.to(x.device), x_scale.to(x.device)

            if not weight.is_contiguous():
                weight = weight.contiguous()
            if not weight_scale.is_contiguous():
                weight_scale = weight_scale.contiguous()

            # Cast after the contiguity fix so the kernel receives the contiguous scale.
            # The computation still happens on the device where self.weight is even if x_quantized is not on the same device as self.weight
            weight_scale_float32 = weight_scale.to(torch.float32)

            output = torch.ops.fbgemm.f8f8bf16_rowwise(
                x_quantized, weight, x_scale, weight_scale_float32, use_fast_accum = True
            )
            if bias is not None:
                output = output + bias
            # Hacky for now, we have the output to the device of x
            output = output.to(x.device, x.dtype)
            output = output.reshape(output_shape)
            del x_quantized, x_scale
        elif (
            weight.shape[0] != weight_scale.shape[0]
            and weight.shape[1] == weight_scale.shape[0]
        ) or not dims_divisible_by_8:
            # Either the weight/scale is transposed or its shape is not divisible by 8. Both cases, dequantizing is the preferred way.
            # The transpose case is generally noticed in backward pass when we do dY@W instead of @W.T as we do for forward.
            # The shape case, I noticed to happen in MLP of Qwen 2.5 VL 7B where the gate proj is of shape (3420, 1280) and 3420/8=427.5

            # Dequantize to x's dtype so the matmul operands always agree
            W_deq = weight_dequant(weight, weight_scale, dtype = x.dtype).T
            output = torch_matmul(x, W_deq)
            del W_deq
            # The bias must be applied on this path as well
            if bias is not None:
                output = output + bias
        else:
            raise ValueError(
                f"Shapes are incompatible {weight.shape = }, {weight_scale.shape = }, {x.shape = }"
            )

        ctx.weight = weight
        ctx.weight_scale = weight_scale
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return `grad_output @ W` for `x` and None for weight, scale and bias."""
        W_deq = weight_dequant(ctx.weight, ctx.weight_scale, dtype = grad_output.dtype)
        grad_X = torch_matmul(grad_output, W_deq)
        del W_deq
        # One entry per forward input: x, weight, weight_scale, bias
        return grad_X, None, None, None


@torch_compile
def fbgemm_fp8_linear(X, weight, weight_scale, bias = None):
    """Compiled entry point for `FbgemmFp8Linear_matmul`."""
    output = FbgemmFp8Linear_matmul.apply(X, weight, weight_scale, bias)
    return output


class FP8_fbgemm_block_linear(torch.autograd.Function):
    """Block-quantized FP8 linear layer backed by FBGEMM's `f8f8bf16_blockwise`.

    Forward quantizes the flattened input per block and calls the FBGEMM
    kernel; backward dequantizes the weight to compute the input gradient.
    """

    @staticmethod
    def forward(ctx, X, weight, weight_scale, bias = None):
        leading_dims = X.shape[:-1]
        X = X.view(-1, X.shape[-1])

        # Block size comes from the weight, else from the scale, else 128 x 128
        block_size = getattr(weight, "block_size", None)
        if not block_size:
            block_size = getattr(weight_scale, "block_size", [128, 128])
        bs_n, bs_k = block_size
        bs_m = bs_n

        m, n = weight.shape
        p, q = weight_scale.shape
        blocks_along_m = triton.cdiv(m, bs_n)
        blocks_along_n = triton.cdiv(n, bs_k)

        scale_matches = blocks_along_m == p and blocks_along_n == q
        if not scale_matches:
            scale_is_transposed = blocks_along_m == q and blocks_along_n == p
            if not scale_is_transposed:
                raise ValueError(
                    f"Weight shape {weight.shape} and scales shape {weight_scale.shape} is not compatible with block size {bs_n, bs_k}"
                )
            # weights are transposed during backward pass for training :)
            # We transpose weight scale to counter that. Note that transposing weight would cause issues with matmul with input X
            weight_scale = weight_scale.T

        xq, xs = triton_quantize_fp8_block(X, bs_m, bs_n, None)
        ## TODO: Investigate and resolve the high divergence of this output from baseline
        # WARNING: This causes the outputs to diverge from expected when X has high values in it.
        # That results in the model producing gibberish, especially on longer sequences and training loss starting at high values like 8 instead of <1 ideally
        # Please refrain from using this till this issue is resolved. This exists here just for a future headstart.
        output = torch.ops.fbgemm.f8f8bf16_blockwise(
            xq, weight.contiguous(), xs, weight_scale.contiguous(), bs_m, bs_n, bs_k
        )
        if bias is not None:
            output = output + bias

        # Restore the leading dims that were flattened above
        output = output.view(*leading_dims, -1)

        del xq
        del xs

        ctx.weight = weight
        ctx.weight_scale = weight_scale
        ctx.block_size = [bs_m, bs_n, bs_k]
        return output

    @staticmethod
    def backward(ctx, grad_output):
        dequantized = weight_dequant(ctx.weight, ctx.weight_scale)
        grad_input = torch_matmul(grad_output, dequantized)
        del dequantized
        return grad_input, None, None, None, None


@torch_compile
def fp8_fbgemm_block_linear(X, weight, weight_scale, bias = None):
    """Compiled entry point for `FP8_fbgemm_block_linear`."""
    output = FP8_fbgemm_block_linear.apply(X, weight, weight_scale, bias)
    return output


def test_has_fbgemm():
    """Probe whether FBGEMM's blockwise FP8 matmul runs on the current GPU.

    Multiplies a 128 x 128 all-ones FP8 matrix with itself using unit block
    scales and checks that every output element equals 128. Any exception from
    the kernel call or the check is reported with `print` and treated as
    "not available". Returns True when the probe succeeds, otherwise False.
    """
    # We must manually check if the faster FBGEMM works on the specific GPU
    # For example RTX 5090 and RTX 4090 does not work
    # Also SM100 (Blackwell B200/B100) GPUs fail with CUTLASS SM90 kernels
    # [TODO] Investigate with TorchAO why FBGEMM fails on consumer GPUs
    side = 128
    xq = torch.ones(side, side, dtype = torch.float8_e4m3fn, device = "cuda")
    wq = xq
    M, K = xq.shape
    block_scale = torch.ones(M // 128, K // 128, dtype = torch.float32, device = "cuda")

    has_fbgemm = False
    try:
        out = torch.ops.fbgemm.f8f8bf16_blockwise(xq, wq, block_scale, block_scale)
        assert torch.unique(out).item() == 128
        has_fbgemm = True
        del out
    except Exception as e:
        # Catch any CUTLASS/CUDA errors and disable FBGEMM
        # This includes MMA instruction errors, architecture mismatches, kernel launch failures, etc.
        known_markers = (
            "cutlass",
            "cuda error",
            "cuda runtime error",
            "no kernel image",
            "arch conditional",
            "mma instruction",
            "compute capability",
            "cute_invalid_control_path",
            "tma",
        )
        lowered = str(e).lower()
        if any(marker in lowered for marker in known_markers):
            print(
                "Unsloth: FBGEMM on the current GPU cannot load - will switch to Triton kernels"
            )
        else:
            print(
                f"Unsloth: FBGEMM on the current GPU cannot load with error = {e} - will switch to Triton kernels"
            )
        has_fbgemm = False

    del block_scale, xq
    torch.cuda.empty_cache()
    return has_fbgemm


# Default to the torchao / Triton block-quantized path. It is replaced below by
# the FBGEMM implementation only when fbgemm_gpu is recent enough AND the
# blockwise kernel is confirmed to work on the current GPU.
fp8_block_quant_linear = fp8_torch_block_quant_forward
if "UNSLOTH_HAS_FBGEMM" not in os.environ:
    os.environ["UNSLOTH_HAS_FBGEMM"] = "0"
try:
    import fbgemm_gpu

    # Older versions cause numerical imprecisions resulting in NaNs especially when X has high values in it.
    # This is both fast and accurate hence preferred.
    # This makes it 15% faster than the torchao implementation.
    if Version(fbgemm_gpu.__version__) >= Version("1.4.0"):
        # We must manually confirm if blockwise FBGEMM works!
        # This check is a must for consumer grade GPUs which fail
        # Suppress CUDA device printf during probe -- on Blackwell (SM100) GPUs,
        # FBGEMM's CUTLASS blockwise kernel (hardcoded SM90) fires thousands of
        # "Arch conditional MMA" lines to stdout fd 1 before aborting.
        from unsloth.import_fixes import suppress_cuda_printf

        with suppress_cuda_printf():
            _has_fbgemm = test_has_fbgemm()
        if _has_fbgemm:
            os.environ["UNSLOTH_HAS_FBGEMM"] = "1"
            logger.info("Using fbgemm_gpu block quantized FP8 matmul")
            fp8_block_quant_linear = fp8_fbgemm_block_linear
        else:
            os.environ["UNSLOTH_HAS_FBGEMM"] = "0"
except ImportError:
    # fbgemm_gpu (or the probe helper) is not installed: keep the default path
    pass
except Exception as e:
    # Any other failure also keeps the default path, but is no longer silent.
    # KeyboardInterrupt / SystemExit are deliberately not caught here.
    logger.info(
        f"Unsloth: Could not enable fbgemm_gpu block quantized FP8 matmul with error = {e}"
    )


@torch_compile
def fp8_linear(X, weight, weight_scale, bias = None):
    """Apply an FP8 linear layer, dispatching on the layout of `weight_scale`.

    - one element (per-tensor) or 2D with more than one column (block scales):
      the block-quantized implementation;
    - otherwise (row/channel scales of shape (n, 1)): the FBGEMM row-wise
      implementation.

    `bias`, when given, is added on both paths.
    """
    # Per-tensor quantization: single scalar scale for entire weight
    # Block quantized FP8: 2D scale tensor with multiple columns
    if weight_scale.numel() == 1 or (
        weight_scale.ndim == 2 and weight_scale.shape[1] > 1
    ):
        out = fp8_block_quant_linear(X, weight, weight_scale)
        # The block implementation is called without the bias, so add it here;
        # previously a bias passed for these layouts was silently dropped.
        if bias is not None:
            out = out + bias
    # Row/channel quantized FP8: 2D scale with shape (n, 1)
    else:
        out = fbgemm_fp8_linear(X, weight, weight_scale, bias)
    return out


def module_forward_patch(forward_function, scale_attr = "weight_scale"):
    """Build a replacement `forward(self, X)` for a quantized linear module.

    The returned function calls `forward_function(X, self.weight, scale)`,
    where `scale` is read from the attribute named `scale_attr`, and then adds
    `self.bias` when the module has a non-None `bias`. Without that last step
    a layer's bias would be silently dropped by the patched forward.
    """

    def patched_forward(self, X):
        out = forward_function(X, self.weight, getattr(self, scale_attr))
        bias = getattr(self, "bias", None)
        if bias is not None:
            out = out + bias
        return out

    return patched_forward


# Patch the forward functions of the layers (for compiled models).
# A layer class is None when its import failed, in which case it is skipped.
for _layer_cls, _forward_fn, _scale_attr in (
    (FbgemmFp8Linear, fbgemm_fp8_linear, "weight_scale"),
    (FP8Linear, fp8_block_quant_linear, "weight_scale_inv"),
):
    if _layer_cls is not None:
        _layer_cls.forward = module_forward_patch(_forward_fn, _scale_attr)
del _layer_cls, _forward_fn, _scale_attr


================================================
FILE: unsloth/kernels/geglu.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import triton
import triton.language as tl
import torch
from .utils import (
    calculate_settings,
    triton_tanh,
    torch_gpu_device,
)

# Signed int32 tops out at 2**31 - 1, so an int32 offset cannot address 2**31
# or more elements. Kernels switch to int64 indexing once the element count
# exceeds that limit minus a margin of a few blocks.
BLOCK_SIZE = 1024
SAFE_INT32_BUFFER_MULTIPLIER = 4
NUM_INT32_ELEMENTS = 1 << 31
INT32_SAFETY_BUFFER = NUM_INT32_ELEMENTS - SAFE_INT32_BUFFER_MULTIPLIER * BLOCK_SIZE


@triton.jit
def _exact_forward_kernel(
    e,
    g,
    h,
    n_elements,
    BLOCK_SIZE: tl.constexpr,
    LONG_INDEXING: tl.constexpr,
):
    """Elementwise exact (erf) GEGLU forward: h = gelu(e) * g.

    Each program handles BLOCK_SIZE consecutive elements. With LONG_INDEXING
    the offsets are computed in int64 instead of int32.
    """
    pid = tl.program_id(0)
    if LONG_INDEXING:
        offsets = pid.to(tl.int64) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE).to(
            tl.int64
        )
        n_elements = tl.cast(n_elements, tl.int64)
    else:
        offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    in_bounds = offsets < n_elements

    # The gate is upcast to float32; the up projection keeps its own dtype
    gate = tl.load(e + offsets, mask = in_bounds, other = 0).to(tl.float32)
    up = tl.load(g + offsets, mask = in_bounds, other = 0)  # .to(tl.float32)

    # f = 1/2 * e * (1 + erf(1/sqrt(2) * e))
    gelu = 0.5 * gate * (tl.math.erf(tl.math.rsqrt(2.0) * gate) + 1.0)
    gelu = gelu.to(up.dtype)  # Exact copy from HF
    # h = f * up
    result = gelu * up

    tl.store(h + offsets, result, mask = in_bounds)


def geglu_exact_forward_kernel(gate, up):
    """Launch the exact GEGLU forward kernel on 3D `gate` / `up` tensors.

    Returns a new tensor with the shape, dtype and device of `gate`.
    """
    batch, seq_len, hd = gate.shape
    n_elements = gate.numel()
    target_device = gate.device
    out = torch.empty((batch, seq_len, hd), dtype = gate.dtype, device = target_device)

    # int64 offsets are only needed once int32 offsets could overflow
    long_indexing = 1 if n_elements > INT32_SAFETY_BUFFER else 0

    def grid(meta):
        return (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)

    with torch_gpu_device(target_device):
        _exact_forward_kernel[grid](
            gate,
            up,
            out,
            n_elements,
            BLOCK_SIZE = BLOCK_SIZE,
            LONG_INDEXING = long_indexing,
        )
    return out


@triton.jit
def _exact_backward_kernel(
    DW,
    e,
    g,
    n_elements,
    BLOCK_SIZE: tl.constexpr,
    LONG_INDEXING: tl.constexpr,
):
    """
    Elementwise backward of the exact (erf) GEGLU. The three input buffers are
    overwritten in place with the results:
        DW <- h  = f * g
        e  <- df = DW * f
        g  <- de = DW * g * df/de

    f = 1/2 * e * (1 + erf(1/sqrt(2) * e))
    h = f * up

    df/de (with help of Wolfram :)
    df/de = 1/2 * (1 + erf(1/sqrt(2) * e)) + 1/sqrt(2*pi) * e * exp(-1/2 * e^2)

    Reuse via
    f =        1/2 * (1 + erf(1/sqrt(2) * e)) * e
    """
    block_idx = tl.program_id(0)
    # With LONG_INDEXING the offsets are computed in int64 instead of int32
    if LONG_INDEXING:
        offsets = block_idx.to(tl.int64) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE).to(
            tl.int64
        )
        n_elements = tl.cast(n_elements, tl.int64)
    else:
        offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements

    # Only e is upcast to float32; DW and g keep their stored dtype
    DW_row = tl.load(DW + offsets, mask = mask, other = 0)  # .to(tl.float32)
    e_row = tl.load(e + offsets, mask = mask, other = 0).to(tl.float32)
    g_row = tl.load(g + offsets, mask = mask, other = 0)  # .to(tl.float32)

    # Break e_row away for re-use
    # f = 1/2 * e * (1 + erf(1/sqrt(2) * e))
    f_partial_row = 0.5 * (tl.math.erf(tl.math.rsqrt(2.0) * e_row) + 1.0)
    f_row = f_partial_row * e_row

    f_row = f_row.to(DW_row.dtype)
    # h = f * g
    h_row = f_row * g_row
    # df = DW * f
    df_row = DW_row * f_row
    # dg = DW * g
    dg_row = DW_row * g_row

    # df/de = 1/2 * (1 + erf(1/sqrt(2) * e)) + 1/sqrt(2*pi) * e * exp(-1/2 * e^2)
    t = 0.3989422804014327  # 1/sqrt(2*pi)
    df_de = f_partial_row + t * e_row * tl.exp(-0.5 * e_row * e_row)

    # The product is formed in float32 and then cast back to DW's dtype
    de_row = dg_row.to(tl.float32) * df_de
    de_row = de_row.to(DW_row.dtype)

    # Store derivatives in buffers
    tl.store(DW + offsets, h_row, mask = mask)  # h  = f * g
    tl.store(e + offsets, df_row, mask = mask)  # df = DW * f
    tl.store(g + offsets, de_row, mask = mask)  # de


def geglu_exact_backward_kernel(DW, e, g):
    """Launch the exact GEGLU backward kernel on 2D buffers.

    The kernel overwrites `DW`, `e` and `g` in place; the same three tensors
    are returned.
    """
    # Unpacking enforces that `e` is two-dimensional
    _n_rows, _n_cols = e.shape
    n_elements = e.numel()
    long_indexing = 1 if n_elements > INT32_SAFETY_BUFFER else 0

    def grid(meta):
        return (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)

    with torch_gpu_device(e.device):
        _exact_backward_kernel[grid](
            DW,
            e,
            g,
            n_elements,
            BLOCK_SIZE = BLOCK_SIZE,
            LONG_INDEXING = long_indexing,
        )
    return DW, e, g


@triton.jit
def _approx_forward_kernel(
    e,
    g,
    h,
    n_elements,
    BLOCK_SIZE: tl.constexpr,
    LONG_INDEXING: tl.constexpr,
):
    """Elementwise tanh-approximated GEGLU forward: h = gelu_tanh(e) * g.

    Each program handles BLOCK_SIZE consecutive elements. With LONG_INDEXING
    the offsets are computed in int64 instead of int32.
    """
    pid = tl.program_id(0)
    if LONG_INDEXING:
        offsets = pid.to(tl.int64) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE).to(
            tl.int64
        )
        n_elements = tl.cast(n_elements, tl.int64)
    else:
        offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    in_bounds = offsets < n_elements

    # f = 1/2 * e * (1 + tanh( sqrt(2/pi) * (x + 0.044715 * x^3 ) ))
    # f = 1/2 * e * (1 + tanh( sqrt(2/pi) * x * (1 + 0.044715 * x^2 ) ))
    # h = f * up
    sqrt_2_over_pi = 0.7978845608028654  # math.sqrt(2 / math.pi)

    # The gate is upcast to float32; the up projection keeps its own dtype
    gate = tl.load(e + offsets, mask = in_bounds, other = 0).to(tl.float32)
    up = tl.load(g + offsets, mask = in_bounds, other = 0)  # .to(tl.float32)

    gelu = (
        0.5
        * gate
        * (triton_tanh(sqrt_2_over_pi * gate * (1.0 + 0.044715 * gate * gate)) + 1.0)
    )
    gelu = gelu.to(up.dtype)  # Exact copy from HF
    result = gelu * up

    tl.store(h + offsets, result, mask = in_bounds)


def geglu_approx_forward_kernel(gate, up):
    """Launch the tanh-approximated GEGLU forward kernel on 3D `gate` / `up` tensors.

    Returns a new tensor with the shape, dtype and device of `gate`.
    """
    batch, seq_len, hd = gate.shape
    n_elements = gate.numel()
    target_device = gate.device
    out = torch.empty((batch, seq_len, hd), dtype = gate.dtype, device = target_device)

    # int64 offsets are only needed once int32 offsets could overflow
    long_indexing = 1 if n_elements > INT32_SAFETY_BUFFER else 0

    def grid(meta):
        return (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)

    with torch_gpu_device(target_device):
        _approx_forward_kernel[grid](
            gate,
            up,
            out,
            n_elements,
            BLOCK_SIZE = BLOCK_SIZE,
            LONG_INDEXING = long_indexing,
        )
    return out


@triton.jit
def _approx_backward_kernel(
    DW,
    e,
    g,
    n_elements,
    BLOCK_SIZE: tl.constexpr,
    LONG_INDEXING: tl.constexpr,
):
    """
    Elementwise backward pass of tanh-approximate GEGLU; overwrites all three
    buffers in place.

    Forward definition (x = e is the gate, g is the up projection):
        f = 1/2 * x * (1 + tanh( sqrt(2/pi) * x * (1 + 0.044715 * x^2 ) ))
        h = f * g

    Derivative of f (with help from https://arxiv.org/pdf/2305.12073.pdf :))
        df/dx = 1/2 * [1 + tanh( sqrt(2/pi) * x * (1 + 0.044715 * x^2 ) )] +
                1/2 * sech^2 [   sqrt(2/pi) * x * (1 + 0.044715 * x^2 )  ] *
                               ( sqrt(2/pi) * x * (1 + 0.044715 * x^2 * 3 ) )

    Notice sech^2(z) = 1 - tanh^2(z), so tanh( sqrt(2/pi) * x * (1 + 0.044715 * x^2 ) )
    is computed once and reused for both terms.

    See https://www.desmos.com/calculator/nqprfoni6x

    On exit, at every unmasked flat offset:
        DW <- h  = f * g
        e  <- DW * f            (named `df` in the code)
        g  <- DW * g * df/dx    (named `de` in the code)
    NOTE(review): DW is presumably the upstream gradient w.r.t. h, which would
    make the value written to `e` the gradient for g and the value written to
    `g` the gradient for e -- confirm against the caller.

    n_elements    : number of elements to process.
    BLOCK_SIZE    : elements per program (compile-time constant).
    LONG_INDEXING : when truthy, offsets are computed in int64.
    """
    block_idx = tl.program_id(0)
    if LONG_INDEXING:
        # Promote both operands to int64 before forming the flat offset.
        offsets = block_idx.to(tl.int64) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE).to(
            tl.int64
        )
        n_elements = tl.cast(n_elements, tl.int64)
    else:
        offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    # Lanes past the end of the buffers neither load real data nor store.
    mask = offsets < n_elements

    # Only the gate is upcast to float32; DW and g stay in their stored dtype.
    DW_row = tl.load(DW + offsets, mask = mask, other = 0)  # .to(tl.float32)
    e_row = tl.load(e + offsets, mask = mask, other = 0).to(tl.float32)
    g_row = tl.load(g + offsets, mask = mask, other = 0)  # .to(tl.float32)

    # See https://www.desmos.com/calculator/nqprfoni6x
    s = 0.7978845608028654  # math.sqrt(2 / math.pi)
    a = s * e_row  # a = sqrt(2 / pi) * x
    b = a * 0.044715 * e_row * e_row  # b = a * 0.044715 * x^2
    # a + b is the tanh argument; a + 3b is its derivative multiplied by x.
    T = 1.0 + triton_tanh(a + b)
    T2 = 0.5 * T
    # With t = tanh(a + b): 1 - t^2 = (1 + t) * (1 - t) = -T * (T - 2), so
    # Q = 0.5 * -T * (T - 2.0) * (a + 3.0 * b)
    Q2 = -T2 * (T - 2.0) * (a + 3.0 * b)
    df_de = T2 + Q2  # 1/2 * (T + Q)

    # f = 1/2 * x * (1 + tanh( sqrt(2/pi) * (x + 0.044715 * x^3 ) ))
    f_row = T2 * e_row
    # Round f to DW's dtype before it is used in the products below.
    f_row = f_row.to(DW_row.dtype)
    # h = f * g
    h_row = f_row * g_row
    # df = DW * f
    df_row = DW_row * f_row
    # dg = DW * g
    dg_row = DW_row * g_row

    # de = (DW * g) * df/dx, accumulated in float32 then rounded to DW's dtype.
    de_row = dg_row.to(tl.float32) * df_de
    de_row = de_row.to(DW_row.dtype)

    # Store derivatives in buffers (all three inputs are overwritten in place)
    tl.store(DW + offsets, h_row, mask = mask)  # h  = f * g
    tl.store(e + offsets, df_row, mask = mask)  # df = DW * f
    tl.store(g + offsets, de_row, mask = mask)  # de


def geglu_approx_backward_kernel(DW, e, g):
    """
    Launch the tanh-approximate GEGLU backward kernel.

    `e` must be 2-dimensional (its shape is unpacked into two names). The
    kernel overwrites DW, e and g in place, and the same three tensors are
    returned.

    NOTE(review): the kernel indexes all three tensors by flat offset, so they
    presumably share e's element count and are contiguous -- confirm against
    callers.
    """
    _n_rows, _n_cols = e.shape  # unpacking rejects non-2D input
    total = e.numel()

    def launch_grid(meta):
        # One program per BLOCK_SIZE-sized chunk of the flattened tensor.
        return (triton.cdiv(total, meta["BLOCK_SIZE"]),)

    with torch_gpu_device(e.device):
        _approx_backward_kernel[launch_grid](
            DW,
            e,
            g,
            total,
            BLOCK_SIZE = BLOCK_SIZE,
            # Switch to 64-bit offsets once the element count leaves the
            # int32 safety margin.
            LONG_INDEXING = 1 if total > INT32_SAFETY_BUFFER else 0,
        )
    return DW, e, g


================================================
FILE: unsloth/kernels/layernorm.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
# Copyright 2024-present Andrej Karpathy & the llm.c team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import triton
import triton.language as tl
import torch
from .utils import calculate_settings, torch_gpu_device
from unsloth_zoo.patching_utils import (
    patch_layernorm,
)


@triton.jit
def layernorm_forward(
    Y,
    Y_row_stride,
    X,
    X_row_stride,
    W,
    b,
    r,
    mu,
    n_cols: tl.constexpr,
    eps: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    LayerNorm forward for one row per program.

    For row `row_idx` computes, in float32:
        mean = sum(X) / n_cols
        var  = sum((X - mean)^2) / n_cols
        Y    = (X - mean) * rsqrt(var + eps) * W + b

    Y, X                       : output / input pointers; rows are reached via
                                 the row strides, columns via unit offsets.
    Y_row_stride, X_row_stride : element distance between consecutive rows.
    W, b                       : per-column scale and shift pointers.
    r                          : per-row output, receives rsqrt(var + eps).
    mu                         : per-row output, receives the row mean.
    n_cols                     : number of valid columns in a row.
    eps                        : value added to the variance before rsqrt.
    BLOCK_SIZE                 : lanes per program. The row is handled in a
                                 single block (there is no column loop), so
                                 columns >= BLOCK_SIZE would not be processed.
    """
    row_idx = tl.program_id(0)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < n_cols

    # Advance every pointer to the start of this program's row / row slot.
    Y += row_idx * Y_row_stride
    X += row_idx * X_row_stride
    r += row_idx
    mu += row_idx

    # According to https://pytorch.org/torchtune/stable/_modules/torchtune/modules/layer_norm.html#Fp32LayerNorm, all modules
    # are in float32!
    X_row = tl.load(X + col_offsets, mask = mask, other = 0).to(tl.float32)
    W_row = tl.load(W + col_offsets, mask = mask, other = 0).to(tl.float32)
    b_row = tl.load(b + col_offsets, mask = mask, other = 0).to(tl.float32)

    # Masked lanes were loaded as 0, so they do not disturb the sum.
    mean_X = tl.sum(X_row, axis = 0) / n_cols
    # For masked lanes X_row is 0, so (X_row - mean) == -mean there; zero those
    # lanes explicitly or they would contribute to the variance.
    XX = tl.where(mask, X_row - mean_X, 0)
    row_var = tl.sum(XX * XX, axis = 0) / n_cols
    # Explicit float32 scalar to ensure correct type promotion on HIP/ROCm
    eps_f32 = tl.full((), eps, tl.float32)
    # Despite the name, this is 1 / sqrt(var + eps), i.e. the inverse std.
    inv_var = tl.math.rsqrt(row_var + eps_f32)
    # Per-row statistics are written out to r and mu.
    tl.store(r, inv_var)
    tl.store(mu, mean_X)
    output = (XX * inv_var) * W_row + b_row
    tl.store(Y + col_offsets, output, mask = mask)


@triton.jit
def layernorm_backward(
    dY,
    dY_row_stride,
    X,
    X_row_stride,
    W,
    b,
    r,
    mu,
    n_cols: tl.constexpr,
    eps: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    LayerNorm input-gradient for one row per program; the result overwrites dY.

    With normed = (X - mean) * inv_std and dY_W = dY * W, computes in float32:
        dX = ( dY_W
               - sum(dY_W) / n_cols
               - normed * sum(dY_W * normed) / n_cols ) * inv_std

    Only dX is produced; no gradient for W or b is computed here.

    dY                          : upstream gradient pointer, overwritten with dX.
    dY_row_stride, X_row_stride : element distance between consecutive rows.
    X                           : forward input pointer.
    W                           : per-column scale pointer.
    b                           : unused; kept so the signature mirrors
                                  `layernorm_forward`.
    r, mu                       : per-row inverse std and mean, read here.
    n_cols                      : number of valid columns in a row.
    eps                         : unused in this kernel (r already includes it).
    BLOCK_SIZE                  : lanes per program; the row is handled in a
                                  single block (there is no column loop).
    """
    # Approximately follows https://github.com/karpathy/llm.c/blob/master/doc/layernorm/layernorm.md
    row_idx = tl.program_id(0)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < n_cols

    # Advance every pointer to the start of this program's row / row slot.
    dY += row_idx * dY_row_stride
    X += row_idx * X_row_stride
    r += row_idx
    mu += row_idx

    # According to https://pytorch.org/torchtune/stable/_modules/torchtune/modules/layer_norm.html#Fp32LayerNorm, all modules
    # are in float32!
    dY_row = tl.load(dY + col_offsets, mask = mask, other = 0).to(tl.float32)
    X_row = tl.load(X + col_offsets, mask = mask, other = 0).to(tl.float32)
    W_row = tl.load(W + col_offsets, mask = mask, other = 0).to(tl.float32)
    # The bias does not appear in dX, so it is intentionally not loaded.

    inv_var = tl.load(r).to(tl.float32)
    mean = tl.load(mu).to(tl.float32)
    normed = (X_row - mean) * inv_var
    # dY_row and W_row are 0 on masked lanes, so dY_W is 0 there and both
    # reductions below ignore the padding even though `normed` is not masked.
    dY_W = dY_row * W_row
    dX_row = (
        dY_W
        - tl.sum(dY_W, axis = 0) / n_cols
        - normed * tl.sum(dY_W * normed, axis = 0) / n_cols
    )
    dX_row = dX_row * inv_var
    # Reuse the upstream-gradient buffer as the output buffer.
    tl.store(dY + col_offsets, dX_row, mask = mask)


class Fast_Layernorm(torch.autograd.Function):
    """
    Autograd wrapper around the Triton `layernorm_forward` /
    `layernorm_backward` kernels.

    Only the gradient with respect to the input `X` is produced; `W`, `b` and
    `eps` receive `None`.
    """

    @staticmethod
    def forward(ctx, X, W, b, eps):
        """
        Normalise `X` over its last dimension and apply scale `W` and shift `b`.

        Returns a tensor with X's original shape and dtype. The per-row mean
        and rsqrt(var + eps) are kept in float32 for the backward pass.
        """
        shape = X.shape
        dim = shape[-1]
        # The kernel addresses a row as `X + row * stride(0) + col`, i.e. it
        # assumes unit stride along the last dimension. `view` alone does not
        # guarantee that (e.g. a transposed 2D input), so force a dense layout
        # first; this is a no-op for tensors that are already contiguous.
        X = X.contiguous().view(-1, dim)
        n_rows, n_cols = X.shape
        BLOCK_SIZE, num_warps = calculate_settings(n_cols)
        device = X.device
        Y = torch.empty((n_rows, n_cols), dtype = X.dtype, device = device)
        # Row statistics are always stored in float32, whatever X's dtype is.
        r = torch.empty(n_rows, dtype = torch.float32, device = device)
        mu = torch.empty(n_rows, dtype = torch.float32, device = device)

        with torch_gpu_device(device):
            layernorm_forward[(n_rows,)](
                Y,
                Y.stride(0),
                X,
                X.stride(0),
                W,
                b,
                r,
                mu,
                n_cols,
                eps,
                BLOCK_SIZE = BLOCK_SIZE,
                num_warps = num_warps,
            )
        # Launch settings are reused verbatim by the backward kernel.
        ctx.eps = eps
        ctx.BLOCK_SIZE = BLOCK_SIZE
        ctx.num_warps = num_warps
        ctx.save_for_backward(X, W, b, r, mu)
        return Y.view(*shape)

    @staticmethod
    def backward(ctx, dY):
        """
        Compute the gradient with respect to `X`.

        When `dY` is already contiguous its storage is overwritten in place
        with the result. Returns one entry per `forward` input
        (X, W, b, eps); only the first is a tensor.
        """
        shape = dY.shape
        dim = shape[-1]
        # Autograd may pass an expanded / strided gradient (for example the
        # stride-0 tensor produced by `Y.sum().backward()`). The kernel assumes
        # unit column stride and writes its result into this buffer, so it
        # needs a dense tensor; `contiguous()` is a no-op when dY is dense.
        dY = dY.contiguous().view(-1, dim)
        X, W, b, r, mu = ctx.saved_tensors
        n_rows, n_cols = dY.shape

        with torch_gpu_device(dY.device):
            layernorm_backward[(n_rows,)](
                dY,
                dY.stride(0),
                X,
                X.stride(0),
                W,
                b,
                r,
                mu,
                n_cols,
                ctx.eps,
                BLOCK_SIZE = ctx.BLOCK_SIZE,
                num_warps = ctx.num_warps,
            )
        # The kernel stored dX into dY's buffer.
        dX = dY.view(*shape)
        # Exactly one gradient per forward input: X, W, b, eps.
        return dX, None, None, None


def fast_layernorm(layernorm, X):
    """
    Apply `Fast_Layernorm` to `X` using the parameters of `layernorm`.

    The module must have `elementwise_affine` set to exactly `True`. Epsilon
    is read from `variance_epsilon` when the module has that attribute and
    from `eps` otherwise.
    """
    assert layernorm.elementwise_affine is True
    weight, bias = layernorm.weight, layernorm.bias
    if hasattr(layernorm, "variance_epsilon"):
        epsilon = layernorm.variance_epsilon
    else:
        epsilon = layernorm.eps
    return Fast_Layernorm.apply(X, weight, bias, epsilon)


def test_layernorm(
    dim = 1024,
    eps = 1e-5,
    dtype = torch.float16,
    bsz = 21,
    random_state = 3407,
    seqlen = 3341,
):
    """
    Compare the input gradient of `fast_layernorm` with `torch.nn.LayerNorm`.

    Builds a CUDA LayerNorm with uniformly initialised weight and bias, runs
    the same random input and upstream gradient through both implementations,
    and asserts that the distance between the two input gradients
    (`torch.dist`) is at most 0.1.
    """
    from torch.nn import LayerNorm

    reference_module = LayerNorm((dim,), eps = eps, device = "cuda", dtype = dtype)
    torch.cuda.manual_seed(random_state)
    torch.manual_seed(random_state)
    for parameter in (reference_module.weight, reference_module.bias):
        torch.nn.init.uniform_(parameter)

    tensor_shape = (bsz, seqlen, dim)
    X = torch.randn(tensor_shape, dtype = dtype, device = "cuda")
    # Independent leaf copy of the same values for the fast path.
    XX = X.clone()
    for leaf in (X, XX):
        leaf.requires_grad_(True)

    # Reference path.
    expected_out = reference_module(X)
    upstream = torch.randn(
        tensor_shape,
        dtype = dtype,
        device = "cuda",
        requires_grad = True,
    )
    expected_out.backward(upstream)
    expected_grad = X.grad.clone()

    # Fast path, fed the same upstream gradient.
    # from unsloth.kernels import fast_layernorm
    fast_out = fast_layernorm(reference_module, XX)
    fast_out.backward(upstream)

    grad_distance = torch.dist(expected_grad, XX.grad).item()
    assert grad_distance <= 0.1


def testing_suite_layernorm():
    """
    Run `test_layernorm` over a grid of hidden sizes, half-precision dtypes,
    sequence lengths and seeds, each under CUDA autocast for that dtype.
    """
    hidden_sizes = (512, 1024, 2048)
    half_dtypes = (torch.float16, torch.bfloat16)
    # Sequence length varies slowest, seed fastest.
    inner_cases = [
        (length, seed) for length in (3341, 2048, 349) for seed in (3407, 42)
    ]

    for dim in hidden_sizes:
        for dtype in half_dtypes:
            with torch.autocast(device_type = "cuda", dtype = dtype):
                for seqlen, random_state in inner_cases:
                    test_layernorm(
                        dim = dim,
                        eps = 1e-5,
                        dtype = dtype,
                        bsz = 21,
                        random_state = random_state,
                        seqlen = seqlen,
                    )


================================================
FILE: unsloth/kernels/moe/LICENSE
================================================
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.

  A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate.  Many developers of free software are heartened and
encouraged by the resulting cooperation.  However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.

  The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community.  It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server.  Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.

  An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals.  This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Remote Network Interaction; Use with the GNU General Public License.

  Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software.  This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time.  Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source.  For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code.  There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

================================================
FILE: unsloth/kernels/moe/README.md
================================================
## MoE Grouped GEMM

Optimized implementation of `MoE MLP Block`.
Licensed under AGPLv3.

### Background

`MoE MLP` requires the following steps:
- Calculate `topk_weights` and `topk_indices`
- If using a grouped gemm implementation, calculate permutation indices needed to rearrange tokens grouped by expert
- For each expert:
    - `expert_tokens`: gather the tokens assigned to the expert
    - `first_gemm`: `gate / up proj` @ `expert_tokens`
    - `silu_and_mul`: `silu` and `mul` of `first_gemm`
    - `second_gemm`: `silu_and_mul` @ `down proj`
    - `scatter_second_gemm`: scatter the `second_gemm` to the original token order
    - `topk_weight_mul`: `second_gemm` @ `topk_weights`
    - `final_output`: if `topk > 1`, `topk_weight_mul.view(num_tokens, topk, -1).sum(dim=1)` else `topk_weight_mul`

One way to eliminate the loop is to use a grouped GEMM, where all expert GEMMs are computed within a single kernel that iterates over tiles of the expert GEMMs as individual GEMMs. For each GEMM, the `A` matrix is `M' x K` and the `B` matrix is `K x N`, where `M'` is the number of tokens assigned to the expert and `B` is the weight matrix for that expert.

This requires an additional permute (and subsequent copy) of the hidden states such that the tokens assigned to each expert are contiguous in memory before running the first grouped GEMM within the Expert MLP.
Additionally, after the second grouped GEMM, the hidden states must be permuted back to the original token order and multiplied by `topk_weights` to get the final output.

### Optimizations
This repo implements a grouped GEMM-based MoE MLP with the following optimizations:
- Eliminates the loop over experts by performing gemms as a grouped GEMM, computing the expert gemms within a single fused triton kernel
- Fuses the permutation of hidden states from token order (original input order) to expert order (tokens grouped by expert) within the prologue of the first grouped GEMM
- Fuses the (un)permutation of hidden states from expert order back to token order in second GEMM
- Fuses the mul of hidden states by expert weights within epilogue of second GEMM (only implemented for inference, not for training)

### Structure
- `grouped_gemm/interface.py`: wrappers for the individual forward / backward kernels as well as the `torch.autograd.Function`
- `grouped_gemm/kernels/forward.py`: forward kernel
- `grouped_gemm/kernels/backward.py`: backward dX and dW kernels
- `grouped_gemm/kernels/tuning.py`: manual tuning utils
- `grouped_gemm/kernels/autotuning.py`: autotuning utils
- `grouped_gemm/reference/moe_block.py`: contains `Qwen3MoeFusedGroupedGEMMBlock`, a reference implementation of Huggingface `Qwen3SparseMOEBlock` with fused triton kernel in-place of original HF expert computation
- `grouped_gemm/reference/moe_ops.py`: supporting ops (routing, token sorting, etc.) and reference MoE block using a torch-native grouped gemm approach.

### Tests
- `grouped_gemm/tests/test_grouped_gemm.py`: unit tests for forward, backward grouped gemm kernels as well as the wrapped grouped gemm autograd.Function.  Best not to run this entire test suite at once due to the large number of parametrized unit tests.  Rather, use filters to run specific
sets of tests.  E.g., to run forward tests with autotune turned on: `pytest -sv -k "forward and autotune" --tb=short tests/test_grouped_gemm.py`.  Use the test function names and parameter ids for words to filter on.
- `grouped_gemm/tests/test_qwen3_moe.py`: end to end test for Qwen3 MoE block.  IMPORTANT: read `tests/run_qwen3_moe_tests.sh` as well as notes in the test itself for complications when running parametrized pytest test suites and triton / autotune.  TLDR: use the test script and NOT pytest to run the tests.

### Benchmarks
- `grouped_gemm/benchmark/benchmark_fused_moe.py`: benchmarks HF `Qwen3SparseMOEBlock` or `Llama4TextMoe` against the fused implementation


Running with these flags on an `H100` to bench forward pass (run with `--help` to see all available flags):

For `Qwen3-30B-A3B`:
```
python benchmark/benchmark_fused_moe.py --model qwen3 --mode forward --seqlen 1024 --permute_x --permute_y --autotune
```

For the backward bench:
```
python benchmark/benchmark_fused_moe.py --model qwen3 --mode backward --seqlen 1024 --permute_x --permute_y --autotune
```

For `Llama-4-Scout-17B-16E`:
```
python benchmark/benchmark_fused_moe.py --model llama4 --autotune --mode=forward --permute_y
```
Ditto for backwards.

### Notes
- Tested and benched on `H100`; it should also run on Ampere and possibly even earlier GPU generations, though the autotuning configs will need to be adjusted.
- The env I used to develop the kernel was `pytorch 2.7/2.8` and `pytorch-triton 3.3`.
- The kernels can be run either as autotuned (see `autotuning.py`) or with manually specified config (see `tuning.py`).  Recommended to run using autotuner since the MoE block requires 2 configs for the forward (2 grouped gemms) and 4 for the backwards (dX and dW per grouped gemm, 2 grouped gemms).
- Running with autotuning turned off with the default manual kernel config will result in **highly** sub-optimal performance as it is only meant for testing / debugging purposes.
- I've tried to strike a balance between compilation time and autotuning search space -- can probably squeeze even more performance for specific workloads.
- The Llama4 reference layer is still highly under-optimized as there are many low-hanging opportunities for further speedups around routing and shared expert calculation.

TODO:
- TMA store: implemented but not enabled currently due to non-determinism arising from triton pipelining bug.
- Warp specialization: Hopper support for WS not yet enabled on triton 3.3x branch which ships with latest pytorch 2.7.  
- Additional optimizations:
    - Fused / optimized implementations of routing, token sorting, etc.
    - Better software pipelining within grouped gemm
    - Threadblock swizzling for better L2 caching
    - Llama4
        - Fused gather / topk weight merging 
        - Custom topk, gather indices kernel
        - Shared expert fusion with experts calculation

================================================
FILE: unsloth/kernels/moe/__init__.py
================================================


================================================
FILE: unsloth/kernels/moe/autotune_cache.py
================================================
# Unsloth
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Auto-tuning cache system for MoE kernels to ensure tuning runs only once at training start.
"""

import hashlib
import json
import logging
import os
import time
from typing import Dict, List, Optional, Tuple, Any
import torch
import triton

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Global cache for kernel configurations
# Keyed by the hash string produced by `_get_cache_key`; each value is the
# (config_fwd, config_bwd_dx, config_bwd_dw) tuple for that model/device setup.
_kernel_config_cache: Dict[str, Any] = {}
# Keyed by the same cache key; an entry is set to True once an autotuning run
# for that key has finished successfully in this process.
_autotune_completed: Dict[str, bool] = {}


def _get_cache_key(
    num_experts: int,
    hidden_dim: int,
    intermediate_dim: int,
    top_k: int,
    dtype: torch.dtype,
    device_capability: Tuple[int, int],
    seq_len: int = 8192,  # Default sequence length for tuning
) -> str:
    """
    Build a deterministic identifier for one MoE tuning configuration.

    All arguments are collected into a mapping, serialized as JSON with
    sorted keys (so the result does not depend on insertion order) and
    hashed with MD5.

    Args:
        num_experts: Number of experts in the MoE layer
        hidden_dim: Hidden dimension of the model
        intermediate_dim: Intermediate dimension for MoE MLP
        top_k: Number of experts to route to
        dtype: Data type for computation (stored as its string form)
        device_capability: Compute capability of the device
        seq_len: Sequence length associated with the tuning run

    Returns:
        The hexadecimal MD5 digest of the serialized configuration.
    """
    fingerprint = dict(
        num_experts = num_experts,
        hidden_dim = hidden_dim,
        intermediate_dim = intermediate_dim,
        top_k = top_k,
        dtype = str(dtype),
        device_capability = device_capability,
        seq_len = seq_len,
    )
    serialized = json.dumps(fingerprint, sort_keys = True).encode()
    digest = hashlib.md5()
    digest.update(serialized)
    return digest.hexdigest()


def _get_cache_file_path(cache_key: str) -> str:
    """
    Return the on-disk location of the JSON cache entry for `cache_key`.

    The autotune cache directory under the user's home is created if it
    does not exist yet, so the returned path is always inside an existing
    directory.
    """
    autotune_dir = os.path.expanduser("~/.cache/unsloth/moe_autotune")
    os.makedirs(autotune_dir, exist_ok = True)
    file_name = "{}.json".format(cache_key)
    return os.path.join(autotune_dir, file_name)


def load_cached_config(cache_key: str) -> Optional[Dict[str, Any]]:
    """
    Load cached kernel configuration from disk.

    Args:
        cache_key: Key identifying the cache entry (see `_get_cache_key`)

    Returns:
        The parsed cache dictionary, or None when no cache file exists, the
        file was written for a different device capability, or the file
        could not be read / parsed. Stale or unreadable files are deleted.
    """
    cache_file = _get_cache_file_path(cache_key)
    if not os.path.exists(cache_file):
        return None

    try:
        with open(cache_file, "r") as f:
            cached_data = json.load(f)

        # Verify cache is still valid (same device, etc.)
        # JSON has no tuple type: the capability tuple written by
        # `save_cached_config` is read back as a list, and `[9, 0] != (9, 0)`
        # is always True in Python. Normalize both sides to tuples so a valid
        # cache entry is not invalidated on every load.
        cached_capability = cached_data.get("device_capability")
        current_device_capability = torch.cuda.get_device_capability()
        if cached_capability is None or tuple(cached_capability) != tuple(
            current_device_capability
        ):
            logger.info("Device capability changed, invalidating cache")
            os.remove(cache_file)
            return None

        logger.info(f"Loaded cached MoE kernel config: {cache_key}")
        return cached_data
    except Exception as e:
        # Best effort: an unreadable cache must never break training, so log,
        # drop the offending file and let the caller re-tune.
        logger.warning(f"Failed to load cache file {cache_file}: {e}")
        try:
            os.remove(cache_file)
        except OSError:
            # File already gone or not removable; nothing more to do.
            pass
        return None


def save_cached_config(
    cache_key: str,
    config_fwd: Any,
    config_bwd_dx: Any,
    config_bwd_dw: Any,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Save kernel configuration to disk cache.

    Each config is stored as its `__dict__` when it has one, otherwise as its
    string form. The file is written to a temporary sibling path first and
    then moved into place with `os.replace`, so a concurrent reader (e.g.
    another process sharing the same cache directory) never observes a
    partially written JSON file.

    Args:
        cache_key: Key identifying the cache entry (see `_get_cache_key`)
        config_fwd: Forward kernel configuration
        config_bwd_dx: Backward dX kernel configuration
        config_bwd_dw: Backward dW kernel configuration
        metadata: Optional extra information stored alongside the configs

    Failures while writing are logged as warnings and otherwise ignored.
    """
    cache_file = _get_cache_file_path(cache_key)

    def _serialize(config: Any) -> Any:
        # Prefer the attribute dict so the config can be rebuilt via **kwargs.
        return config.__dict__ if hasattr(config, "__dict__") else str(config)

    cache_data = {
        "timestamp": time.time(),
        "device_capability": torch.cuda.get_device_capability(),
        "config_fwd": _serialize(config_fwd),
        "config_bwd_dx": _serialize(config_bwd_dx),
        "config_bwd_dw": _serialize(config_bwd_dw),
        "metadata": metadata or {},
    }

    # Per-process temp name so simultaneous writers do not clobber each other.
    tmp_file = f"{cache_file}.{os.getpid()}.tmp"
    try:
        with open(tmp_file, "w") as f:
            json.dump(cache_data, f, indent = 2)
        os.replace(tmp_file, cache_file)
        logger.info(f"Saved MoE kernel config cache: {cache_key}")
    except Exception as e:
        logger.warning(f"Failed to save cache file {cache_file}: {e}")
        # Do not leave a half-written temp file behind.
        try:
            os.remove(tmp_file)
        except OSError:
            pass


def get_or_autotune_moe_kernels(
    num_experts: int,
    hidden_dim: int,
    intermediate_dim: int,
    top_k: int,
    dtype: torch.dtype,
    force_autotune: bool = False,
    seq_len: int = 8192,
) -> Tuple[Any, Any, Any]:
    """
    Get cached kernel configurations or run auto-tuning.

    Lookup order:
        0. If the environment variable UNSLOTH_MOE_DISABLE_AUTOTUNE is "1",
           return the heuristic configs without touching any cache.
        1. In-memory cache (skipped when `force_autotune` is True).
        2. On-disk cache (skipped when `force_autotune` is True).
        3. Run auto-tuning, then store the result in memory and on disk.
    If auto-tuning raises, the default configs are returned instead (they
    are not cached).

    Args:
        num_experts: Number of experts in the MoE layer
        hidden_dim: Hidden dimension of the model
        intermediate_dim: Intermediate dimension for MoE MLP
        top_k: Number of experts to route to
        dtype: Data type for computation
        force_autotune: Force re-running autotuning even if cache exists
        seq_len: Sequence length to use for tuning benchmarks

    Returns:
        Tuple of (config_fwd, config_bwd_dx, config_bwd_dw)
    """
    device_capability = torch.cuda.get_device_capability()
    cache_key = _get_cache_key(
        num_experts,
        hidden_dim,
        intermediate_dim,
        top_k,
        dtype,
        device_capability,
        seq_len,
    )

    # 0. Check for environment variable override to DISABLE autotuning
    if os.environ.get("UNSLOTH_MOE_DISABLE_AUTOTUNE", "0") == "1":
        logger.info(
            f"UNSLOTH_MOE_DISABLE_AUTOTUNE=1: Using Heuristic (Safe) MoE kernel configs for SM{device_capability[0]}{device_capability[1]}"
        )
        return _get_heuristic_configs()

    # 1. In-memory cache
    if not force_autotune and cache_key in _kernel_config_cache:
        logger.info(f"Using in-memory cached MoE kernel configs: {cache_key}")
        return _kernel_config_cache[cache_key]

    # 2. Try to load from disk
    if not force_autotune:
        cached_data = load_cached_config(cache_key)
        if cached_data is not None:
            # Reconstruct config objects from cached data
            try:
                from .grouped_gemm.kernels.tuning import (
                    KernelConfigForward,
                    KernelConfigBackward_dX,
                    KernelConfigBackward_dW,
                )

                config_fwd = KernelConfigForward(**cached_data["config_fwd"])
                config_bwd_dx = KernelConfigBackward_dX(**cached_data["config_bwd_dx"])
                config_bwd_dw = KernelConfigBackward_dW(**cached_data["config_bwd_dw"])

                configs = (config_fwd, config_bwd_dx, config_bwd_dw)
                _kernel_config_cache[cache_key] = configs
                return configs
            except Exception as e:
                # Incompatible / corrupt cache contents: fall through and re-tune.
                logger.warning(f"Failed to reconstruct cached configs: {e}")

    # 3. Run autotuning
    # No separate `_autotune_completed` lookup is needed here: a completed
    # run always populates `_kernel_config_cache`, which step 1 already
    # consulted, and indexing that cache again could only raise KeyError.
    logger.info(f"Running MoE kernel auto-tuning for: {cache_key}")
    logger.info(
        f"Configuration: {num_experts} experts, {hidden_dim} hidden, {intermediate_dim} intermediate, top_k={top_k}"
    )

    try:
        configs = _run_moe_autotuning(
            num_experts, hidden_dim, intermediate_dim, top_k, dtype, seq_len
        )

        # Cache the results
        _kernel_config_cache[cache_key] = configs
        _autotune_completed[cache_key] = True

        # Save to disk
        config_fwd, config_bwd_dx, config_bwd_dw = configs
        save_cached_config(
            cache_key,
            config_fwd,
            config_bwd_dx,
            config_bwd_dw,
            {
                "num_experts": num_experts,
                "hidden_dim": hidden_dim,
                "intermediate_dim": intermediate_dim,
            },
        )

        logger.info(f"MoE kernel auto-tuning completed: {cache_key}")
        return configs

    except Exception as e:
        logger.error(f"MoE kernel auto-tuning failed: {e}")
        message = str(e)
        # The message of a directly raised AttributeError does not contain the
        # class name, so check the exception type as well as the text (the
        # text check still covers errors that wrap the original one).
        is_attribute_error = isinstance(e, AttributeError) or (
            "AttributeError" in message
        )
        if is_attribute_error and "_experimental_make_tensor_descriptor" in message:
            logger.warning(
                "Unsloth: Your Triton version might be incompatible with TMA features. Falling back to default configs."
            )
        logger.info("Falling back to default kernel configurations")
        return _get_default_configs()


def _run_moe_autotuning(
    num_experts: int,
    hidden_dim: int,
    intermediate_dim: int,
    top_k: int,
    dtype: torch.dtype,
    seq_len: int,
) -> Tuple[Any, Any, Any]:
    """
    Run the actual auto-tuning for MoE kernels.

    Dummy inputs are allocated on the CUDA device and the forward, backward
    dX and backward dW grouped GEMM wrappers are each invoked once with
    `autotune = True`. The `best_config` of each autotuned kernel is then
    converted into the corresponding kernel config object.

    Args:
        num_experts: Number of experts in the MoE layer
        hidden_dim: Hidden dimension of the model
        intermediate_dim: Intermediate dimension for MoE MLP
        top_k: Number of experts to route to
        dtype: Data type of the dummy tensors
        seq_len: Accepted for interface compatibility; not used for the
            allocation (a fixed token count is used instead, see below)

    Returns:
        Tuple of (config_fwd, config_bwd_dx, config_bwd_dw)
    """
    from .grouped_gemm.interface import (
        grouped_gemm_forward,
        grouped_gemm_dX,
        grouped_gemm_dW,
    )
    from .grouped_gemm.kernels.forward import _autotuned_grouped_gemm_forward_kernel
    from .grouped_gemm.kernels.backward import (
        _autotuned_grouped_gemm_dX_kernel,
        _autotuned_grouped_gemm_dW_kernel,
    )
    from .grouped_gemm.kernels.tuning import (
        KernelConfigForward,
        KernelConfigBackward_dX,
        KernelConfigBackward_dW,
    )

    def _shared_fields(triton_config: Any) -> Dict[str, Any]:
        # Fields common to all three kernel config classes.
        return {
            "BLOCK_SIZE_M": triton_config.kwargs["BLOCK_SIZE_M"],
            "BLOCK_SIZE_N": triton_config.kwargs["BLOCK_SIZE_N"],
            "BLOCK_SIZE_K": triton_config.kwargs["BLOCK_SIZE_K"],
            "num_warps": triton_config.num_warps,
            "num_stages": triton_config.num_stages,
            "use_tma_store": triton_config.kwargs.get("USE_TMA_STORE", False),
        }

    # Create dummy inputs for tuning
    device = "cuda"
    # Use a fixed, safe number of tokens for autotuning to avoid OOMs and dependency on seq_len
    # 4096 is standard for finding good kernels without consuming 10GB+ VRAM
    # The passed seq_len is deliberately ignored for the actual allocation
    num_tokens = 4096
    total_tokens = num_tokens * top_k

    # Create dummy tensors
    hidden_states = torch.randn(num_tokens, hidden_dim, device = device, dtype = dtype)

    # Create dummy weights. Only the gate/up projection is needed: all three
    # tuning calls below use it, so no down projection tensor is allocated.
    gate_up_weights = torch.randn(
        num_experts, 2 * intermediate_dim, hidden_dim, device = device, dtype = dtype
    )

    # Create dummy routing data: random per-expert token counts, scaled up
    # and then corrected on the first expert so they sum to total_tokens.
    m_sizes = torch.randint(
        1, total_tokens // num_experts + 1, (num_experts,), device = device
    )
    m_sizes = m_sizes * (total_tokens // m_sizes.sum().item())
    # Adjust to ensure exact total
    diff = total_tokens - m_sizes.sum().item()
    if diff != 0:
        m_sizes[0] += diff

    # Random permutation of the token slots (int64, same as the default arange dtype)
    gather_indices = torch.randperm(total_tokens, device = device)

    # Autotune forward kernel - use the interface function with autotune=True
    # This properly invokes the kernel and lets triton handle the autotuning
    logger.info("Autotuning forward kernel (first GEMM)...")
    _ = grouped_gemm_forward(
        X = hidden_states,
        W = gate_up_weights,
        topk = top_k,
        m_sizes = m_sizes,
        gather_indices = gather_indices,
        permute_x = True,
        permute_y = False,
        autotune = True,
    )
    triton_config_fwd = _autotuned_grouped_gemm_forward_kernel.best_config

    # Convert triton.Config to KernelConfigForward
    config_fwd = KernelConfigForward(
        use_tma_load_x = triton_config_fwd.kwargs.get("USE_TMA_LOAD_X", False),
        use_tma_load_w = triton_config_fwd.kwargs.get("USE_TMA_LOAD_W", False),
        **_shared_fields(triton_config_fwd),
    )

    # Autotune backward dX kernel
    logger.info("Autotuning backward dX kernel...")
    dummy_grad = torch.randn(
        total_tokens, 2 * intermediate_dim, device = device, dtype = dtype
    )
    _ = grouped_gemm_dX(
        dY = dummy_grad,
        W = gate_up_weights,
        gather_indices = gather_indices,
        m_sizes = m_sizes,
        topk = top_k,
        permute_x = True,
        permute_y = False,
        autotune = True,
    )
    triton_config_bwd_dx = _autotuned_grouped_gemm_dX_kernel.best_config

    # Convert triton.Config to KernelConfigBackward_dX
    config_bwd_dx = KernelConfigBackward_dX(
        use_tma_load_dy = triton_config_bwd_dx.kwargs.get("USE_TMA_LOAD_dY", False),
        use_tma_load_w = triton_config_bwd_dx.kwargs.get("USE_TMA_LOAD_W", False),
        **_shared_fields(triton_config_bwd_dx),
    )

    # Autotune backward dW kernel
    logger.info("Autotuning backward dW kernel...")
    _ = grouped_gemm_dW(
        X = hidden_states,
        dY = dummy_grad,
        m_sizes = m_sizes,
        gather_indices = gather_indices,
        topk = top_k,
        permute_x = True,
        permute_y = False,
        autotune = True,
    )
    triton_config_bwd_dw = _autotuned_grouped_gemm_dW_kernel.best_config

    # Convert triton.Config to KernelConfigBackward_dW
    config_bwd_dw = KernelConfigBackward_dW(
        use_tma_load_dy = triton_config_bwd_dw.kwargs.get("USE_TMA_LOAD_dY", False),
        use_tma_load_x = triton_config_bwd_dw.kwargs.get("USE_TMA_LOAD_X", False),
        **_shared_fields(triton_config_bwd_dw),
    )

    return config_fwd, config_bwd_dx, config_bwd_dw


def _get_heuristic_configs() -> Tuple[Any, Any, Any]:
    """
    Build the fixed "safe heuristic" kernel configurations.

    Returns:
        A ``(forward, backward_dX, backward_dW)`` tuple of kernel config
        objects. All three use hard-coded tile sizes, enable both permute
        flags and disable every TMA load/store option.

    Per the original author's note, these settings are verified to be safe on
    A100 (SM80) and provide ~9x speedup on H100/B200.
    """
    from .grouped_gemm.kernels.tuning import (
        KernelConfigBackward_dW,
        KernelConfigBackward_dX,
        KernelConfigForward,
    )

    # Both permutation flags are switched on for all three kernels.
    permute_flags = dict(permute_x = True, permute_y = True)

    # Forward tile: 64x128x128 (noted by the author as fitting A100 SMEM).
    # TMA loads might need alignment checks, so they stay disabled here.
    forward_cfg = KernelConfigForward(
        BLOCK_SIZE_M = 64,
        BLOCK_SIZE_N = 128,
        BLOCK_SIZE_K = 128,
        num_warps = 8,
        num_stages = 3,
        use_tma_load_x = False,
        use_tma_load_w = False,
        use_tma_store = False,
        **permute_flags,
    )

    # Backward tile: 64x64x256, shared by the dX and dW kernels.
    backward_common = dict(
        BLOCK_SIZE_M = 64,
        BLOCK_SIZE_N = 64,
        BLOCK_SIZE_K = 256,
        num_warps = 8,
        num_stages = 4,
        use_tma_load_dy = False,
        use_tma_store = False,
        **permute_flags,
    )
    dx_cfg = KernelConfigBackward_dX(use_tma_load_w = False, **backward_common)
    dw_cfg = KernelConfigBackward_dW(use_tma_load_x = False, **backward_common)

    return forward_cfg, dx_cfg, dw_cfg


def _get_default_configs() -> Tuple[Any, Any, Any]:
    """
    Build the fallback kernel configurations.

    Logs a warning that the defaults are in use, then returns a
    ``(forward, backward_dX, backward_dW)`` tuple. All three configs share the
    same 128x128x64 tile, 8 warps and 3 stages, with every TMA option disabled;
    the permute flags are left at the config classes' own defaults.
    """
    from .grouped_gemm.kernels.tuning import (
        KernelConfigBackward_dW,
        KernelConfigBackward_dX,
        KernelConfigForward,
    )

    logger.warning("Using default MoE kernel configurations (not optimal)")

    # Settings common to all three kernels.
    common = dict(
        BLOCK_SIZE_M = 128,
        BLOCK_SIZE_N = 128,
        BLOCK_SIZE_K = 64,
        num_warps = 8,
        num_stages = 3,
        use_tma_store = False,
    )

    forward_cfg = KernelConfigForward(
        use_tma_load_x = False,
        use_tma_load_w = False,
        **common,
    )
    dx_cfg = KernelConfigBackward_dX(
        use_tma_load_dy = False,
        use_tma_load_w = False,
        **common,
    )
    dw_cfg = KernelConfigBackward_dW(
        use_tma_load_dy = False,
        use_tma_load_x = False,
        **common,
    )

    return forward_cfg, dx_cfg, dw_cfg


def clear_cache() -> None:
    """Empty the cached kernel configurations and the completed-autotune record."""
    # Both containers are cleared in place (no rebinding), so no `global`
    # declaration is required.
    for store in (_kernel_config_cache, _autotune_completed):
        store.clear()
    logger.info("Cleared MoE kernel cache")


def is_autotuning_completed(cache_key: str) -> bool:
    """Return True when ``cache_key`` is present in the completed-autotune record."""
    already_tuned = cache_key in _autotune_completed
    return already_tuned


================================================
FILE: unsloth/kernels/moe/benchmark/benchmark_fused_moe.py
================================================
import argparse
import time
from contextlib import nullcontext

import torch
from transformers import AutoConfig
from transformers.models.llama4 import Llama4TextConfig
from transformers.models.llama4.modeling_llama4 import Llama4TextMoe
from transformers.models.qwen3_moe import Qwen3MoeConfig
from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock
from triton.testing import do_bench
from utils import (
    create_kernel_configs,
    get_autotuner,
    post_process_results,
    postprocess_autotune_results,
    save_results,
)

from grouped_gemm.kernels.autotuning import (
    DEFAULT_K_BLOCK_SIZES,
    DEFAULT_M_BLOCK_SIZES,
    DEFAULT_N_BLOCK_SIZES,
    DEFAULT_NUM_STAGES,
    DEFAULT_NUM_WARPS,
)
from grouped_gemm.kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
    KernelResult,
    TritonTuningContext,
)
from grouped_gemm.reference.layers.llama4_moe import Llama4TritonTextMoe
from grouped_gemm.reference.layers.qwen3_moe import Qwen3MoeFusedGroupedGEMMBlock

# Seed handed to torch.manual_seed before each benchmark creates its inputs.
SEED = 42
# Model ids whose configs are fetched with AutoConfig.from_pretrained in the
# CLI entry point at the bottom of this file (selected by --model).
LLAMA4_ID = "meta-llama/Llama-4-Scout-17B-16E"
QWEN3_MODEL_ID = "Qwen/Qwen3-30B-A3B"


def run_benchmark_forward(
    ref_model: torch.nn.Module,
    tt_model: torch.nn.Module,
    config: AutoConfig,
    seqlen: int,
    dtype: torch.dtype,
    autotune: bool,
    kernel_config_fwd: KernelConfigForward = None,
    bs: int = 1,
):
    """
    Time the forward pass of the reference block against the fused block.

    Args:
        ref_model: reference MoE block.
        tt_model: fused (Triton grouped GEMM) MoE block.
        config: model config; only ``hidden_size`` is read here.
        seqlen: sequence length of the random input.
        dtype: dtype of the random input.
        autotune: when False, ``kernel_config_fwd`` must be given and the fused
            benchmark runs inside a ``TritonTuningContext`` for that config.
        kernel_config_fwd: forward kernel config used when ``autotune`` is False.
        bs: batch size of the random input.

    Returns:
        ``(ref_forward_time, fused_forward_time)`` as reported by ``do_bench``,
        or ``(0, 1)`` when a fixed config was used and the tuning context
        reports it did not succeed.
    """
    # Should not be needed when running using pytest -- autouse fixture in conftest.py
    torch.manual_seed(SEED)

    inputs = torch.randn(
        bs,
        seqlen,
        config.hidden_size,
        dtype = dtype,
        device = "cuda",
        requires_grad = True,
    )

    def forward_ref():
        return ref_model(inputs)

    def forward_fused():
        return tt_model(inputs)

    ref_forward_time = do_bench(forward_ref)

    if autotune:
        tuning_context = nullcontext()
    else:
        assert kernel_config_fwd is not None
        tuning_context = TritonTuningContext(kernel_config_fwd)

    with tuning_context:
        fused_forward_time = do_bench(forward_fused)

    # `success` is only consulted for the fixed-config path (nullcontext has no such attribute).
    if not (autotune or tuning_context.success):
        return 0, 1

    print(
        f"Forward: ref {ref_forward_time:.4f}, fused {fused_forward_time:.4f}, speedup {ref_forward_time / fused_forward_time:.1f}x"
    )
    return ref_forward_time, fused_forward_time


def run_benchmark_backward(
    ref_model: torch.nn.Module,
    tt_model: torch.nn.Module,
    config: AutoConfig,
    seqlen: int,
    dtype: torch.dtype,
    bs = 1,
):
    """
    Time the backward pass of the reference block against the fused block.

    One forward pass is run through each model on identical random inputs; the
    benchmark then repeatedly back-propagates the same random output gradient
    through the retained graphs.

    Args:
        ref_model: reference MoE block; its call must return a 2-tuple.
        tt_model: fused MoE block; its call must return a 2-tuple.
        config: model config; only ``hidden_size`` is read here.
        seqlen: sequence length of the random input.
        dtype: dtype of the random input.
        bs: batch size of the random input.

    Returns:
        ``(ref_backward_time, fused_backward_time)`` as reported by ``do_bench``.
    """
    # Should not be needed when running using pytest -- autouse fixture in conftest.py
    torch.manual_seed(SEED)

    ref_input = torch.randn(
        bs,
        seqlen,
        config.hidden_size,
        dtype = dtype,
        device = "cuda",
        requires_grad = True,
    )
    # Independent leaf tensor with the same values for the fused model.
    fused_input = ref_input.detach().clone().requires_grad_(True)

    ref_output, _ = ref_model(ref_input)

    # Trim the forward autotuner's candidate list to its first 20 entries
    # before the fused forward pass (original note: "Prevent autotuning forward pass").
    from grouped_gemm.kernels.forward import (
        _autotuned_grouped_gemm_forward_kernel as forward_kernel,
    )

    forward_kernel.configs = forward_kernel.configs[:20]
    fused_output, _ = tt_model(fused_input)

    # Bench
    grad_output = torch.randn_like(ref_output)

    def backward_ref():
        return ref_output.backward(grad_output, retain_graph = True)

    def backward_fused():
        return fused_output.backward(grad_output, retain_graph = True)

    ref_backward_time = do_bench(
        backward_ref, grad_to_none = [ref_input, *ref_model.parameters()]
    )
    fused_backward_time = do_bench(
        backward_fused, grad_to_none = [fused_input, *tt_model.parameters()]
    )
    print(
        f"Backward: ref {ref_backward_time:.4f}, fused {fused_backward_time:.4f}, speedup {ref_backward_time / fused_backward_time:.1f}x"
    )
    return ref_backward_time, fused_backward_time


def setup_model(
    config: Qwen3MoeConfig | Llama4TextConfig,
    dtype,
    permute_x,
    permute_y,
    autotune,
    kernel_config_fwd,
    kernel_config_bwd_dW,
    kernel_config_bwd_dX,
    dX_only = False,
    dW_only = False,
    overlap_router_shared = False,
    device = "cuda",
):
    """
    Build the reference MoE block and its fused grouped-GEMM counterpart.

    The model family is chosen from the type of ``config``:
    ``Qwen3MoeConfig`` builds a ``Qwen3MoeSparseMoeBlock`` and converts it with
    ``Qwen3MoeFusedGroupedGEMMBlock.from_hf``; ``Llama4TextConfig`` builds a
    ``Llama4TextMoe`` and a ``Llama4TritonTextMoe`` directly from the config.
    ``overlap_router_shared`` is only forwarded on the Llama4 path.

    Returns:
        ``(ref_model, tt_model)``, both moved to ``device`` and cast to ``dtype``.

    Raises:
        ValueError: if ``config`` is neither of the two supported config types.
    """
    # Keyword arguments accepted by both fused implementations.
    fused_kwargs = dict(
        permute_x = permute_x,
        permute_y = permute_y,
        autotune = autotune,
        kernel_config_fwd = kernel_config_fwd,
        kernel_config_bwd_dW = kernel_config_bwd_dW,
        kernel_config_bwd_dX = kernel_config_bwd_dX,
        dX_only = dX_only,
        dW_only = dW_only,
    )

    if isinstance(config, Qwen3MoeConfig):
        ref_model = Qwen3MoeSparseMoeBlock(config).to(device, dtype)
        # Triton kernel grouped gemm version of MoE Block -- this is what we're testing
        fused_block = Qwen3MoeFusedGroupedGEMMBlock.from_hf(ref_model, **fused_kwargs)
        return ref_model, fused_block.to(device, dtype)

    if isinstance(config, Llama4TextConfig):
        ref_model = Llama4TextMoe(config).to(device, dtype)
        fused_block = Llama4TritonTextMoe(
            config,
            overlap_router_shared = overlap_router_shared,
            **fused_kwargs,
        )
        return ref_model, fused_block.to(device, dtype)

    raise ValueError(f"Unrecognized config {type(config).__name__}")


def run_benchmark(
    mode: str,
    model_config: Qwen3MoeConfig | Llama4TextConfig,
    seqlen: int,
    dtype: torch.dtype,
    permute_x: bool,
    permute_y: bool,
    autotune: bool,
    kernel_config_fwd: KernelConfigForward = None,
    kernel_config_bwd_dW: KernelConfigBackward_dW = None,
    kernel_config_bwd_dX: KernelConfigBackward_dX = None,
    overlap_router_shared: bool = False,
    results_dir: str = None,
):
    """
    Build the reference and fused models and benchmark them for one mode.

    Args:
        mode: ``"forward"`` runs the forward benchmark; any other value
            (``"backward"``, ``"dW"``, ``"dX"``) runs the backward benchmark.
            ``"dW"`` / ``"dX"`` additionally set the matching ``*_only`` flag
            on the fused model.
        model_config: Qwen3 MoE or Llama4 text config used to build the models.
        seqlen: sequence length of the benchmark input.
        dtype: dtype of the models and the input.
        permute_x, permute_y: forwarded to the fused model.
        autotune: when True, the autotuner(s) for ``mode`` are looked up and
            their cached results are printed and saved after the benchmark.
        kernel_config_fwd, kernel_config_bwd_dW, kernel_config_bwd_dX:
            explicit kernel configs forwarded to the fused model.
        overlap_router_shared: forwarded to ``setup_model``.
        results_dir: directory handed to ``postprocess_autotune_results``.

    Returns:
        ``(ref_time, fused_time)`` from the selected benchmark.
    """
    autotuner = get_autotuner(mode) if autotune else None

    # Derive both flags from `mode` so each is always bound. Previously the
    # "dW" branch never assigned dX_only (and vice versa), which raised
    # UnboundLocalError at the setup_model call below.
    dW_only = mode == "dW"
    dX_only = mode == "dX"

    ref_model, tt_model = setup_model(
        model_config,
        dtype = dtype,
        permute_x = permute_x,
        permute_y = permute_y,
        autotune = autotune,
        kernel_config_fwd = kernel_config_fwd,
        kernel_config_bwd_dW = kernel_config_bwd_dW,
        kernel_config_bwd_dX = kernel_config_bwd_dX,
        dX_only = dX_only,
        dW_only = dW_only,
        overlap_router_shared = overlap_router_shared,
    )

    if mode == "forward":
        ref_time, fused_time = run_benchmark_forward(
            ref_model,
            tt_model,
            config = model_config,
            seqlen = seqlen,
            dtype = dtype,
            autotune = autotune,
            kernel_config_fwd = kernel_config_fwd,
        )
    else:
        ref_time, fused_time = run_benchmark_backward(
            ref_model, tt_model, config = model_config, seqlen = seqlen, dtype = dtype
        )

    if autotune:
        if mode == "backward":
            # get_autotuner returns a (dW, dX) pair for the combined backward mode.
            autotuner_dW, autotuner_dX = autotuner
            postprocess_autotune_results(
                autotuner_dW, "dW", ref_time, fused_time, results_dir
            )
            postprocess_autotune_results(
                autotuner_dX, "dX", ref_time, fused_time, results_dir
            )
        else:
            postprocess_autotune_results(
                autotuner, mode, ref_time, fused_time, results_dir
            )

    return ref_time, fused_time


if __name__ == "__main__":
    # CLI entry point: parse the options, load the selected model's config and
    # benchmark a single mode (forward / backward / dW / dX).
    parser = argparse.ArgumentParser()
    parser.add_argument("--results_dir", type = str, default = "benchmark_results")
    parser.add_argument("--model", type = str, choices = ["llama4", "qwen3"], required = True)
    parser.add_argument("--seqlen", type = int, default = 1024)
    parser.add_argument(
        "--dtype", type = str, choices = ["bfloat16", "float16"], default = "bfloat16"
    )
    parser.add_argument("--permute_x", action = "store_true")
    parser.add_argument("--permute_y", action = "store_true")
    parser.add_argument("--autotune", action = "store_true")
    parser.add_argument("--overlap_router_shared", action = "store_true")
    # Each of the following sweep options takes two ints, read as [start, end]
    # bounds by create_kernel_configs; defaults span the first..last entry of
    # the corresponding DEFAULT_* list.
    parser.add_argument(
        "--BLOCK_SIZE_M",
        nargs = 2,
        type = int,
        default = [DEFAULT_M_BLOCK_SIZES[0], DEFAULT_M_BLOCK_SIZES[-1]],
    )
    parser.add_argument(
        "--BLOCK_SIZE_N",
        nargs = 2,
        type = int,
        default = [DEFAULT_N_BLOCK_SIZES[0], DEFAULT_N_BLOCK_SIZES[-1]],
    )
    parser.add_argument(
        "--BLOCK_SIZE_K",
        nargs = 2,
        type = int,
        default = [DEFAULT_K_BLOCK_SIZES[0], DEFAULT_K_BLOCK_SIZES[-1]],
    )
    parser.add_argument(
        "--num_warps",
        nargs = 2,
        type = int,
        default = [DEFAULT_NUM_WARPS[0], DEFAULT_NUM_WARPS[-1]],
    )
    parser.add_argument(
        "--num_stages",
        nargs = 2,
        type = int,
        default = [DEFAULT_NUM_STAGES[0], DEFAULT_NUM_STAGES[-1]],
    )
    parser.add_argument(
        "--use_tma_load_w", action = "store_true"
    )  # No need to specify, will automatically parametrize these for each kernel config
    parser.add_argument(
        "--use_tma_load_x", action = "store_true"
    )  # No need to specify, will automatically parametrize these for each kernel config
    parser.add_argument(
        "--use_tma_load_dy", action = "store_true"
    )  # No need to specify, will automatically parametrize these for each kernel config
    parser.add_argument(
        "--mode",
        type = str,
        choices = ["forward", "backward", "dW", "dX"],
        default = "forward",
    )
    args = parser.parse_args()
    # Replace the dtype name with the actual torch dtype object (e.g. torch.bfloat16).
    args.dtype = getattr(torch, args.dtype)

    model_id = QWEN3_MODEL_ID if args.model == "qwen3" else LLAMA4_ID
    model_config = AutoConfig.from_pretrained(model_id)
    # For llama4 the MoE block is built from the nested text config.
    model_config = model_config.text_config if args.model == "llama4" else model_config

    mode = args.mode

    if args.autotune:
        # logging.basicConfig(level=logging.INFO)
        print(
            f"Benchmarking {model_id} {mode}: seqlen={args.seqlen}, dtype={args.dtype}, permute_x={args.permute_x}, permute_y={args.permute_y}, autotune"
        )
        # Wall-clock time for the whole run, including model setup and autotuning.
        start_time = time.time()
        ref_time, fused_time = run_benchmark(
            args.mode,
            model_config,
            seqlen = args.seqlen,
            dtype = args.dtype,
            permute_x = args.permute_x,
            permute_y = args.permute_y,
            autotune = args.autotune,
            overlap_router_shared = args.overlap_router_shared,
            results_dir = args.results_dir,
        )
        end_time = time.time()
        print(f"Total time: {end_time - start_time:.4f} seconds")

    # NOTE: better to use autotuner for now, since the MoE block needs 2 different kernel configs for forward (2 grouped gemms, gate_up_proj and down_proj)
    # and the backward pass needs 4 different kernel configs (2 grouped gemms each for dW and dX)
    # The benchmark only supports 1 kernel config at a time so the same config will be used for both grouped gemms, which is suboptimal.
    else:
        # NOTE(review): this assert always fails, so the manual sweep below is
        # unreachable in a normal run; it would only execute if assertions are
        # disabled (python -O).
        assert False, "Use autotune for now"
        kernel_configs = create_kernel_configs(args, args.permute_x, args.permute_y)
        print(f"Running {len(kernel_configs)} kernel configs")
        # Default configs fill the two kernel slots that are not being swept.
        default_kernel_config_fwd = KernelConfigForward(
            permute_x = args.permute_x, permute_y = args.permute_y
        )
        default_kernel_config_bwd_dW = KernelConfigBackward_dW(
            permute_x = args.permute_x, permute_y = args.permute_y
        )
        default_kernel_config_bwd_dX = KernelConfigBackward_dX(
            permute_x = args.permute_x, permute_y = args.permute_y
        )
        results = []
        for kernel_config in kernel_configs:
            # Only the kernel matching --mode receives the swept config;
            # mode "backward" is not supported on this path and raises below.
            if args.mode == "forward":
                kernel_config_fwd = kernel_config
                kernel_config_bwd_dW = default_kernel_config_bwd_dW
                kernel_config_bwd_dX = default_kernel_config_bwd_dX
            elif args.mode == "dW":
                kernel_config_fwd = default_kernel_config_fwd
                kernel_config_bwd_dW = kernel_config
                kernel_config_bwd_dX = default_kernel_config_bwd_dX
            elif args.mode == "dX":
                kernel_config_fwd = default_kernel_config_fwd
                kernel_config_bwd_dW = default_kernel_config_bwd_dW
                kernel_config_bwd_dX = kernel_config
            else:
                raise ValueError(f"Invalid mode: {args.mode}")
            print(
                f"Benchmarking {model_id} {args.mode} with seqlen={args.seqlen}, dtype={args.dtype}, permute_x={args.permute_x}, permute_y={args.permute_y}, kernel_config_fwd={kernel_config_fwd}, kernel_config_bwd_dW={kernel_config_bwd_dW}, kernel_config_bwd_dX={kernel_config_bwd_dX}"
            )

            ref_time, fused_time = run_benchmark(
                args.mode,
                model_config,
                seqlen = args.seqlen,
                dtype = args.dtype,
                permute_x = kernel_config.permute_x,
                permute_y = kernel_config.permute_y,
                autotune = False,
                kernel_config_fwd = kernel_config_fwd,
                kernel_config_bwd_dW = kernel_config_bwd_dW,
                kernel_config_bwd_dX = kernel_config_bwd_dX,
            )
            results.append(
                KernelResult(
                    torch_time = ref_time,
                    triton_time = fused_time,
                    speedup = ref_time / fused_time,
                    kernel_config = kernel_config,
                )
            )
        # Collect all per-config results into one table and write it as CSV.
        df = post_process_results(
            results, args.mode, args.seqlen, args.dtype, args.autotune
        )
        save_results(
            df, args.results_dir, args.mode, args.seqlen, args.dtype, args.autotune
        )


================================================
FILE: unsloth/kernels/moe/benchmark/utils.py
================================================
import argparse
import datetime
import json
import logging
import math
import os
from itertools import product

import pandas as pd
import torch

from grouped_gemm.kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
    KernelResult,
)

# Seed constant; none of the functions defined in this module read it.
SEED = 42


def create_merged_results(
    df: pd.DataFrame, mode: str, seqlen: int, dtype: torch.dtype, autotune: bool
):
    """
    Prefix a kernel-result table with the benchmark settings it was produced under.

    The columns ``mode``, ``seqlen``, ``dtype`` and ``autotune`` are written
    onto ``df`` itself (each holding the same value on every row); the returned
    frame has those four columns first, followed by the original columns in
    their original order.
    """
    result_columns = list(df.columns)
    run_settings = {
        "mode": mode,
        "seqlen": seqlen,
        "dtype": dtype,
        "autotune": autotune,
    }
    for name, value in run_settings.items():
        df[name] = value
    # Settings columns go first, then the kernel result columns.
    return df[[*run_settings, *result_columns]]


def post_process_results(
    results: list[KernelResult],
    mode: str,
    seqlen: int,
    dtype: torch.dtype,
    autotune: bool,
):
    """
    Turn a list of kernel results into a table annotated with the run settings.

    The results are converted with ``KernelResult.to_dataframe`` (sorted by
    ``"speedup"``) and then passed through ``create_merged_results``.
    """
    results_table = KernelResult.to_dataframe(results, sort_by = "speedup")
    return create_merged_results(results_table, mode, seqlen, dtype, autotune)


def save_results(
    df: pd.DataFrame,
    results_dir: str,
    mode: str,
    seqlen: int,
    dtype: torch.dtype,
    autotune: bool,
):
    """
    Write ``df`` as CSV to ``{results_dir}/{mode}/{timestamp}_{seqlen}_{dtype}.csv``.

    The timestamp has minute resolution (``%Y%m%d_%H%M``) and the dtype part is
    the text after the last ``.`` in ``str(dtype)``. The target directory is
    created when it does not exist. ``autotune`` is accepted but not used.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
    dtype_name = str(dtype).split('.')[-1]
    save_dir = f"{results_dir}/{mode}"
    save_path = f"{save_dir}/{timestamp}_{seqlen}_{dtype_name}.csv"

    directory_missing = not os.path.exists(save_dir)
    if directory_missing:
        os.makedirs(save_dir)

    print(f"Saving results to {save_path}")
    df.to_csv(save_path, index = False)


def create_kernel_configs(args: argparse.Namespace, permute_x: bool, permute_y: bool):
    """
    Enumerate kernel configs for ``args.mode`` and drop unsupported combinations.

    The sweep is the Cartesian product of the block-size ranges (powers of two
    between the two values of ``args.BLOCK_SIZE_M/N/K``), the warp counts
    (``args.num_warps`` bounds, step 2), the stage counts (``args.num_stages``
    bounds, step 1) and both on/off settings of the mode's two TMA-load
    switches. Afterwards, configs that combine a permutation with TMA loading
    of the permuted operand are removed:

    * ``permute_x`` with ``use_tma_load_x`` (modes ``forward`` and ``dW``)
    * ``permute_y`` with ``use_tma_load_dy`` (modes ``dW`` and ``dX``)

    Returns:
        The list of remaining config objects.

    Raises:
        ValueError: if ``args.mode`` is not ``forward``, ``dW`` or ``dX`` (raised
            while building the first config).
    """
    m_blocks = power_of_two_range(args.BLOCK_SIZE_M[0], args.BLOCK_SIZE_M[1])
    n_blocks = power_of_two_range(args.BLOCK_SIZE_N[0], args.BLOCK_SIZE_N[1])
    k_blocks = power_of_two_range(args.BLOCK_SIZE_K[0], args.BLOCK_SIZE_K[1])
    warp_counts = multiples_of_range(args.num_warps[0], args.num_warps[1], step = 2)
    stage_counts = multiples_of_range(
        args.num_stages[0], args.num_stages[1], step = 1
    )

    mode = args.mode
    # Per mode: the config class and the names of the two TMA-load switches
    # that receive the swept True/False values (first, second).
    builders = {
        "forward": (KernelConfigForward, "use_tma_load_w", "use_tma_load_x"),
        "dW": (KernelConfigBackward_dW, "use_tma_load_dy", "use_tma_load_x"),
        "dX": (KernelConfigBackward_dX, "use_tma_load_dy", "use_tma_load_w"),
    }

    kernel_configs = []
    search_space = product(
        m_blocks,
        n_blocks,
        k_blocks,
        warp_counts,
        stage_counts,
        [True, False],
        [True, False],
    )
    for bm, bn, bk, warps, stages, tma_first, tma_second in search_space:
        if mode not in builders:
            raise ValueError(f"Invalid mode: {mode}")
        config_cls, first_switch, second_switch = builders[mode]
        kernel_configs.append(
            config_cls(
                BLOCK_SIZE_M = bm,
                BLOCK_SIZE_N = bn,
                BLOCK_SIZE_K = bk,
                num_warps = warps,
                num_stages = stages,
                permute_x = permute_x,
                permute_y = permute_y,
                **{first_switch: tma_first, second_switch: tma_second},
            )
        )

    logging.info(f"Pruning {len(kernel_configs)} kernel configs")

    def conflicts_with_permutation(config):
        # TMA load of X is excluded whenever X is permuted (forward, dW).
        if mode in ("forward", "dW") and permute_x and config.use_tma_load_x:
            return True
        # TMA load of dY is excluded whenever Y is permuted (dW, dX).
        if mode in ("dW", "dX") and permute_y and config.use_tma_load_dy:
            return True
        return False

    pruned_configs = [
        config for config in kernel_configs if not conflicts_with_permutation(config)
    ]
    logging.info(f"After pruning, {len(pruned_configs)} kernel configs")

    return pruned_configs


def power_of_two_range(start, end):
    """
    Return the powers of two from ``start`` up to and including ``end``.

    Both bounds are converted to exponents with ``log2`` and truncated to
    integers, so a bound that is not an exact power of two is rounded down to
    the nearest one. An empty list is returned when ``end`` is below ``start``.
    """
    lowest_exp = int(math.log2(start))
    highest_exp = int(math.log2(end))
    exponents = range(lowest_exp, highest_exp + 1)
    return [2**exponent for exponent in exponents]


def multiples_of_range(start, end, step = 1):
    """
    Return ``start, start + step, ...`` without ever exceeding ``end``.

    ``end`` is inclusive: it is part of the result whenever it lies exactly on
    the step grid. The stop bound is ``end + 1`` rather than ``end + step``,
    because the latter yields one value beyond ``end`` when ``end - start`` is
    not a multiple of ``step`` (e.g. ``(1, 4, step = 2)`` produced ``[1, 3, 5]``).

    Args:
        start: first value of the sequence.
        end: inclusive upper bound.
        step: positive increment between consecutive values.

    Returns:
        A list of ints.
    """
    return list(range(start, end + 1, step))


def map_key_to_args(key, mode):
    """Placeholder with no implementation yet; ignores its arguments and returns None."""
    return None


def save_autotune_results(autotune_cache, mode, ref_time, fused_time, results_dir):
    """
    Write one JSON file per autotune cache entry.

    Files are written to
    ``{results_dir}/{mode}/autotune/{timestamp}/{device_name}/``, where the
    timestamp has minute resolution and the device name is the current CUDA
    device's name with spaces replaced by underscores. Each file is named after
    its cache key (parts joined with ``_``, any ``torch.`` prefix stripped) and
    holds the config's ``all_kwargs()`` plus ``ref_time`` and ``fused_time``.

    Args:
        autotune_cache: mapping from a key (iterable of parts) to a config
            object exposing ``all_kwargs()``.
        mode: benchmark mode, used as a path component.
        ref_time: reference timing stored alongside every config.
        fused_time: fused-kernel timing stored alongside every config.
        results_dir: root directory for the output.
    """
    device_name = torch.cuda.get_device_name().replace(" ", "_")
    dt = datetime.datetime.now().strftime("%Y%m%d_%H%M")
    save_dir = f"{results_dir}/{mode}/autotune/{dt}/{device_name}"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok = True)

    for key, config in autotune_cache.items():
        # Stringify every part BEFORE splitting: a non-string part whose text
        # contains "torch" (e.g. a torch.dtype object) has no .split method and
        # previously raised AttributeError. For string parts the result is
        # unchanged ("torch.bfloat16" -> "bfloat16", others kept as-is).
        key_parts = [str(part).split("torch.")[-1] for part in key]
        filename = "_".join(key_parts)
        save_path = f"{save_dir}/{filename}.json"
        print(f"Saving autotune results to {save_path}")
        result = {
            **config.all_kwargs(),
            "ref_time": ref_time,
            "fused_time": fused_time,
        }
        with open(save_path, "w") as f:
            json.dump(result, f)


def get_autotuner(mode):
    """
    Look up the autotuned grouped-GEMM kernel object(s) for a benchmark mode.

    Kernel modules are imported lazily, only once a supported mode is requested.

    Returns:
        The forward kernel for ``"forward"``, the dW kernel for ``"dW"``, the
        dX kernel for ``"dX"``, or a ``(dW kernel, dX kernel)`` tuple for
        ``"backward"``.

    Raises:
        ValueError: for any other ``mode``.
    """
    if mode == "forward":
        from grouped_gemm.kernels.forward import (
            _autotuned_grouped_gemm_forward_kernel as forward_kernel,
        )

        return forward_kernel

    if mode in ("dW", "dX", "backward"):
        from grouped_gemm.kernels.backward import (
            _autotuned_grouped_gemm_dW_kernel as dW_kernel,
            _autotuned_grouped_gemm_dX_kernel as dX_kernel,
        )

        if mode == "dW":
            return dW_kernel
        if mode == "dX":
            return dX_kernel
        return dW_kernel, dX_kernel

    raise ValueError(f"Invalid mode: {mode}")


def postprocess_autotune_results(autotuner, mode, ref_time, fused_time, results_dir):
    """
    Print every entry of the autotuner's cache, then persist the cache.

    Each cache entry is printed as ``{mode} {key}: {config kwargs}``; the whole
    cache is then handed to ``save_autotune_results`` together with the timings
    and the output directory.
    """
    for cache_key, best_config in autotuner.cache.items():
        print(f"{mode} {cache_key}: {best_config.all_kwargs()}")

    save_autotune_results(
        autotuner.cache,
        mode = mode,
        ref_time = ref_time,
        fused_time = fused_time,
        results_dir = results_dir,
    )


================================================
FILE: unsloth/kernels/moe/grouped_gemm/LICENSE
================================================
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.

  A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate.  Many developers of free software are heartened and
encouraged by the resulting cooperation.  However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.

  The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community.  It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server.  Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.

  An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals.  This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Remote Network Interaction; Use with the GNU General Public License.

  Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software.  This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time.  Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source.  For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code.  There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

================================================
FILE: unsloth/kernels/moe/grouped_gemm/__init__.py
================================================


================================================
FILE: unsloth/kernels/moe/grouped_gemm/interface.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import logging
import warnings
from dataclasses import asdict
from unsloth import DEVICE_TYPE

import torch
import triton

from .kernels.backward import (
    _autotuned_grouped_gemm_dW_kernel,
    _autotuned_grouped_gemm_dX_kernel,
    _grouped_gemm_dW_kernel,
    _grouped_gemm_dX_kernel,
)
from .kernels.forward import (
    _autotuned_grouped_gemm_forward_kernel,
    _grouped_gemm_forward_kernel,
)
from .kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Set formatter to include timestamp, pathname and lineno
formatter = logging.Formatter(
    "%(asctime)s::%(levelname)s,%(pathname)s:%(lineno)d:: %(message)s"
)

# Add console handler
# The handler is created without an explicit stream and attached at import
# time; no level is set here on either the handler or the logger.
# NOTE(review): `propagate` is left at its default, so records may also reach
# handlers installed on ancestor loggers -- confirm this is intended.
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)


# Precompute TMA support to avoid graph breaks
# TMA requires both:
# 1. NVIDIA GPU with capability >= 9 (Hopper+)
# 2. Triton version with TMA API (make_tensor_descriptor or _experimental_make_tensor_descriptor)
def _check_tma_support():
    """Report whether TMA-based kernels can be used in this process.

    Returns ``False`` immediately when ``DEVICE_TYPE`` is ``"xpu"`` or
    ``"hip"``. Otherwise the result is ``True`` only when the current CUDA
    device reports a major compute capability of at least 9 and
    ``triton.language`` exposes a tensor-descriptor factory under either its
    stable or its experimental name.
    """
    unsupported_backends = ("xpu", "hip")
    if DEVICE_TYPE in unsupported_backends:
        return False
    import triton.language as tl

    major_capability = torch.cuda.get_device_capability()[0]
    # The descriptor factory may be present under the stable name or the
    # older experimental one; either is accepted.
    descriptor_api_names = (
        "make_tensor_descriptor",
        "_experimental_make_tensor_descriptor",
    )
    has_descriptor_api = any(
        hasattr(tl, api_name) for api_name in descriptor_api_names
    )
    return major_capability >= 9 and has_descriptor_api


# Evaluated once, at import time; `supports_tma()` returns this cached value
# instead of re-running the check.
_SUPPORTS_TMA = _check_tma_support()

# Check if triton.set_allocator is available (Triton 3.0+)
# Feature-detected with hasattr rather than by comparing version strings.
_HAS_SET_ALLOCATOR = hasattr(triton, "set_allocator")


def supports_tma() -> bool:
    """Return the module-level `_SUPPORTS_TMA` flag computed at import time."""
    return _SUPPORTS_TMA


# Helper to support allow_in_graph
try:
    from torch.compiler import allow_in_graph
except ImportError:
    from torch._dynamo import allow_in_graph


# Helper to detect if we're in tracing/compilation mode
def _is_tracing(*tensors):
    """
    Tell whether any argument is a placeholder tensor from torch.compile tracing.

    The decision is made purely on the class name of each argument: if it is
    one of "FakeTensor", "FunctionalTensor" or "FunctionalTensorWrapper" the
    call is treated as tracing and Triton kernels must not be launched.
    Real tensors (and any other objects) yield False, meaning the kernels
    must actually run.

    NOTE: torch.compiler.is_compiling() is deliberately NOT used here, since it
    is True during both tracing and execution; only the tracing phase, where
    the tensors are actually fake, should skip the kernels.
    """
    placeholder_type_names = (
        "FakeTensor",
        "FunctionalTensor",
        "FunctionalTensorWrapper",
    )
    return any(
        type(candidate).__name__ in placeholder_type_names for candidate in tensors
    )


# Cache of allocator callables, keyed by the `device` value they were built for.
_per_device_alloc_fns = {}


def get_per_device_per_stream_alloc_fn(device):
    """Return the allocator callable for `device`, building it on first use.

    The returned function has the signature ``alloc_fn(size, alignment, stream)``
    and hands back an int8 tensor on `device` with at least `size` elements.
    One buffer is kept per `stream` and reused across calls; it is replaced by
    a freshly allocated tensor only when the requested `size` exceeds the
    number of elements in the cached one. `alignment` must be 128 (asserted).

    Repeated calls with the same `device` return the same callable.
    """
    existing_fn = _per_device_alloc_fns.get(device)
    if existing_fn is not None:
        return existing_fn

    buffers_by_stream = {}

    def alloc_fn(size: int, alignment: int, stream):
        assert alignment == 128
        buffer = buffers_by_stream.get(stream)
        if buffer is None or buffer.numel() < size:
            buffer = torch.empty(size, device = device, dtype = torch.int8)
            buffers_by_stream[stream] = buffer
            # NOTE(review): `__hibernate__` is an ad-hoc attribute set on the
            # tensor; its consumer is not visible in this module -- confirm.
            buffer.__hibernate__ = {"type": "ignore"}
        return buffer

    _per_device_alloc_fns[device] = alloc_fn
    return alloc_fn


def log_kernel_info(
    compiled_kernel: triton.compiler.CompiledKernel, best_config: triton.Config = None
):
    """Emit debug-level diagnostics for a compiled Triton kernel.

    Logs the kernel's name together with its `n_regs`, `n_spills` and
    `metadata` attributes as read from `compiled_kernel`. When `best_config`
    is provided, a second debug record reports it as the autotuned
    configuration for that kernel.
    """
    kernel_name = compiled_kernel.name
    nregs, nspills, metadata = (
        compiled_kernel.n_regs,
        compiled_kernel.n_spills,
        compiled_kernel.metadata,
    )
    logger.debug(
        f"{kernel_name}: n_regs={nregs} n_spills={nspills} metadata={metadata}"
    )
    if best_config is None:
        return
    logger.debug(f"{kernel_name} autotuned best_config: {best_config}")


@allow_in_graph
def grouped_gemm_forward(
    X: torch.Tensor,
    W: torch.Tensor,
    topk: int,
    m_sizes: torch.Tensor,
    gather_indices: torch.Tensor = None,
    topk_weights: torch.Tensor = None,
    # Fusions
    permute_x: bool = False,
    permute_y: bool = False,
    fuse_mul_post: bool = False,
    # Autotuning - manual kernel params will be ignored if autotune is True
    autotune: bool = False,
    # Kernel tuning params if not autotuning -- NOTE: these params need to be tuned, otherwise performance will be poor
    BLOCK_SIZE_M: int = 32,
    BLOCK_SIZE_N: int = 32,
    BLOCK_SIZE_K: int = 32,
    num_warps: int = 4,
    num_stages: int = 2,
    use_tma_load_w: bool = False,
    use_tma_load_x: bool = False,
    use_tma_store: bool = False,
    # software pipelining -- set to True for now, won't impact until loop is re-written
    flatten: bool = True,
    # debugging
    debug: bool = False,
) -> torch.Tensor:
    """
    Grouped GEMM forward pass for MoE MLPs.

    The implementation offers a number of fusions specific to MoE:
    - `permute_x`: fuse the permutation of hidden states from token order (original order) to grouped expert order, typically only needed for the first grouped GEMM in an MoE MLP.
        - When `permute_x` is True, `X` is expected to be of shape (num_tokens, K).
        - When `permute_x` is False, `X` is expected to be of shape (total_tokens, K) where `total_tokens = num_tokens * topk` AND already permuted to grouped expert order, i.e., hidden states are sorted such that tokens assigned to each expert are contiguous.
    - `permute_y`: fuse the permutation of the output from expert grouped order back to original token order, typically only needed for the second grouped GEMM in an MoE MLP.
    - `fuse_mul_post`: fuse the multiplication of the routed output with topk_weights, used only when `permute_y` is True. NOTE: this should only be used when using this kernel for inference, not for training.

    Args:
        X: (M, K) hidden states where M is the num_tokens if `permute_x` is True, otherwise `total_tokens` where `total_tokens = num_tokens * topk`. Leading dimensions are flattened.
        W: (E, N, K) expert weights, where E is number of experts, N is the intermediate (output) dim, and K is the reduction dim. A 2-D (E * N, K) weight is also accepted, in which case E is taken from `m_sizes`.
        topk: number of experts chosen per token.
        m_sizes: tokens assigned to each expert which correspond to the size of M in the respective GEMMs in the grouped GEMM.
        gather_indices: (total_tokens,) indices of tokens assigned to each expert.  E.g., slicing gather_indices by cumsum of m_sizes gives the indices of tokens assigned to each expert. Required only when `permute_x` or `permute_y` is True.
        topk_weights: (total_tokens,) weights to multiply routed output by in expert MLP calculation, used only when `fuse_mul_post` is True (see note on `fuse_mul_post`).
        autotune: launch the autotuned kernel; the manual tuning parameters below are then not forwarded.
        BLOCK_SIZE_M, BLOCK_SIZE_N, BLOCK_SIZE_K, num_warps, num_stages: manual kernel tuning parameters, forwarded only when `autotune` is False.
        use_tma_load_x: use TMA for loading activations, incompatible with permute_x.  TODO: add TMA gather / scatter support for Blackwell+.
        use_tma_load_w: use TMA for loading weights.  If TMA supported, this should always be enabled as it is faster than global memory load.
        use_tma_store: use TMA for storing output, incompatible with permute_y.  TODO: add TMA scatter support for Blackwell+.
        flatten: forwarded to the kernel as `FLATTEN` (loop pipelining).
        debug: print problem sizes and routing information before launching.

    Returns:
        y: (total_tokens, N) output of grouped GEMM. When the inputs are fake
        tensors (torch.compile tracing) the kernel is not launched and `y` is
        returned uninitialized.

    Raises:
        AssertionError: if an input is not on CUDA, shapes are inconsistent, or
        an unsupported combination of fusions / TMA options is requested.
    """

    assert X.device.type == "cuda", "X and W must be on CUDA"
    assert m_sizes.device.type == "cuda", "m_sizes must be on CUDA"

    X = X.contiguous()
    W = W.contiguous()
    m_sizes = m_sizes.contiguous()

    # Preconditions
    assert not (permute_x and permute_y), "Cannot permute both X and Y"
    assert not (permute_y and use_tma_store), "Cannot use both TMA store and permute_y"

    if use_tma_load_x:
        # TMA load for activations, TMA gather only supported on Blackwell+
        assert not permute_x, "Cannot use both use_tma_load_x and permute_x"

    # NOTE: `use_tma` reflects what the caller requested; it is intentionally not
    # recomputed after the fallback below, so the allocator is still installed.
    use_tma = use_tma_load_w or use_tma_load_x or use_tma_store
    if not supports_tma() and use_tma:
        warnings.warn("TMA not supported, tma_load will be set to False")
        use_tma_load_w = False
        use_tma_load_x = False
        use_tma_store = False

    if use_tma or autotune:
        # Respect global persistent allocator if set
        if _HAS_SET_ALLOCATOR and not getattr(triton, "_unsloth_allocator_set", False):

            def alloc_fn(size: int, alignment: int, stream: int):
                return torch.empty(size, device = "cuda", dtype = torch.int8)

            triton.set_allocator(alloc_fn)

    if W.ndim == 3:
        num_experts = W.shape[0]
        N = W.shape[1]
    else:
        # Flattened (E * N, K) weights: recover E from the per-expert token counts.
        num_experts = m_sizes.shape[0]
        N = W.shape[0] // num_experts

    X = X.view(-1, X.shape[-1])
    W = W.view(-1, W.shape[-1])

    if permute_x or permute_y:
        assert (
            gather_indices is not None
        ), "gather_indices must be provided when permute_x or permute_y is True"
        assert gather_indices.is_contiguous()
        assert gather_indices.device.type == "cuda"
        assert gather_indices.ndim == 1
        total_tokens = gather_indices.shape[0]
        num_tokens = total_tokens // topk
        if permute_x:
            assert (
                X.shape[0] == num_tokens
            ), f"X.shape[0] ({X.shape[0]}) must match num_tokens ({num_tokens})"
        else:
            assert (
                X.shape[0] == total_tokens
            ), f"X.shape[0] ({X.shape[0]}) must match total_tokens ({total_tokens})"
    else:
        total_tokens = X.shape[0]
        num_tokens = total_tokens // topk

    _, K = X.shape
    assert K == W.shape[1], f"K ({K}) must match W.shape[1] ({W.shape[1]})"

    if fuse_mul_post:
        global _FUSED_MUL_WARN
        # Warn only once per process.
        if not _FUSED_MUL_WARN:
            warnings.warn(
                "fused_mul should only be used for inference, not for training"
            )
            _FUSED_MUL_WARN = True
        assert permute_y, "FUSE_MUL requires PERMUTE_Y"
        assert topk_weights is not None
        assert topk_weights.numel() == total_tokens
        assert topk_weights.device.type == "cuda"
        assert topk_weights.is_contiguous()
        topk_weights = topk_weights.view(-1)
        if debug:
            print(
                f"DEBUG::GROUPED_GEMM {topk_weights.tolist()} {gather_indices.tolist()}"
            )

    y = torch.empty((total_tokens, N), device = X.device, dtype = X.dtype)

    NUM_SMS = torch.cuda.get_device_properties("cuda").multi_processor_count

    def grid(META):
        # One program per SM, independent of the problem size.
        return (NUM_SMS,)

    if debug:
        print(
            f"DEBUG::GROUPED_GEMM {num_tokens = } {topk = } {num_experts = } {N = } {K = } {BLOCK_SIZE_M = } {BLOCK_SIZE_N = } {BLOCK_SIZE_K = } {permute_x = }"
        )
        # gather_indices is optional when no permutation is fused; avoid `None // topk`.
        token_indices = (
            (gather_indices // topk).tolist() if gather_indices is not None else None
        )
        print(f"DEBUG::GROUPED_GEMM {m_sizes.tolist()} {token_indices}")

    kernel_args = {
        # Inputs
        "x_ptr": X,
        "w_ptr": W,
        "m_sizes_ptr": m_sizes,
        "gather_indices_ptr": gather_indices,
        "topk_weights_ptr": topk_weights,
        # Output
        "y_ptr": y,
        # Problem shapes
        "NUM_TOKENS": num_tokens,
        "NUM_EXPERTS": num_experts,
        "TOPK": topk,
        "N": N,
        "K": K,
        "NUM_SMS": NUM_SMS,
        # Gather / Scatter
        "PERMUTE_X": permute_x,
        "PERMUTE_Y": permute_y,
        # TopK weight merging
        "FUSE_MUL_POST": fuse_mul_post,
        # Loop pipelining
        "FLATTEN": flatten,
    }
    if not autotune:
        # Manual tuning parameters are only forwarded to the non-autotuned kernel.
        kernel_args.update(
            {
                "USE_TMA_LOAD_W": use_tma_load_w,
                "USE_TMA_LOAD_X": use_tma_load_x,
                "USE_TMA_STORE": use_tma_store,
                "BLOCK_SIZE_M": BLOCK_SIZE_M,
                "BLOCK_SIZE_N": BLOCK_SIZE_N,
                "BLOCK_SIZE_K": BLOCK_SIZE_K,
                "num_warps": num_warps,
                "num_stages": num_stages,
            }
        )

    kernel = (
        _autotuned_grouped_gemm_forward_kernel
        if autotune
        else _grouped_gemm_forward_kernel
    )

    # Triton kernels cannot run on fake tensors; only launch on real ones.
    is_fake = _is_tracing(X, W)
    if not is_fake:
        compiled_kernel: triton.compiler.CompiledKernel = kernel[grid](**kernel_args)
        if autotune:
            log_kernel_info(compiled_kernel, kernel.best_config)
        else:
            log_kernel_info(compiled_kernel)

    return y


@allow_in_graph
def grouped_gemm_dX(
    dY: torch.Tensor,
    W: torch.Tensor,
    gather_indices: torch.Tensor,
    m_sizes: torch.Tensor,
    topk: int,
    BLOCK_SIZE_M: int = 32,
    BLOCK_SIZE_N: int = 32,
    BLOCK_SIZE_K: int = 32,
    debug: bool = False,
    permute_x: bool = False,
    permute_y: bool = False,
    use_tma_load_w: bool = False,
    use_tma_load_dy: bool = False,
    use_tma_store: bool = False,
    num_warps: int = 4,
    num_stages: int = 2,
    flatten: bool = True,
    fuse_mul_pre: bool = False,
    fuse_mul_post: bool = False,
    autotune: bool = False,
) -> torch.Tensor:
    """
    dX backward kernel launcher.

    dY: (M, N) gradient of the forward output. Leading dimensions are flattened.
    W: (E, N, K) expert weights, or the flattened (E * N, K) form.
    gather_indices: (total_tokens,), indices of tokens assigned to each expert.  E.g., slicing gather_indices by cumsum of m_sizes gives the indices of tokens assigned to each expert. Required when `permute_x` or `permute_y` is True; may be None otherwise.
    m_sizes: tokens assigned to each expert which correspond to the size of M in the respective GEMMs in the grouped GEMM.
    topk: number of experts chosen per token.
    `permute_x`: whether X was permuted on load in the forward pass, typically only used for the first grouped GEMM in an MoE MLP to group tokens by expert.
    - In the forward pass, if we permuted X on load, we need to permute store in the backward pass
    - Shapes
        - the forward pass input X shape is [NUM_TOKENS, K], reduce across K, output y is [NUM_TOKENS * TOPK, N]
        - the backward pass input dy shape is [NUM_TOKENS * TOPK, N], reduce across N, output dX is [NUM_TOKENS * TOPK, K]
    - Note that in the backward pass, the output size is still [NUM_TOKENS * TOPK, K] since we still need to accumulate gradients for each expert chosen by the token in a post-processing step.
    `permute_y`: whether the output was permuted on store in the forward pass, typically only used for the second grouped GEMM in an MoE MLP to restore to the original token order.
    - In the forward pass, if we permuted output on store (e.g., in the second grouped GEMM in fused MoE MLP), we need to permute on load to get from token order to expert grouped order
    - We still store in contiguous order since we are writing out dX which will be the input to the backwards pass of the first grouped GEMM
    `fuse_mul_{pre,post}`: always set to False since this should only be used for inference.
    use_tma_load_dy: use TMA for loading dy. use_tma_load_dy is incompatible with permute_y.  TODO: add TMA gather / scatter support for Blackwell+ which will enable permute_y and use_tma_load_dy.
    use_tma_load_w: use TMA for loading weights.  If TMA supported, this should always be enabled as it is faster than global memory load.
    use_tma_store: use TMA for storing dX.  Incompatible with permute_x.  TODO: add TMA gather / scatter support for Blackwell+ which will enable permute_x and use_tma_store.
    autotune: launch the autotuned kernel; the manual tuning parameters (BLOCK_SIZE_*, num_warps, num_stages, use_tma_*) are then not forwarded.

    Returns:
        dX: (total_tokens, K) zero-initialized tensor filled by the kernel. When
        the inputs are fake tensors (torch.compile tracing) the kernel is not
        launched and the zeros are returned.

    Raises:
        AssertionError: on unsupported option combinations or inconsistent shapes.
    """
    assert (
        not fuse_mul_pre
    ), "fuse_mul_pre should only be used for inference, not for training"
    assert (
        not fuse_mul_post
    ), "fuse_mul_post should only be used for inference, not for training"
    # Normalize layouts the same way the forward / dW launchers do. The autograd
    # wrapper saves the caller's original W, which the forward pass accepted even
    # when it was not contiguous, so asserting here would fail only in backward.
    dY = dY.contiguous()
    W = W.contiguous()
    m_sizes = m_sizes.contiguous()
    assert m_sizes.ndim == 1

    # Preconditions
    assert not (permute_x and permute_y), "Cannot permute both X and Y"
    # Note that this is flipped from the forward pass
    # If we permuted y in the forward, we need to permute on load in the backward
    assert not (permute_y and use_tma_load_dy), "Cannot use both TMA load and permute_y"
    assert not (permute_x and use_tma_store), "Cannot use both TMA store and permute_x"

    # NOTE: `use_tma` reflects what the caller requested; it is not recomputed
    # after the fallback below, so the allocator is still installed.
    use_tma = use_tma_load_dy or use_tma_load_w or use_tma_store
    if not supports_tma() and use_tma:
        warnings.warn("TMA not supported, tma_load will be set to False")
        use_tma_load_w = False
        use_tma_load_dy = False
        use_tma_store = False

    if use_tma or autotune:
        # Respect global persistent allocator if set
        if _HAS_SET_ALLOCATOR and not getattr(triton, "_unsloth_allocator_set", False):

            def alloc_fn(size: int, alignment: int, stream: int):
                return torch.empty(size, device = "cuda", dtype = torch.int8)

            triton.set_allocator(alloc_fn)

    if W.ndim == 3:
        num_experts = W.shape[0]
        N = W.shape[1]
    else:
        # Flattened (E * N, K) weights: recover E from the per-expert token counts.
        num_experts = m_sizes.shape[0]
        N = W.shape[0] // num_experts

    dY = dY.view(-1, dY.shape[-1])
    W = W.view(-1, W.shape[-1])

    M_total, N_grad = dY.shape
    K = W.shape[1]
    assert N_grad == N, f"Grad_output N ({N_grad}) must match weight N ({N})"

    assert (
        M_total % topk == 0
    ), f"M_total ({M_total}) must be divisible by topk ({topk})"
    num_tokens = M_total // topk

    if permute_x or permute_y:
        assert (
            gather_indices is not None
        ), "gather_indices must be provided when permute_x or permute_y is True"

    if gather_indices is not None:
        total_tokens = gather_indices.shape[0]
        assert (
            total_tokens == M_total
        ), f"Total tokens ({total_tokens}) must match M_total ({M_total})"
    else:
        # No permutation fused: the forward launcher allows gather_indices to be
        # omitted, so fall back to the row count of dY.
        # NOTE(review): assumes the kernel only reads gather_indices_ptr when
        # PERMUTE_X / PERMUTE_Y is set, as the forward launcher already passes
        # None in this situation -- confirm against the kernel.
        total_tokens = M_total

    # Note that the output shape is [NUM_TOKENS * TOPK, K] even when `permute_x` is True since we need to accumulate gradients across all experts chosen by the token.
    # This will be done in a post-processing reduction step.
    output_shape = (total_tokens, K)
    dX = torch.zeros(output_shape, device = dY.device, dtype = dY.dtype)

    NUM_SMS = torch.cuda.get_device_properties("cuda").multi_processor_count

    def grid(META):
        # One program per SM, independent of the problem size.
        return (NUM_SMS,)

    if debug:
        print(
            f"DEBUG::GROUPED_GEMM {num_tokens = } {topk = } {output_shape = } {num_experts = } {N = } {K = } {BLOCK_SIZE_M = } {BLOCK_SIZE_N = } {BLOCK_SIZE_K = } {NUM_SMS = }"
        )
        print(f"DEBUG::GROUPED_GEMM {m_sizes.tolist()}")

    kernel_args = {
        # Inputs
        "dY_ptr": dY,
        "w_ptr": W,
        "gather_indices_ptr": gather_indices,
        "m_sizes_ptr": m_sizes,
        # Output
        "dX_ptr": dX,
        # Problem sizes
        "NUM_EXPERTS": num_experts,
        "NUM_TOKENS": num_tokens,
        "TOPK": topk,
        "N": N,
        "K": K,
        "NUM_SMS": NUM_SMS,
        # Gather / Scatter
        "PERMUTE_X": permute_x,
        "PERMUTE_Y": permute_y,
        "FLATTEN": flatten,
    }
    if not autotune:
        # Manual tuning parameters are only forwarded to the non-autotuned kernel.
        kernel_args.update(
            {
                "BLOCK_SIZE_M": BLOCK_SIZE_M,
                "BLOCK_SIZE_N": BLOCK_SIZE_N,
                "BLOCK_SIZE_K": BLOCK_SIZE_K,
                "num_warps": num_warps,
                "num_stages": num_stages,
                "USE_TMA_LOAD_dY": use_tma_load_dy,
                "USE_TMA_LOAD_W": use_tma_load_w,
                "USE_TMA_STORE": use_tma_store,
            }
        )
    kernel = _autotuned_grouped_gemm_dX_kernel if autotune else _grouped_gemm_dX_kernel

    # Triton kernels cannot run on fake tensors; only launch on real ones.
    is_fake = _is_tracing(dY, W)
    if not is_fake:
        compiled_kernel: triton.compiler.CompiledKernel = kernel[grid](**kernel_args)

        if autotune:
            log_kernel_info(compiled_kernel, kernel.best_config)
        else:
            log_kernel_info(compiled_kernel)
    return dX


@allow_in_graph
def grouped_gemm_dW(
    X: torch.Tensor,
    dY: torch.Tensor,
    m_sizes: torch.Tensor,
    gather_indices: torch.Tensor,
    topk: int,
    BLOCK_SIZE_M: int = 32,
    BLOCK_SIZE_N: int = 32,
    BLOCK_SIZE_K: int = 32,
    permute_x: bool = False,
    permute_y: bool = False,
    use_tma_load_dy: bool = False,
    use_tma_load_x: bool = False,
    use_tma_store: bool = False,
    fuse_mul_pre: bool = False,
    fuse_mul_post: bool = False,
    num_warps: int = 4,
    num_stages: int = 2,
    flatten: bool = True,
    autotune: bool = False,
    debug: bool = False,
) -> torch.Tensor:
    """
    dW backward kernel launcher.

    X: (M, K) hidden states where M is the num_tokens if `permute_x` is True, otherwise `total_tokens` where `total_tokens = num_tokens * topk`. Leading dimensions are flattened.
    dY: (M, N) gradient of the forward output. Leading dimensions are flattened.
    topk: number of experts to choose per token.
    m_sizes: tokens assigned to each expert which correspond to the size of M in the respective GEMMs in the grouped GEMM.
    gather_indices: (total_tokens,) indices of tokens assigned to each expert.  E.g., slicing gather_indices by cumsum of m_sizes gives the indices of tokens assigned to each expert. Required when `permute_x` or `permute_y` is True; may be None otherwise.
    permute_x: whether X was permuted on load in the forward pass, typically only used for the first grouped GEMM in an MoE MLP to group tokens by expert.
    - for the first grouped GEMM, we permuted on load -> X was [num_tokens, K] and stored y in expert grouped order [num_tokens * topk, N]
    - in the backwards pass, we need to permute on load of X while loading dy in contiguous (expert grouped) order
    - since we are writing out dW, there is no need to permute on store
    permute_y: whether the output was permuted on store in the forward pass, typically only used for the second grouped GEMM in an MoE MLP to restore to the original token order.
    - for the second grouped GEMM, we permuted on store -> y was permuted from expert grouped order to token order while X was loaded in expert grouped order since it was the output of the first grouped GEMM
    - in the backwards pass, we need to permute on load of dy to get from token order to expert grouped order to match the order of X
    - since we are writing out dW, there is no need to permute on store
    use_tma_load_dy: use TMA for loading dy. use_tma_load_dy is incompatible with permute_y.  TODO: add TMA gather / scatter support for Blackwell+ which will enable permute_y and use_tma_load_dy.
    use_tma_load_x: use TMA for loading x. use_tma_load_x is incompatible with permute_x.  TODO: add TMA gather / scatter support for Blackwell+ which will enable permute_x and use_tma_load_x.
    use_tma_store: use TMA for storing dW.  If TMA supported, this should always be enabled as it is faster than global memory store.
    fuse_mul_pre / fuse_mul_post: must be False; not supported in the backward pass.
    autotune: launch the autotuned kernel; the manual tuning parameters (BLOCK_SIZE_*, num_warps, num_stages, use_tma_*) are then not forwarded.
    debug: print routing information and launch with a single program (NUM_SMS = 1).

    Returns:
        dW: (num_experts, N, K) zero-initialized tensor filled by the kernel,
        where num_experts is `m_sizes.shape[0]`. When the inputs are fake tensors
        (torch.compile tracing) the kernel is not launched and the zeros are returned.

    Raises:
        AssertionError: on unsupported option combinations or inconsistent shapes.
    """
    assert not fuse_mul_pre, "fuse_mul_pre not supported"
    assert not fuse_mul_post, "fuse_mul_post not supported"
    NUM_SMS = (
        torch.cuda.get_device_properties("cuda").multi_processor_count
        if not debug
        else 1
    )
    # Make contiguous BEFORE flattening (same order as the forward launcher):
    # `view` on a non-contiguous multi-dimensional tensor can raise.
    X = X.contiguous()
    X = X.view(-1, X.shape[-1])
    dY = dY.contiguous()
    dY = dY.view(-1, dY.shape[-1])
    m_sizes = m_sizes.contiguous()

    # Preconditions
    assert not (permute_x and permute_y), "Cannot permute both X and Y"
    assert not (permute_y and use_tma_load_dy), "Cannot use both TMA load and permute_y"
    assert not (permute_x and use_tma_load_x), "Cannot use both TMA load and permute_x"

    # NOTE: `use_tma` reflects what the caller requested; it is not recomputed
    # after the fallback below, so the allocator is still installed.
    use_tma = use_tma_load_dy or use_tma_load_x or use_tma_store
    if not supports_tma() and use_tma:
        warnings.warn("TMA not supported, tma_load will be set to False")
        use_tma_load_x = False
        use_tma_load_dy = False
        use_tma_store = False

    if use_tma or autotune:
        # Respect global persistent allocator if set
        if _HAS_SET_ALLOCATOR and not getattr(triton, "_unsloth_allocator_set", False):

            def alloc_fn(size: int, alignment: int, stream: int):
                return torch.empty(size, device = "cuda", dtype = torch.int8)

            triton.set_allocator(alloc_fn)

    if permute_x or permute_y:
        assert gather_indices is not None
        assert gather_indices.is_contiguous()
        assert gather_indices.device.type == "cuda"
        assert gather_indices.ndim == 1
        total_tokens = gather_indices.shape[0]
        num_tokens = total_tokens // topk
        if permute_x:
            assert X.shape[0] == num_tokens
        else:
            assert X.shape[0] == total_tokens
    else:
        total_tokens = X.shape[0]
        num_tokens = total_tokens // topk

    num_experts = m_sizes.shape[0]
    # Get dimensions
    _, K = X.shape
    M_grad, N = dY.shape

    assert M_grad == total_tokens, f"dY M ({M_grad}) != total_tokens ({total_tokens})"

    dW = torch.zeros((num_experts, N, K), device = X.device, dtype = X.dtype)

    def grid(META):
        # One program per SM (a single program in debug mode).
        return (NUM_SMS,)

    if debug:
        print(
            f"DEBUG::GROUPED_GEMM_DW_TMA {num_experts = } {N = } {K = } {BLOCK_SIZE_M = } {BLOCK_SIZE_N = } {BLOCK_SIZE_K = } {NUM_SMS = }"
        )

        print(f"DEBUG::GROUPED_GEMM_DW_TMA {m_sizes.tolist() = }")
        # gather_indices is optional when no permutation is fused; skip the
        # per-expert dump instead of crashing on None.
        if gather_indices is not None:
            print(f"DEBUG::GROUPED_GEMM_DW_TMA {gather_indices.tolist() = }")
            m_start = 0
            # Copy the sizes to the host once rather than comparing against
            # 0-dim tensors on every loop iteration.
            for i, m_size in enumerate(m_sizes.tolist()):
                expert_token_idx = gather_indices[m_start : m_start + m_size]
                for t_start in range(0, m_size, BLOCK_SIZE_M):
                    token_idx = expert_token_idx[t_start : t_start + BLOCK_SIZE_M]
                    if permute_x:
                        token_idx = token_idx // topk
                    print(
                        f"DEBUG::GROUPED_GEMM_DW_TMA Token expert {i} indices: {token_idx.tolist()}"
                    )

                m_start += m_size

    kernel_args = {
        # Inputs
        "x_ptr": X,
        "dY_ptr": dY,
        "m_sizes_ptr": m_sizes,
        "gather_indices_ptr": gather_indices,
        # Output
        "dW_ptr": dW,
        # Problem sizes
        "NUM_TOKENS": num_tokens,
        "TOPK": topk,
        "NUM_EXPERTS": num_experts,
        "N": N,
        "K": K,
        "NUM_SMS": NUM_SMS,
        # Gather / Scatter
        "PERMUTE_X": permute_x,
        "PERMUTE_Y": permute_y,
        # Loop pipelining
        "FLATTEN": flatten,
    }

    if not autotune:
        # Manual tuning parameters are only forwarded to the non-autotuned kernel.
        kernel_args.update(
            {
                "BLOCK_SIZE_M": BLOCK_SIZE_M,
                "BLOCK_SIZE_N": BLOCK_SIZE_N,
                "BLOCK_SIZE_K": BLOCK_SIZE_K,
                "USE_TMA_LOAD_dY": use_tma_load_dy,
                "USE_TMA_LOAD_X": use_tma_load_x,
                "USE_TMA_STORE": use_tma_store,
                "num_warps": num_warps,
                "num_stages": num_stages,
            }
        )

    kernel = _autotuned_grouped_gemm_dW_kernel if autotune else _grouped_gemm_dW_kernel

    # Triton kernels cannot run on fake tensors; only launch on real ones.
    is_fake = _is_tracing(X, dY)
    if not is_fake:
        compiled_kernel: triton.compiler.CompiledKernel = kernel[grid](**kernel_args)

        if autotune:
            log_kernel_info(compiled_kernel, kernel.best_config)
        else:
            log_kernel_info(compiled_kernel)

    return dW


class GroupedGemm(torch.autograd.Function):
    """
    Autograd wrapper around the grouped GEMM forward / dX / dW launchers.

    `forward` calls `grouped_gemm_forward`; `backward` calls `grouped_gemm_dW`
    and `grouped_gemm_dX` (each can be skipped through the debugging flags
    `dX_only` / `dW_only`). Training with `fuse_mul_post` is rejected in backward.
    """

    # Attribute names copied from each kernel-config object into launcher kwargs.
    _FWD_CONFIG_FIELDS = (
        "BLOCK_SIZE_M",
        "BLOCK_SIZE_N",
        "BLOCK_SIZE_K",
        "num_warps",
        "num_stages",
        "use_tma_load_x",
        "use_tma_load_w",
        "use_tma_store",
    )
    _BWD_DW_CONFIG_FIELDS = (
        "use_tma_load_dy",
        "use_tma_load_x",
        "use_tma_store",
        "BLOCK_SIZE_M",
        "BLOCK_SIZE_N",
        "BLOCK_SIZE_K",
        "num_warps",
        "num_stages",
    )
    _BWD_DX_CONFIG_FIELDS = (
        "use_tma_load_dy",
        "use_tma_load_w",
        "use_tma_store",
        "BLOCK_SIZE_M",
        "BLOCK_SIZE_N",
        "BLOCK_SIZE_K",
        "num_warps",
        "num_stages",
    )

    @staticmethod
    def _manual_config(kernel_config, fields):
        """Return `{field: kernel_config.field}` for `fields`, or `{}` when no config is given."""
        if kernel_config is None:
            return {}
        return {field: getattr(kernel_config, field) for field in fields}

    @staticmethod
    def forward(
        ctx,
        X,
        W,
        m_sizes,
        topk,
        gather_indices,
        permute_x,
        permute_y,
        topk_weights,
        fuse_mul_post,
        kernel_config_fwd,
        kernel_config_bwd_dX,
        kernel_config_bwd_dW,
        autotune,
        dX_only,
        dW_only,
    ):
        """
        Run the forward grouped GEMM and stash everything backward needs on `ctx`.

        Returns the tensor produced by `grouped_gemm_forward`.
        """
        ctx.topk = topk
        ctx.permute_x = permute_x
        ctx.permute_y = permute_y
        ctx.fuse_mul_post = fuse_mul_post
        ctx.kernel_config_fwd = kernel_config_fwd
        ctx.kernel_config_bwd_dX = kernel_config_bwd_dX
        ctx.kernel_config_bwd_dW = kernel_config_bwd_dW
        ctx.autotune = autotune
        ctx.dX_only = dX_only
        ctx.dW_only = dW_only

        # NOTE: we don't save topk_weights for backward since we do not support training with fused_mul
        ctx.save_for_backward(X, W, m_sizes, gather_indices)

        fwd_config = GroupedGemm._manual_config(
            kernel_config_fwd, GroupedGemm._FWD_CONFIG_FIELDS
        )

        return grouped_gemm_forward(
            X = X,
            W = W,
            topk = topk,
            m_sizes = m_sizes,
            gather_indices = gather_indices,
            topk_weights = topk_weights,
            permute_x = permute_x,
            permute_y = permute_y,
            fuse_mul_post = fuse_mul_post,
            # Autotune -- this will override the manual kernel config if true
            autotune = autotune,
            # Manual kernel config
            **fwd_config,
        )

    @staticmethod
    def backward(ctx, dY):
        """
        Compute gradients for `X` and `W`; every other forward input gets None.

        Raises:
            AssertionError: if a required manual kernel config is missing while
            `autotune` is False, or if the forward pass used `fuse_mul_post`.
        """
        dY = dY.contiguous()
        X, W, m_sizes, gather_indices = ctx.saved_tensors
        topk = ctx.topk
        permute_x = ctx.permute_x
        permute_y = ctx.permute_y
        fuse_mul_post = ctx.fuse_mul_post
        kernel_config_bwd_dX = ctx.kernel_config_bwd_dX
        kernel_config_bwd_dW = ctx.kernel_config_bwd_dW
        autotune = ctx.autotune
        dX_only = ctx.dX_only
        dW_only = ctx.dW_only

        if not autotune:
            if not dW_only:
                assert (
                    kernel_config_bwd_dX is not None
                ), "kernel_config_bwd_dX must be provided if autotune is False"
            if not dX_only:
                assert (
                    kernel_config_bwd_dW is not None
                ), "kernel_config_bwd_dW must be provided if autotune is False"

        assert (
            not fuse_mul_post
        ), "fused_mul should only be used for inference, not for training"

        if not dX_only:
            bwd_dW_config = GroupedGemm._manual_config(
                kernel_config_bwd_dW, GroupedGemm._BWD_DW_CONFIG_FIELDS
            )

            dW = grouped_gemm_dW(
                X = X,
                dY = dY,
                m_sizes = m_sizes,
                gather_indices = gather_indices,
                topk = topk,
                permute_x = permute_x,
                permute_y = permute_y,
                # Autotune -- this will override the manual kernel config if true
                autotune = autotune,
                # Manual kernel config
                **bwd_dW_config,
            )
            # The forward pass also accepts flattened (E * N, K) weights, while
            # the launcher returns (E, N, K); hand autograd the shape of W.
            if dW.shape != W.shape and dW.numel() == W.numel():
                dW = dW.view(W.shape)
        else:
            dW = None

        if not dW_only:
            bwd_dX_config = GroupedGemm._manual_config(
                kernel_config_bwd_dX, GroupedGemm._BWD_DX_CONFIG_FIELDS
            )

            dX = grouped_gemm_dX(
                dY = dY,
                W = W,
                m_sizes = m_sizes,
                gather_indices = gather_indices,
                topk = topk,
                permute_x = permute_x,
                permute_y = permute_y,
                # Autotune -- this will override the manual kernel config if true
                autotune = autotune,
                # Manual kernel config
                **bwd_dX_config,
            )

            if topk > 1 and permute_x:
                # dX holds one row per (token, chosen expert); sum over the topk
                # axis. The row count is inferred rather than read from
                # X.shape[0] so that N-d X (flattened by the forward pass) and
                # zero-token batches reduce correctly.
                dX = dX.view(-1, topk, dX.shape[-1]).sum(dim = 1)
            # The forward pass flattens leading dims of X; restore them for autograd.
            if dX.shape != X.shape and dX.numel() == X.numel():
                dX = dX.view(X.shape)
        else:
            dX = None

        # One entry per forward input (after ctx), in forward's argument order.
        return (
            dX,  # X
            dW,  # W
            None,  # m_sizes
            None,  # topk
            None,  # gather_indices
            None,  # permute_x
            None,  # permute_y
            None,  # topk_weights
            None,  # fuse_mul_post
            None,  # kernel_config_fwd
            None,  # kernel_config_bwd_dX
            None,  # kernel_config_bwd_dW
            None,  # autotune
            None,  # dX_only
            None,  # dW_only
        )


def check_valid_config_fwd(
    permute_x,
    permute_y,
    use_tma_load_x,
    use_tma_load_w,
    use_tma_store,
    fuse_mul_post,
    is_first_gemm,
):
    """
    Validate a forward-pass configuration.

    Each rule below pairs a forbidden combination of flags with the message
    raised when it is present; rules are checked in order and the first
    violation raises AssertionError. `use_tma_load_w` is accepted but not
    constrained by any rule. Returns None when the configuration is valid.
    """
    is_second_gemm = not is_first_gemm

    forbidden_combinations = (
        (permute_x and permute_y, "Cannot permute both X and Y"),
        (
            is_second_gemm and permute_x,
            "Cannot permute X for the second grouped GEMM",
        ),
        (
            is_first_gemm and permute_y,
            "Cannot permute Y for the first grouped GEMM",
        ),
        (
            fuse_mul_post and is_first_gemm,
            "Cannot fuse mul for the first grouped GEMM",
        ),
        (
            use_tma_load_x and permute_x,
            "Cannot use TMA load and permute X unless on sm100+ (Blackwell+)",
        ),
        (
            use_tma_store and permute_y and is_second_gemm,
            "Cannot use TMA store and permute Y for the second grouped GEMM unless on sm100+ (Blackwell+)",
        ),
    )
    for is_violated, message in forbidden_combinations:
        assert not is_violated, message


def check_valid_config_bwd_dW(
    permute_x,
    permute_y,
    use_tma_load_dY,
    use_tma_load_x,
    use_tma_store,
    fuse_mul_post,
    is_first_gemm,
):
    """
    Validate a configuration for the dW backward pass.

    Raises AssertionError for the first rule that is violated: fused
    multiplication is never allowed, a TMA load of dY cannot be combined with
    permute_y on the second grouped GEMM, and a TMA load of X cannot be combined
    with permute_x on the first grouped GEMM. `use_tma_store` is accepted but
    not constrained. Returns None when the configuration is valid.
    """
    assert not fuse_mul_post, "Cannot fuse_mul is not supported for backward pass"
    assert not (
        (not is_first_gemm) and permute_y and use_tma_load_dY
    ), "Cannot use TMA load and permute Y for the second grouped GEMM"
    assert not (
        is_first_gemm and permute_x and use_tma_load_x
    ), "Cannot use TMA load and permute X for the first grouped GEMM"


def check_valid_config_bwd_dX(
    permute_x,
    permute_y,
    use_tma_load_dY,
    use_tma_load_w,
    use_tma_store,
    fuse_mul_post,
    is_first_gemm,
):
    """
    Validate a configuration for the dX backward pass.

    Raises AssertionError for the first rule that is violated: fused
    multiplication is never allowed, a TMA load of dY cannot be combined with
    permute_y on the second grouped GEMM, and a TMA store cannot be combined
    with permute_x on the first grouped GEMM. `use_tma_load_w` is accepted but
    not constrained. Returns None when the configuration is valid.
    """
    assert not fuse_mul_post, "Cannot fuse_mul is not supported for backward pass"
    assert not (
        (not is_first_gemm) and permute_y and use_tma_load_dY
    ), "Cannot use TMA load and permute Y for the second grouped GEMM"
    assert not (
        use_tma_store and permute_x and is_first_gemm
    ), "Cannot use TMA store and permute X for the first grouped GEMM"


def grouped_gemm(
    X: torch.Tensor,
    W: torch.Tensor,
    m_sizes: torch.Tensor,
    topk: int,
    gather_indices: torch.Tensor = None,
    permute_x: bool = False,
    permute_y: bool = False,
    topk_weights = None,
    fuse_mul_post = False,
    kernel_config_fwd: KernelConfigForward = None,
    kernel_config_bwd_dX: KernelConfigBackward_dX = None,
    kernel_config_bwd_dW: KernelConfigBackward_dW = None,
    autotune: bool = False,
    is_first_gemm: bool = True,
    # Only for debugging
    dX_only: bool = False,
    dW_only: bool = False,
):
    """
    Grouped GEMM for MoE MLPs.

    The implementation offers a number of fusions specific to MoE:
    - `permute_x`: fuse the permutation of hidden states from token order (original order) to grouped expert order, typically only needed for the first grouped GEMM in an MoE MLP.
        - When `permute_x` is True, `X` is expected to be of shape (num_tokens, K).
        - When `permute_x` is False, `X` is expected to be of shape (total_tokens, K) where `total_tokens = num_tokens * topk` AND already permuted to grouped expert order, i.e., hidden states are sorted such that tokens assigned to each expert are contiguous.
    - `permute_y`: fuse the permutation of the output from expert grouped order back to original token order, typically only needed for the second grouped GEMM in an MoE MLP.
    - `fuse_mul_post`: fuse the multiplication of the routed output with topk_weights, used only when `permute_y` is True. NOTE: this should only be used when using this kernel for inference, not for training.

    X: (M, K) hidden states where M is the num_tokens if `permute_x` is True, otherwise `total_tokens` where `total_tokens = num_tokens * topk`.
    W: (E, N, K) expert weights, where E is number of experts, N is the intermediate (output) dim, and K is the reduction dim
    m_sizes: tokens assigned to each expert which correspond to the size of M in the respective GEMMs in the grouped GEMM.
    topk: number of experts each token is routed to.
    gather_indices: (total_tokens,) indices of tokens assigned to each expert.  E.g., slicing gather_indices by cumsum of m_sizes gives the indices of tokens assigned to each expert. Needed when either `permute_x` or `permute_y` is True; may be left as None otherwise.
    topk_weights: (total_tokens,) weights to multiply routed output by in expert MLP calculation, used only when `fuse_mul_post` is True (see note on `fuse_mul_post`).
    kernel_config_fwd: KernelConfigForward for forward pass.
    kernel_config_bwd_dX: KernelConfigBackward_dX for backward pass of dX.
    kernel_config_bwd_dW: KernelConfigBackward_dW for backward pass of dW.
    autotune: whether to autotune the kernel, if yes, kernel_config_fwd, kernel_config_bwd_dX, and kernel_config_bwd_dW will be ignored.
    is_first_gemm: whether this is the first grouped GEMM in an MoE MLP.  This is needed to check whether kernel configs are valid.  `permute_x` should only be used for first gemm; `permute_y` should only be used for second gemm.
    This will impact whether TMA can be used for loading and storing.
    dX_only / dW_only: debugging switches; when set, the validity check of the other backward config is skipped and the flag is forwarded to `GroupedGemm.apply`.

    Returns the result of `GroupedGemm.apply` on the flattened inputs.
    """
    if not autotune:
        # Manual configs are only validated when autotuning is off; the
        # autotuner prunes invalid combinations on its own.
        assert (
            kernel_config_fwd is not None
        ), "kernel_config_fwd must be provided if autotune is False"

        check_valid_config_fwd(
            permute_x,
            permute_y,
            use_tma_load_x = kernel_config_fwd.use_tma_load_x,
            use_tma_load_w = kernel_config_fwd.use_tma_load_w,
            use_tma_store = kernel_config_fwd.use_tma_store,
            fuse_mul_post = fuse_mul_post,
            is_first_gemm = is_first_gemm,
        )
        if kernel_config_bwd_dW is not None and not dX_only:
            check_valid_config_bwd_dW(
                permute_x,
                permute_y,
                use_tma_load_dY = kernel_config_bwd_dW.use_tma_load_dy,
                use_tma_load_x = kernel_config_bwd_dW.use_tma_load_x,
                use_tma_store = kernel_config_bwd_dW.use_tma_store,
                fuse_mul_post = fuse_mul_post,
                is_first_gemm = is_first_gemm,
            )
        if kernel_config_bwd_dX is not None and not dW_only:
            check_valid_config_bwd_dX(
                permute_x,
                permute_y,
                use_tma_load_dY = kernel_config_bwd_dX.use_tma_load_dy,
                use_tma_load_w = kernel_config_bwd_dX.use_tma_load_w,
                use_tma_store = kernel_config_bwd_dX.use_tma_store,
                fuse_mul_post = fuse_mul_post,
                is_first_gemm = is_first_gemm,
            )

    if permute_x or permute_y:
        assert (
            gather_indices is not None
        ), "gather_indices is required when either permute_x or permute_y is True"

    if fuse_mul_post:
        assert (
            topk_weights is not None
        ), "topk_weights is required when fuse_mul_post is True"

    # Flatten inputs to the 2D / 1D layouts handed to GroupedGemm.apply.
    X = X.view(-1, X.shape[-1])
    m_sizes = m_sizes.view(-1)
    # gather_indices is optional (defaults to None) when no permutation is
    # fused, so only flatten it when it was actually provided.
    if gather_indices is not None:
        gather_indices = gather_indices.view(-1)

    return GroupedGemm.apply(
        X,
        W,
        m_sizes,
        topk,
        gather_indices,
        permute_x,
        permute_y,
        topk_weights,
        fuse_mul_post,
        kernel_config_fwd,
        kernel_config_bwd_dX,
        kernel_config_bwd_dW,
        autotune,
        dX_only,
        dW_only,
    )


================================================
FILE: unsloth/kernels/moe/grouped_gemm/kernels/__init__.py
================================================


================================================
FILE: unsloth/kernels/moe/grouped_gemm/kernels/autotuning.py
================================================
# Unsloth
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Autotuning utils
"""

import logging
from itertools import product
from typing import List

import torch
import triton

# Module-level logger used by the pruning helpers in this module.
logger = logging.getLogger(__name__)

# Default autotuning search space. These values are the default arguments of
# the get_*_configs builders in this module; each list is swept as a set of
# alternatives, and a scalar is treated as a single-choice axis.
DEFAULT_M_BLOCK_SIZES = [64, 128]
DEFAULT_N_BLOCK_SIZES = [64, 128, 256]
DEFAULT_K_BLOCK_SIZES = [64, 128, 256]
DEFAULT_NUM_CTAS = 1
DEFAULT_NUM_WARPS = [4, 8]
DEFAULT_NUM_STAGES = [3, 4, 5]
# Both boolean choices, for sweeping an on/off flag.
BOOLS = [True, False]


def val_to_list(val):
    """Wrap a scalar in a one-element list; pass `None` and lists through unchanged."""
    if val is None or isinstance(val, list):
        return val
    return [val]


def convert_args_to_list(args):
    """Apply `val_to_list` to every entry of `args` and return the results as a list."""
    return list(map(val_to_list, args))


def _triton_supports_tma():
    """Return True if `triton.language` exposes a tensor-descriptor (TMA) constructor."""
    import triton.language as tl

    # Accept either the stable name or the older experimental name.
    descriptor_api_names = (
        "make_tensor_descriptor",
        "_experimental_make_tensor_descriptor",
    )
    return any(hasattr(tl, api_name) for api_name in descriptor_api_names)


# Precompute at module import
# NOTE: TMA is disabled for now due to compatibility issues with permute_x/permute_y settings
# in the MoE grouped GEMM forward/backward passes. Re-enable once these are resolved.
# While this is False, the config builders in this module default their
# TMA-load flags to False whenever the caller leaves them as None.
_TRITON_HAS_TMA = False  # _triton_supports_tma()


def get_forward_configs(
    BLOCK_M = DEFAULT_M_BLOCK_SIZES,
    BLOCK_N = DEFAULT_N_BLOCK_SIZES,
    BLOCK_K = DEFAULT_K_BLOCK_SIZES,
    TMA_LOAD_X = None,  # Auto-detect if not specified
    TMA_LOAD_W = None,  # Auto-detect if not specified
    TMA_STORE = False,  # NOTE: TMA_STORE is disabled for now
    num_warps = DEFAULT_NUM_WARPS,
    num_stages = DEFAULT_NUM_STAGES,
    num_ctas = DEFAULT_NUM_CTAS,
):
    """
    Build the list of `triton.Config` candidates for the forward kernel.

    Every argument may be a single value or a list of alternatives; the result
    is the cartesian product over all of them. `TMA_LOAD_X` / `TMA_LOAD_W`
    left as None fall back to the module-level `_TRITON_HAS_TMA` switch.
    """
    # Resolve the "auto-detect" TMA-load flags
    TMA_LOAD_X = _TRITON_HAS_TMA if TMA_LOAD_X is None else TMA_LOAD_X
    TMA_LOAD_W = _TRITON_HAS_TMA if TMA_LOAD_W is None else TMA_LOAD_W

    # Axes are listed in the order they are unpacked below.
    search_axes = convert_args_to_list(
        [
            BLOCK_M,
            BLOCK_N,
            BLOCK_K,
            num_warps,
            num_stages,
            TMA_LOAD_X,
            TMA_LOAD_W,
            TMA_STORE,
            num_ctas,
        ]
    )

    return [
        triton.Config(
            {
                "BLOCK_SIZE_M": block_m,
                "BLOCK_SIZE_N": block_n,
                "BLOCK_SIZE_K": block_k,
                "USE_TMA_LOAD_X": tma_load_x,
                "USE_TMA_LOAD_W": tma_load_w,
                "USE_TMA_STORE": tma_store,
            },
            num_warps = warps,
            num_stages = stages,
            num_ctas = ctas,
        )
        for (
            block_m,
            block_n,
            block_k,
            warps,
            stages,
            tma_load_x,
            tma_load_w,
            tma_store,
            ctas,
        ) in product(*search_axes)
    ]


def get_dX_kernel_configs(
    BLOCK_M = DEFAULT_M_BLOCK_SIZES,
    BLOCK_N = DEFAULT_N_BLOCK_SIZES,
    BLOCK_K = DEFAULT_K_BLOCK_SIZES,
    TMA_LOAD_dY = None,  # Auto-detect if not specified
    TMA_LOAD_W = None,  # Auto-detect if not specified
    TMA_STORE = False,  # NOTE: TMA_STORE is disabled for now
    num_warps = DEFAULT_NUM_WARPS,
    num_stages = DEFAULT_NUM_STAGES,
    num_ctas = DEFAULT_NUM_CTAS,
):
    """
    Build the list of `triton.Config` candidates for the dX backward kernel.

    Every argument may be a single value or a list of alternatives; the result
    is the cartesian product over all of them. `TMA_LOAD_dY` / `TMA_LOAD_W`
    left as None fall back to the module-level `_TRITON_HAS_TMA` switch.
    """
    # Resolve the "auto-detect" TMA-load flags
    TMA_LOAD_dY = _TRITON_HAS_TMA if TMA_LOAD_dY is None else TMA_LOAD_dY
    TMA_LOAD_W = _TRITON_HAS_TMA if TMA_LOAD_W is None else TMA_LOAD_W

    # Axes are listed in the order they are unpacked below.
    search_axes = convert_args_to_list(
        [
            BLOCK_M,
            BLOCK_N,
            BLOCK_K,
            num_warps,
            num_stages,
            TMA_LOAD_dY,
            TMA_LOAD_W,
            TMA_STORE,
            num_ctas,
        ]
    )

    return [
        triton.Config(
            {
                "BLOCK_SIZE_M": block_m,
                "BLOCK_SIZE_N": block_n,
                "BLOCK_SIZE_K": block_k,
                "USE_TMA_LOAD_dY": tma_load_dy,
                "USE_TMA_LOAD_W": tma_load_w,
                "USE_TMA_STORE": tma_store,
            },
            num_warps = warps,
            num_stages = stages,
            num_ctas = ctas,
        )
        for (
            block_m,
            block_n,
            block_k,
            warps,
            stages,
            tma_load_dy,
            tma_load_w,
            tma_store,
            ctas,
        ) in product(*search_axes)
    ]


def get_dW_kernel_configs(
    BLOCK_M = DEFAULT_M_BLOCK_SIZES,
    BLOCK_N = DEFAULT_N_BLOCK_SIZES,
    BLOCK_K = DEFAULT_K_BLOCK_SIZES,
    num_warps = DEFAULT_NUM_WARPS,
    num_stages = DEFAULT_NUM_STAGES,
    num_ctas = DEFAULT_NUM_CTAS,
    TMA_LOAD_dY = None,  # Auto-detect if not specified
    TMA_LOAD_X = None,  # Auto-detect if not specified
    TMA_STORE = False,
):
    """
    Build the list of `triton.Config` candidates for the dW backward kernel.

    Every argument may be a single value or a list of alternatives; the result
    is the cartesian product over all of them. `TMA_LOAD_dY` / `TMA_LOAD_X`
    left as None fall back to the module-level `_TRITON_HAS_TMA` switch.
    """
    # Resolve the "auto-detect" TMA-load flags
    TMA_LOAD_dY = _TRITON_HAS_TMA if TMA_LOAD_dY is None else TMA_LOAD_dY
    TMA_LOAD_X = _TRITON_HAS_TMA if TMA_LOAD_X is None else TMA_LOAD_X

    # Axes are listed in the order they are unpacked below.
    search_axes = convert_args_to_list(
        [
            BLOCK_M,
            BLOCK_N,
            BLOCK_K,
            num_warps,
            num_stages,
            TMA_LOAD_dY,
            TMA_LOAD_X,
            TMA_STORE,
            num_ctas,
        ]
    )

    return [
        triton.Config(
            {
                "BLOCK_SIZE_M": block_m,
                "BLOCK_SIZE_N": block_n,
                "BLOCK_SIZE_K": block_k,
                "USE_TMA_LOAD_dY": tma_load_dy,
                "USE_TMA_LOAD_X": tma_load_x,
                "USE_TMA_STORE": tma_store,
            },
            num_warps = warps,
            num_stages = stages,
            num_ctas = ctas,
        )
        for (
            block_m,
            block_n,
            block_k,
            warps,
            stages,
            tma_load_dy,
            tma_load_x,
            tma_store,
            ctas,
        ) in product(*search_axes)
    ]


def estimate_smem_reqs(
    num_stages: int,
    BLOCK_SIZE_M: int,
    BLOCK_SIZE_N: int,
    BLOCK_SIZE_K: int,
    dtype: torch.dtype,
):
    num_bytes = dtype.itemsize
    return (
        num_stages * BLOCK_SIZE_K * (BLOCK_SIZE_M + BLOCK_SIZE_N)
        + BLOCK_SIZE_M * BLOCK_SIZE_N
    ) * num_bytes


def exceeds_smem_capacity(
    num_stages: int,
    BLOCK_SIZE_M: int,
    BLOCK_SIZE_N: int,
    BLOCK_SIZE_K: int,
    dtype: torch.dtype,
    smem_size: int,
    slack: float = 50000,
):
    """
    Return True when the estimated shared-memory requirement of the tile
    configuration is strictly greater than `smem_size + slack`.
    """
    budget = smem_size + slack
    required = estimate_smem_reqs(
        num_stages, BLOCK_SIZE_M, BLOCK_SIZE_N, BLOCK_SIZE_K, dtype
    )
    return required > budget


def common_prune_criteria(config: triton.Config, kwargs: dict, dtype):
    """
    Pruning rules shared by the forward, dX and dW autotuners.

    Returns True (i.e. "drop this config") when any of the following holds:
    - the estimated shared-memory requirement exceeds the device capacity,
    - BLOCK_SIZE_M is more than twice the average tokens per expert while that
      average is itself larger than the smallest default M block size,
    - both PERMUTE_X and PERMUTE_Y are requested.
    Otherwise returns False.
    """
    # NOTE: supports_tma is imported but currently unused; TMA-capability
    # pruning is disabled in this function.
    from ..interface import supports_tma
    from .tuning import get_device_properties

    smem_size = get_device_properties().SIZE_SMEM

    num_stages = config.num_stages
    tile_params = config.kwargs
    block_m = tile_params["BLOCK_SIZE_M"]
    block_n = tile_params["BLOCK_SIZE_N"]
    block_k = tile_params["BLOCK_SIZE_K"]

    num_tokens = kwargs["NUM_TOKENS"]
    num_experts = kwargs["NUM_EXPERTS"]
    permute_x = kwargs["PERMUTE_X"]
    permute_y = kwargs["PERMUTE_Y"]
    tokens_per_expert = num_tokens // num_experts

    smallest_block_m = DEFAULT_M_BLOCK_SIZES[0]

    if exceeds_smem_capacity(num_stages, block_m, block_n, block_k, dtype, smem_size):
        return True

    m_tile_too_large = (
        block_m > tokens_per_expert * 2 and tokens_per_expert > smallest_block_m
    )
    both_permuted = permute_x and permute_y
    return bool(m_tile_too_large or both_permuted)


def maybe_disable_tma(config: triton.Config):
    """
    Force every `USE_TMA_*` entry of `config.kwargs` to False, in place, when
    `supports_tma()` reports that TMA is unavailable. No-op otherwise.
    """
    from ..interface import supports_tma

    if supports_tma():
        return

    logger.info("Disabling TMA")
    # Snapshot the matching keys first so the dict is not iterated while written.
    for flag_name in [name for name in config.kwargs if name.startswith("USE_TMA_")]:
        config.kwargs[flag_name] = False


def prune_kernel_configs_fwd(configs: list[triton.Config], args, **kwargs):
    """
    Early-prune hook for the forward kernel autotuner.

    Each config first has its TMA flags cleared if TMA is unsupported, then is
    dropped if it fails the shared pruning criteria or combines a TMA store
    with PERMUTE_Y. A TMA load of X is switched off in place when PERMUTE_X is
    set. Returns the surviving configs in their original order.
    """
    dtype = kwargs["x_ptr"].dtype

    logger.debug(f"Pruning configs: {len(configs)}")

    survivors = []
    for candidate in configs:
        # disable TMA if gpu does not support it
        maybe_disable_tma(candidate)

        if common_prune_criteria(candidate, kwargs, dtype):
            continue

        flags = candidate.kwargs
        if flags["USE_TMA_LOAD_X"] and kwargs["PERMUTE_X"]:
            # A permuted X cannot be TMA-loaded, so fall back to regular loads
            flags["USE_TMA_LOAD_X"] = False
        if not (flags["USE_TMA_STORE"] and kwargs["PERMUTE_Y"]):
            survivors.append(candidate)

    logger.debug(f"Pruned configs: {len(survivors)}")
    return survivors


def prune_dX_configs(configs: List[triton.Config], args, **kwargs):
    """
    Early-prune hook for the dX backward kernel autotuner.

    Each config first has its TMA flags cleared if TMA is unsupported (same as
    the forward pruner), then is dropped if it fails the shared pruning
    criteria or combines a TMA store with PERMUTE_X. A TMA load of dY is
    switched off in place when PERMUTE_Y is set.

    Args:
        configs: candidate configs produced for the dX kernel.
        args: positional kernel arguments (unused).
        **kwargs: named kernel arguments; `w_ptr`, `PERMUTE_X`, `PERMUTE_Y`
            and the keys read by `common_prune_criteria` are required.

    Returns:
        The surviving configs, in their original order.
    """
    dtype = kwargs["w_ptr"].dtype

    logger.debug(f"Pruning configs: {len(configs)}")
    pruned_configs = []

    for config in configs:
        # disable TMA if gpu does not support it; without this a TMA-enabled
        # config could reach the kernel on hardware that cannot run it
        maybe_disable_tma(config)

        if common_prune_criteria(config, kwargs, dtype):
            continue
        if config.kwargs["USE_TMA_LOAD_dY"] and kwargs["PERMUTE_Y"]:
            # dynamically disable TMA_LOAD_dY for permuted Y
            config.kwargs["USE_TMA_LOAD_dY"] = False
        if config.kwargs["USE_TMA_STORE"] and kwargs["PERMUTE_X"]:
            continue
        pruned_configs.append(config)

    logger.debug(f"Pruned configs: {len(pruned_configs)}")
    return pruned_configs


def prune_kernel_configs_backward_dW(configs: list[triton.Config], args, **kwargs):
    """
    Early-prune hook for the dW backward kernel autotuner.

    Each config first has its TMA flags cleared if TMA is unsupported (same as
    the forward pruner), then is dropped if it fails the shared pruning
    criteria. TMA loads of dY / X are switched off in place when PERMUTE_Y /
    PERMUTE_X is set, respectively.

    Args:
        configs: candidate configs produced for the dW kernel.
        args: positional kernel arguments (unused).
        **kwargs: named kernel arguments; `x_ptr`, `PERMUTE_X`, `PERMUTE_Y`
            and the keys read by `common_prune_criteria` are required.

    Returns:
        The surviving configs, in their original order.
    """
    dtype = kwargs["x_ptr"].dtype

    pruned_configs = []
    logger.debug(f"Pruning configs: {len(configs)}")

    for config in configs:
        # disable TMA if gpu does not support it; without this a TMA-enabled
        # config could reach the kernel on hardware that cannot run it
        maybe_disable_tma(config)

        if common_prune_criteria(config, kwargs, dtype):
            continue
        if config.kwargs["USE_TMA_LOAD_dY"] and kwargs["PERMUTE_Y"]:
            # dY must be gathered when Y was permuted, so TMA load is not usable
            config.kwargs["USE_TMA_LOAD_dY"] = False
        if config.kwargs["USE_TMA_LOAD_X"] and kwargs["PERMUTE_X"]:
            # X must be gathered when X was permuted, so TMA load is not usable
            config.kwargs["USE_TMA_LOAD_X"] = False
        pruned_configs.append(config)

    logger.debug(f"Pruned configs: {len(pruned_configs)}")
    return pruned_configs


================================================
FILE: unsloth/kernels/moe/grouped_gemm/kernels/backward.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import torch
import triton
import triton.language as tl

from .autotuning import (
    get_dW_kernel_configs,
    get_dX_kernel_configs,
    prune_dX_configs,
    prune_kernel_configs_backward_dW,
)

"""
dX backward kernel

- Shapes
    - the forward pass input X shape is [NUM_TOKENS, K] if permute_x else [NUM_TOKENS * TOPK, K]; output y is [NUM_TOKENS * TOPK, N]
    - the backward pass input dy shape is [NUM_TOKENS * TOPK, N], reduce across N, output dX is [NUM_TOKENS * TOPK, K]
- Note that in the backward pass, the output size is still [NUM_TOKENS * TOPK, K] since we still need to accumulate gradients for each expert chosen by the token in a post-processing step.

`permute_x` notes:
- In the forward pass, if we permute X on load, we need to permute on store in the backward pass to restore to original token order
- the output dX will have shape [NUM_TOKENS * TOPK, K] and we need to perform an additional reduction across topk to accumulate gradients
- This is done as a post-processing step in autograd.Function.
- If not `permute_x`, this postprocessing step should take place outside autograd.Function such that the gradient shape matches the input X shape.

`permute_y` notes:
- In the forward pass, if we permuted output on store (e.g., in the second grouped GEMM in fused MoE MLP), we need to permute on load to get from token order to expert grouped order
- We still store in contiguous order since we are writing out dX which will be the input to the backwards pass of the first grouped GEMM

`fused_mul` notes:
- In the forward pass, if we used the multiplication of topk weights (e.g., in the second grouped GEMM in fused MoE MLP), we need to make a few additional changes:
    1) We load topk_weights in natural (token) order.  Since we only enable `fuse_mul` when permuting on store (`permute_y`), we multiply grad_output by topk_weights before backpropagating
    2) We need to calculate the gradient of the topk_weights.  This gets messy since we need do an additional elementwise multiplication in the GEMM main loop and then write out in unpermuted order.  For now, we do not fuse this step but calculate as a simple

Invalid combinations:
- permute_y and use_tma_load: permuting y on store in forward -> load in permuted order in backward, therefore can't use TMA load (unless Blackwell which supports gather / scatter TMA)
- permute_x and use_tma_store: permuting x on load in forward -> store in permuted order in backward, therefore can't use TMA store (unless Blackwell which supports gather / scatter TMA)

TODO:
- We define indices for all conditions and expect that unused indices will be DCE'd during compilation.  Check that this is the case otherwise will result in unnecessary register usage.
"""


@triton.jit
def _grouped_gemm_dX_kernel(
    dY_ptr,  # [M_total, N]
    w_ptr,  # [E, N, K]
    dX_ptr,  # [M_total, K]
    gather_indices_ptr,
    m_sizes_ptr,
    # problem sizes
    NUM_EXPERTS: tl.constexpr,
    NUM_TOKENS,
    TOPK: tl.constexpr,
    N: tl.constexpr,
    K: tl.constexpr,
    NUM_SMS,
    # Tuning parameters
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
    PERMUTE_X: tl.constexpr = False,
    PERMUTE_Y: tl.constexpr = False,
    USE_TMA_LOAD_W: tl.constexpr = False,
    USE_TMA_LOAD_dY: tl.constexpr = False,
    USE_TMA_STORE: tl.constexpr = False,
    FLATTEN: tl.constexpr = True,
) -> None:
    """
    Grouped GEMM backward kernel computing dX = dY @ W, expert by expert.

    For each expert, the rows of dY belonging to that expert ([m_size, N]) are
    multiplied by that expert's weight matrix ([N, K]) and the result is
    written to dX ([m_size, K]). Output tiles are BLOCK_SIZE_M x BLOCK_SIZE_K;
    the reduction runs over N in steps of BLOCK_SIZE_N.

    Work distribution: each program starts at tile index `program_id(0)` and
    advances by `NUM_SMS` after every tile, walking a global tile numbering
    that concatenates the tiles of all experts.

    - m_sizes_ptr: per-expert row counts, read once per expert.
    - gather_indices_ptr: only read when PERMUTE_X or PERMUTE_Y is set.
    - PERMUTE_X: dY is read in contiguous order, dX rows are scattered through
      the gather indices.
    - PERMUTE_Y: dY rows are gathered through the gather indices, dX is stored
      in contiguous order.
    - N must be divisible by BLOCK_SIZE_N and K by BLOCK_SIZE_K (static asserts).
    """
    TOTAL_TOKENS = NUM_TOKENS * TOPK
    output_dtype = dX_ptr.dtype.element_ty

    tidx = tl.program_id(0)
    # This removes the need for predication along N in the GEMM main loop
    tl.static_assert(N % BLOCK_SIZE_N == 0, "N must be divisible by BLOCK_SIZE_N")
    tl.static_assert(K % BLOCK_SIZE_K == 0, "K must be divisible by BLOCK_SIZE_K")

    # Create TMA descriptors for loading sorted tokens
    # When using TMA load, we don't permute_x, so shape should be [TOTAL_TOKENS, K]
    # Also, we are defining a single global descriptor with single block shape
    # Need to check that this does not result in errors when crossing expert boundaries
    if USE_TMA_LOAD_dY:
        dY_desc = tl.make_tensor_descriptor(
            dY_ptr,
            shape = [TOTAL_TOKENS, N],
            strides = [N, 1],
            block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_N],
        )

    if USE_TMA_LOAD_W:
        # The weights are described as a 3D [E, N, K] tensor; each load fetches
        # one [1, BLOCK_SIZE_N, BLOCK_SIZE_K] block of a single expert.
        expert_stride = N * K
        w_desc = tl.make_tensor_descriptor(
            w_ptr,
            shape = [NUM_EXPERTS, N, K],
            strides = [expert_stride, K, 1],
            block_shape = [1, BLOCK_SIZE_N, BLOCK_SIZE_K],
        )

    # m_end is the running row offset (exclusive end of the previous expert);
    # processed_tiles is the running count of output tiles of previous experts.
    m_end = 0
    processed_tiles = 0
    m_block_range = tl.arange(0, BLOCK_SIZE_M)
    n_block_range = tl.arange(0, BLOCK_SIZE_N)
    k_block_range = tl.arange(0, BLOCK_SIZE_K)

    for expert_idx in range(NUM_EXPERTS, flatten = FLATTEN):
        m_start = m_end
        m_size = tl.load(m_sizes_ptr + expert_idx).to(tl.int32)
        m_end = m_start + m_size

        if m_size > 0:
            # Advance n offset to the weights for that respective expert
            n_start = expert_idx * N
            # N_start_offset = g.to(tl.int64) * N
            # tiles for this group's GEMM
            num_m_tiles = tl.cdiv(m_size, BLOCK_SIZE_M)
            num_k_tiles = tl.cdiv(K, BLOCK_SIZE_K)
            num_tiles_per_expert = num_m_tiles * num_k_tiles

            if USE_TMA_STORE:
                # Need to define descriptor within loop to predicate store along M
                # (the descriptor's row extent is m_end, the end of this expert's rows)
                tl.static_assert(
                    K % BLOCK_SIZE_K == 0, "K must be divisible by BLOCK_SIZE_K"
                )
                dX_desc = tl.make_tensor_descriptor(
                    dX_ptr,
                    shape = [m_end, K],
                    strides = [K, 1],
                    block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_K],
                )

            # Lower bound and upper bound are defined relative to the total tiles processed so far
            # This ensures that we are only processing tiles for the current expert group AND
            # we never exceed the total number of tiles for all expert groups
            while tidx >= processed_tiles and tidx < (
                processed_tiles + num_tiles_per_expert
            ):
                group_index = tidx - processed_tiles

                # Output tile for this thread block for this expert group
                # (tiles are numbered with the M tile index varying fastest)
                tile_m_idx = group_index % num_m_tiles
                tile_k_idx = group_index // num_m_tiles

                if PERMUTE_X or PERMUTE_Y:
                    # These will be used for loading and storing in permuted order
                    gather_offsets = tile_m_idx * BLOCK_SIZE_M + m_block_range
                    # indices_to_gather = m_start + gather_offsets
                    # The modulo wraps offsets past m_size back into this expert's rows;
                    # those wrapped rows are excluded again by row_mask below.
                    indices_to_gather = m_start + tl.max_contiguous(
                        tl.multiple_of(gather_offsets % m_size, BLOCK_SIZE_M),
                        BLOCK_SIZE_M,
                    )
                    expert_token_idx = tl.load(
                        gather_indices_ptr + indices_to_gather,
                        mask = indices_to_gather < TOTAL_TOKENS,
                    )
                    expert_token_offsets = expert_token_idx[:, None]

                    # Masks for permuted load and store
                    row_mask = gather_offsets < m_size
                    row_mask = row_mask[:, None]

                    # We only take into account the following two cases: (PERMUTE_X and NOT PERMUTE_Y) and (NOT PERMUTE_X and PERMUTE_Y)
                    # Hence, we can make the following simplifying assumptions when loading and storing
                    # Note the different strides between the two cases: the offsets for loading and storing are flipped and the strides must also be adjusted

                    if PERMUTE_X:
                        # Case where we permuted on load in the forward pass (typically first grouped GEMM in MoE MLP)
                        load_a_idx = (
                            indices_to_gather[:, None] * N
                        )  # Load in contiguous (expert grouped) order
                        store_idx = (
                            expert_token_offsets * K
                        )  # Permute on store from expert -> token order
                    else:
                        # Case where we permuted on store in the forward pass (typically second grouped GEMM in MoE MLP)
                        load_a_idx = (
                            expert_token_offsets * N
                        )  # Permute on load from token -> expert order
                        store_idx = (
                            indices_to_gather[:, None] * K
                        )  # Store in contiguous order
                else:
                    # # Position in full matrix - needed for TMA
                    # m_offset = (M_start + (tile_m_idx * BLOCK_SIZE_M)).to(tl.int32)
                    # k_offset = (tile_k_idx * BLOCK_SIZE_K).to(tl.int32)
                    # Offsets *relative* to the *current* expert -- m_start will then advance to this expert's start token
                    offs_am = tile_m_idx * BLOCK_SIZE_M + m_block_range

                    # [M, N] @ [N, K] -> [M, K] => Stride for A is N, stride for B is K
                    # We need two additional offsets:
                    # 1. For A, m_start to advance to this expert's start token
                    # 2. For B, n_start to advance to this expert's weights since we are passing in an [E, N, K] weight matrix
                    row_offsets_a = m_start + offs_am[:, None]
                    load_a_idx = row_offsets_a * N
                    store_idx = row_offsets_a * K
                    row_mask = offs_am[:, None] < m_size

                if not USE_TMA_LOAD_dY:
                    dY_ptrs = dY_ptr + load_a_idx + n_block_range[None, :]

                offs_bk = tile_k_idx * BLOCK_SIZE_K + k_block_range
                if not USE_TMA_LOAD_W:
                    row_offsets_b = n_start + n_block_range
                    # offs_bn = n_start + n_block_range
                    # row_offsets_b = tl.max_contiguous(tl.multiple_of(offs_bn, BLOCK_SIZE_N), BLOCK_SIZE_N)
                    w_ptrs = w_ptr + row_offsets_b[:, None] * K + offs_bk[None, :]

                # TODO: check whether predication along K is needed since we checked that K is divisible by BLOCK_SIZE_K in the forward kernel
                # col_mask = offs_bk[None, :] < K
                store_mask = row_mask  # & col_mask

                # Accumulate in float32 regardless of the input/output dtype
                accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_K), dtype = tl.float32)

                # GEMM main loop
                for n_offset in range(0, N, BLOCK_SIZE_N):
                    # dY block [M, N]
                    if not USE_TMA_LOAD_dY:
                        dY = tl.load(dY_ptrs, mask = row_mask)
                    else:
                        dY = dY_desc.load(
                            [m_start + tile_m_idx * BLOCK_SIZE_M, n_offset]
                        )

                    if not USE_TMA_LOAD_W:
                        w = tl.load(w_ptrs)  # , mask=col_mask)
                    else:
                        w = w_desc.load(
                            [expert_idx, n_offset, tile_k_idx * BLOCK_SIZE_K]
                        )
                        # Drop the leading expert dimension of the loaded block
                        w = tl.reshape(w, (BLOCK_SIZE_N, BLOCK_SIZE_K))
                    # TODO: check if predication along K is needed since we checked that K is divisible by BLOCK_SIZE_K in the forward kernel

                    # [M, N] @ [N, K] -> [M, K]
                    dY = dY.to(w.dtype)
                    accumulator += tl.dot(dY, w)  # NOTE: no transpose of b

                    # Advance A along contiguous dimension
                    if not USE_TMA_LOAD_dY:
                        dY_ptrs += BLOCK_SIZE_N
                    # Note we are no longer advancing B along contiguous dimension since weights are arranged as [N, K]
                    # Instead, we need to stride by K to advance to the [N_BLOCK_SIZE, K_BLOCK_SIZE] tile
                    if not USE_TMA_LOAD_W:
                        w_ptrs += BLOCK_SIZE_N * K

                dX = accumulator.to(output_dtype)

                # Writing out a BLOCK_M x BLOCK_K tile, so we need to stride by K
                if USE_TMA_STORE:
                    offset_m = tile_m_idx * BLOCK_SIZE_M  # .to(tl.int32)
                    offset_k = tile_k_idx * BLOCK_SIZE_K  # .to(tl.int32)
                    dX_desc.store([m_start + offset_m, offset_k], dX)
                else:
                    tl.store(
                        dX_ptr + store_idx + offs_bk[None, :],
                        dX,
                        mask = store_mask,
                    )

                # Move to the next tile within this expert group
                tidx += NUM_SMS

            # Update the total tiles count for the next expert group
            processed_tiles += num_tiles_per_expert


# Autotuned variant of the dX kernel: candidate configs come from
# get_dX_kernel_configs() and are filtered by prune_dX_configs.
_autotuned_grouped_gemm_dX_kernel = triton.autotune(
    configs = get_dX_kernel_configs(),
    prune_configs_by = {"early_config_prune": prune_dX_configs},
    # NOTE: NUM_TOKENS removed from key to avoid recompilation for every sequence length
    key = ["NUM_EXPERTS", "N", "K", "PERMUTE_X", "PERMUTE_Y"],
)(_grouped_gemm_dX_kernel)

"""
notes on permute_x:
- for the first grouped GEMM, we permuted on load -> X was [num_tokens, K] and stored y in expert grouped order [num_tokens * topk, N]
- in the backwards pass, we need to permute on load of X while loading dy in contiguous (expert grouped) order
- since we are writing out dW, there is no need to permute on store

notes on permute_y:
- for the second grouped GEMM, we permuted on store -> y was permuted from expert grouped order to token order, x was loaded in expert grouped order since it was the output of the first grouped GEMM
- in the backwards pass, we need to permute on load of dy to get from token order to expert grouped order to match the order of X
- since we are writing out dW, there is no need to permute on store

notes on TMA loading:
- if we're TMA loading both X and dY, then we need to mask along the M dimension
to account for expert boundaries
- we can either
    - define TMA descriptors within the outer for loop to predicate loads
    or
    - mask along M after loading
"""


@triton.jit
def _grouped_gemm_dW_kernel(
    x_ptr,
    dY_ptr,
    dW_ptr,
    m_sizes_ptr,
    gather_indices_ptr,
    # problem sizes
    NUM_TOKENS,
    TOPK: tl.constexpr,
    NUM_EXPERTS: tl.constexpr,
    N: tl.constexpr,
    K: tl.constexpr,
    NUM_SMS,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
    BLOCK_SIZE_M: tl.constexpr,
    PERMUTE_X: tl.constexpr = False,
    PERMUTE_Y: tl.constexpr = False,
    USE_TMA_LOAD_dY: tl.constexpr = False,
    USE_TMA_LOAD_X: tl.constexpr = False,
    USE_TMA_STORE: tl.constexpr = False,
    FLATTEN: tl.constexpr = True,
    acc_dtype: tl.constexpr = tl.float32,
) -> None:
    # Weight-gradient kernel of the grouped GEMM: for every expert e it
    # accumulates dY_e^T @ x_e over the rows belonging to that expert and writes
    # the [N, K] result into dW, which is addressed as [NUM_EXPERTS, N, K].
    #
    # Pointers / sizes as used by the index arithmetic below:
    #   x_ptr               rows of K elements (row stride K)
    #   dY_ptr              rows of N elements (row stride N)
    #   dW_ptr              output, row (expert_idx * N + n) with row stride K
    #   m_sizes_ptr         per-expert row counts; expert row ranges are the running sum
    #   gather_indices_ptr  read only when PERMUTE_X or PERMUTE_Y is set
    #   NUM_SMS             stride used to distribute output tiles across programs
    #
    # Work split: a program starts at output tile program_id(0) and steps by
    # NUM_SMS over the cdiv(N, BLOCK_SIZE_N) * cdiv(K, BLOCK_SIZE_K) tiles of a
    # single [N, K] weight. For each tile it visits every expert in turn.
    #
    # NOTE(review): FLATTEN is accepted but not referenced in the body (its only
    # use is commented out on the tile loop).
    # NOTE(review): experts with m_size == 0 never reach the store, so their dW
    # slice is left untouched -- presumably the caller pre-zeroes dW; confirm.
    # NOTE(review): the masked tl.load calls for x and dY pass no `other`; the
    # reduction runs over the masked (row) dimension, so this relies on masked
    # lanes reading as zero -- confirm against the Triton version in use.
    TOTAL_TOKENS = NUM_TOKENS * TOPK
    TMA_LOAD_BOTH: tl.constexpr = USE_TMA_LOAD_X and USE_TMA_LOAD_dY

    tidx = tl.program_id(0)
    output_dtype = dW_ptr.dtype.element_ty

    # When only one operand is loaded through a descriptor, a single descriptor
    # spanning all TOTAL_TOKENS rows is created once, up front. When both are
    # descriptor-loaded, descriptors are instead rebuilt per expert further down.
    if USE_TMA_LOAD_dY and not TMA_LOAD_BOTH:
        dY_desc = tl.make_tensor_descriptor(
            dY_ptr,
            shape = [TOTAL_TOKENS, N],
            strides = [N, 1],
            block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_N],
        )

    if USE_TMA_LOAD_X and not TMA_LOAD_BOTH:
        x_desc = tl.make_tensor_descriptor(
            x_ptr,
            shape = [TOTAL_TOKENS, K],
            strides = [K, 1],
            block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_K],
        )
    # Output tiles per expert, since each expert weight matrix is [N, K]
    num_n_tiles = tl.cdiv(N, BLOCK_SIZE_N)
    num_k_tiles = tl.cdiv(K, BLOCK_SIZE_K)
    output_tiles_per_expert = num_n_tiles * num_k_tiles

    block_range_m = tl.arange(0, BLOCK_SIZE_M)
    block_range_n = tl.arange(0, BLOCK_SIZE_N)
    block_range_k = tl.arange(0, BLOCK_SIZE_K)

    # NOTE: Important that N % BLOCK_SIZE_N == 0 and K % BLOCK_SIZE_K == 0 when using TMA store
    if USE_TMA_STORE:
        tl.static_assert(N % BLOCK_SIZE_N == 0, "N must be divisible by BLOCK_SIZE_N")
        tl.static_assert(K % BLOCK_SIZE_K == 0, "K must be divisible by BLOCK_SIZE_K")
        dW_desc = tl.make_tensor_descriptor(
            dW_ptr,
            shape = [NUM_EXPERTS, N, K],
            strides = [N * K, K, 1],
            block_shape = [1, BLOCK_SIZE_N, BLOCK_SIZE_K],
        )

    for tile_idx in range(
        tidx, output_tiles_per_expert, NUM_SMS
    ):  # , flatten=FLATTEN):
        # Output tile index
        # (tile_idx enumerates tiles with the N index varying fastest)
        tile_n_idx = tile_idx % num_n_tiles
        tile_k_idx = tile_idx // num_n_tiles

        # Output tile offsets
        n_offset = tile_n_idx * BLOCK_SIZE_N
        k_offset = tile_k_idx * BLOCK_SIZE_K

        # For storing
        # TODO: Check whether the k mask is needed since we statically check that K is divisible by BLOCK_SIZE_K in the forward kernel
        # ditto for n_mask
        n_mask = block_range_n + n_offset < N
        k_mask = block_range_k + k_offset < K
        nk_mask = n_mask[:, None] & k_mask[None, :]

        # m_end carries the running row offset from one expert to the next.
        m_end = 0
        for expert_idx in range(NUM_EXPERTS):
            # We need to instantiate a fresh accumulator for each expert
            accumulator = tl.zeros((BLOCK_SIZE_N, BLOCK_SIZE_K), dtype = acc_dtype)

            m_start = m_end
            # Need to figure out why this cast is needed, otherwise compiler complains about mismatching types
            m_size = tl.load(m_sizes_ptr + expert_idx).to(tl.int32)
            m_end = m_start + m_size

            # NOTE: when storing the result, we need to offset by n_start since we are storing the result for this expert to the global [E, N, K] weight matrix
            n_start = expert_idx * N
            store_row_offs = n_start + n_offset + block_range_n

            if m_size > 0:
                if TMA_LOAD_BOTH:
                    # Per-expert descriptors bounded at m_end: rows at or past
                    # the end of this expert fall outside the descriptor shape
                    # (see the "notes on TMA loading" above this kernel).
                    dY_desc = tl.make_tensor_descriptor(
                        dY_ptr,
                        shape = [m_end, N],
                        strides = [N, 1],
                        block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_N],
                    )

                    x_desc = tl.make_tensor_descriptor(
                        x_ptr,
                        shape = [m_end, K],
                        strides = [K, 1],
                        block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_K],
                    )

                # Reduction over this expert's rows, BLOCK_SIZE_M rows at a time.
                for tile_m_idx in range(0, m_size, BLOCK_SIZE_M):
                    m_block_size = tl.minimum(BLOCK_SIZE_M, m_size - tile_m_idx)

                    if m_block_size > 0:
                        # Global offset for this chunk
                        m_global_offset = m_start + tile_m_idx
                        m_offsets = m_global_offset + block_range_m

                        if PERMUTE_X or PERMUTE_Y:
                            # These will be used for loading and storing in permuted order
                            gather_offsets = (
                                tile_m_idx + block_range_m
                            )  # NOTE: tile_m_idx is already strided by BLOCK_SIZE_M

                            # `% m_size` wraps lanes past the end of the expert back
                            # into its own row range so the index load stays in
                            # bounds; those lanes are excluded via row_load_mask.
                            indices_to_gather = m_start + tl.max_contiguous(
                                tl.multiple_of(gather_offsets % m_size, BLOCK_SIZE_M),
                                BLOCK_SIZE_M,
                            )
                            # indices_to_gather = m_start + gather_offsets
                            expert_token_idx = tl.load(
                                gather_indices_ptr + indices_to_gather,
                                mask = indices_to_gather < TOTAL_TOKENS,
                            )
                            expert_token_offsets = expert_token_idx[:, None]

                            # Masks for permuted load and store
                            row_load_mask = gather_offsets < m_size

                            # We only take into account the following two cases: (PERMUTE_X and NOT PERMUTE_Y) and (NOT PERMUTE_X and PERMUTE_Y)
                            # Hence, we can make the following simplifying assumptions when loading and storing
                            # Note the different strides between the two cases: the offsets for loading and storing are flipped and the strides must also be adjusted
                            if PERMUTE_X:
                                x_row_load_idx = (
                                    (expert_token_offsets // TOPK) * K
                                )  # Permute on load from token -> expert order, divide by TOPK to index from original number of tokens
                                dY_row_load_idx = m_offsets[:, None] * N
                            else:
                                x_row_load_idx = (
                                    indices_to_gather[:, None] * K
                                )  # Load in contiguous order (no permutation on load)
                                dY_row_load_idx = expert_token_offsets * N

                        else:
                            # No permutation: both operands are read from the
                            # same contiguous row range of this expert.
                            x_row_load_idx = m_offsets[:, None] * K
                            dY_row_load_idx = m_offsets[:, None] * N
                            row_load_mask = block_range_m < m_block_size

                        mk_mask = row_load_mask[:, None] & k_mask[None, :]
                        mn_mask = row_load_mask[:, None] & n_mask[None, :]

                        # The descriptor path ignores x_row_load_idx / mk_mask and
                        # always reads the contiguous rows at m_global_offset.
                        if USE_TMA_LOAD_X:
                            x = x_desc.load([m_global_offset, k_offset])
                        else:
                            x = tl.load(
                                x_ptr
                                + x_row_load_idx
                                + (k_offset + block_range_k)[None, :],
                                mask = mk_mask,
                            )

                        # Same for dY: the descriptor path reads contiguous rows.
                        if USE_TMA_LOAD_dY:
                            dY = dY_desc.load([m_global_offset, n_offset])
                        else:
                            dY = tl.load(
                                dY_ptr
                                + dY_row_load_idx
                                + (n_offset + block_range_n)[None, :],
                                mask = mn_mask,
                            )

                        accumulator += tl.dot(
                            dY.T.to(x.dtype),  # [BLOCK_N, BLOCK_M]
                            x,  # [BLOCK_M, BLOCK_K]
                        )

                # One store per (tile, expert), after the full reduction over M.
                y = accumulator.to(output_dtype)
                if USE_TMA_STORE:
                    # Need to expand dims to match [E, N, K] shape
                    y = tl.expand_dims(y, 0)
                    dW_desc.store([expert_idx, n_offset, k_offset], y)
                else:
                    tl.store(
                        dW_ptr
                        # + (n_offset + offs_n)[:, None] * K
                        + store_row_offs[:, None] * K
                        + (k_offset + block_range_k)[None, :],
                        y,
                        mask = nk_mask,
                    )

# Autotuned wrapper around _grouped_gemm_dW_kernel.
# Candidate configs come from get_dW_kernel_configs() and are passed through
# prune_kernel_configs_backward_dW via Triton's "early_config_prune" hook.
_autotuned_grouped_gemm_dW_kernel = triton.autotune(
    configs = get_dW_kernel_configs(),
    prune_configs_by = {"early_config_prune": prune_kernel_configs_backward_dW},
    # NOTE: NUM_TOKENS removed from key to avoid recompilation for every sequence length
    # The tuning result is therefore keyed only on the expert count, the weight
    # dimensions and the two permutation flags.
    key = ["NUM_EXPERTS", "N", "K", "PERMUTE_X", "PERMUTE_Y"],
)(_grouped_gemm_dW_kernel)


================================================
FILE: unsloth/kernels/moe/grouped_gemm/kernels/forward.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import torch
import triton
import triton.language as tl

from .autotuning import (
    get_forward_configs,
    prune_kernel_configs_fwd,
)


#
# PERMUTE_X -> permute tokens so that they are ordered by expert
# PERMUTE_Y -> permute output so that they are ordered by token
# These are effectively the same thing: the former loads in permuted order, the latter stores in permuted order => we only need to define the permutation indices once
# In the former, we use these row indices when loading X
# For the latter, we use these row indices when storing Y
# FUSE_MUL -> multiply routed outputs by their respective weights
# topk_weights are in token order
# Only account for the case when X is in expert order and we are permuting Y when fusing mul -- this precondition is checked in the interface
@triton.jit
def _grouped_gemm_forward_kernel(
    x_ptr,
    w_ptr,
    y_ptr,
    # Variable depending on routed probs
    m_sizes_ptr,
    gather_indices_ptr,
    topk_weights_ptr,
    # Constant problem shapes
    NUM_EXPERTS: tl.constexpr,
    NUM_TOKENS,
    TOPK: tl.constexpr,
    N: tl.constexpr,
    K: tl.constexpr,
    NUM_SMS,
    # Tuning params
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
    PERMUTE_X: tl.constexpr = False,
    PERMUTE_Y: tl.constexpr = False,
    FUSE_MUL_PRE: tl.constexpr = False,
    FUSE_MUL_POST: tl.constexpr = False,
    USE_FAST_ACCUM: tl.constexpr = False,
    USE_TMA_LOAD_W: tl.constexpr = False,
    USE_TMA_LOAD_X: tl.constexpr = False,
    USE_TMA_STORE: tl.constexpr = False,
    acc_dtype: tl.constexpr = tl.float32,
    FLATTEN: tl.constexpr = True,
) -> None:
    # Forward grouped GEMM: for each expert e, computes y_e = x_e @ w_e^T, where
    # w is addressed as [NUM_EXPERTS, N, K] and the rows of expert e are the
    # range [m_start, m_end) obtained from the running sum of m_sizes_ptr.
    #
    # Pointers as used by the index arithmetic below:
    #   x_ptr               rows of K elements (row stride K)
    #   w_ptr               row (expert_idx * N + n) with row stride K
    #   y_ptr               rows of N elements (row stride N)
    #   gather_indices_ptr  read when permuting or fusing the top-k multiply
    #   topk_weights_ptr    read when FUSE_MUL_PRE or FUSE_MUL_POST is set
    #
    # Scheduling: a program starts with tile id program_id(0). The (m, n) output
    # tiles are numbered consecutively across experts; while walking the experts
    # in order, a program handles tiles tidx, tidx + NUM_SMS, ...
    #
    # NOTE(review): USE_FAST_ACCUM is accepted but not referenced in the body.

    # The K loop below loads whole BLOCK_SIZE_K-wide column slabs without a
    # column mask, hence K must tile evenly.
    tl.static_assert(K % BLOCK_SIZE_K == 0)

    TOTAL_TOKENS = NUM_TOKENS * TOPK
    SHOULD_PERMUTE: tl.constexpr = PERMUTE_X or PERMUTE_Y
    SHOULD_FUSE_MUL: tl.constexpr = FUSE_MUL_PRE or FUSE_MUL_POST
    SHOULD_PERMUTE_OR_FUSE: tl.constexpr = SHOULD_PERMUTE or SHOULD_FUSE_MUL
    # tl.static_print("SHOULD_PERMUTE", PERMUTE_X, PERMUTE_Y, FUSE_MUL_PRE, FUSE_MUL_POST, SHOULD_PERMUTE, SHOULD_FUSE, SHOULD_PERMUTE_OR_FUSE)
    tidx = tl.program_id(0)
    output_dtype: tl.dtype = y_ptr.dtype.element_ty

    # Create TMA descriptors for loading sorted tokens
    # When using TMA load, we don't permute_x, so shape should be [TOTAL_TOKENS, K]
    # Also, we are defining a single global descriptor with single block shape
    # Need to check that this does not result in errors when crossing expert boundaries
    if USE_TMA_LOAD_X:
        x_desc = tl.make_tensor_descriptor(
            x_ptr,
            shape = [TOTAL_TOKENS, K],
            strides = [K, 1],
            block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_K],
        )

    if USE_TMA_LOAD_W:
        expert_stride = N * K
        w_desc = tl.make_tensor_descriptor(
            w_ptr,
            shape = [NUM_EXPERTS, N, K],
            strides = [expert_stride, K, 1],
            block_shape = [1, BLOCK_SIZE_N, BLOCK_SIZE_K],
        )

    # m_end: running row offset across experts.
    # processed_tiles: number of tile ids consumed by the experts visited so far.
    m_end = 0
    processed_tiles = 0
    m_block_range = tl.arange(0, BLOCK_SIZE_M)

    for expert_idx in tl.range(NUM_EXPERTS, flatten = FLATTEN):
        m_start = m_end
        m_size = tl.load(m_sizes_ptr + expert_idx).to(tl.int32)
        m_end = m_start + m_size

        if m_size > 0:
            # Row offset of this expert's weight inside w viewed as [E * N, K].
            n_start = expert_idx * N

            num_m_tiles = tl.cdiv(m_size, BLOCK_SIZE_M)
            num_n_tiles = tl.cdiv(N, BLOCK_SIZE_N)
            num_tiles_per_expert = num_m_tiles * num_n_tiles

            # Need to create tma_store within loop since we need to predicate stores based on m_size
            if USE_TMA_STORE:
                y_desc = tl.make_tensor_descriptor(
                    y_ptr,  # + m_start * N,
                    shape = [m_end, N],
                    strides = [N, 1],
                    block_shape = [BLOCK_SIZE_M, BLOCK_SIZE_N],
                )

            # Process tiles for this expert
            # (tidx only ever grows, so once it leaves this expert's tile-id
            # window the loop ends and the next expert is considered)
            while (
                tidx >= processed_tiles
                and tidx < processed_tiles + num_tiles_per_expert
            ):
                tile_idx = tidx - processed_tiles

                # Check if L2 cache re-use for this order is optimal
                tile_m_idx = tile_idx % num_m_tiles
                tile_n_idx = tile_idx // num_m_tiles

                if SHOULD_PERMUTE_OR_FUSE:
                    # These will be used for loading and storing in permuted order
                    gather_offsets = tile_m_idx * BLOCK_SIZE_M + m_block_range
                    # `% m_size` wraps lanes past the end of the expert back into
                    # its own row range; row_mask below excludes those lanes.
                    indices_to_gather = m_start + tl.max_contiguous(
                        tl.multiple_of(gather_offsets % m_size, BLOCK_SIZE_M),
                        BLOCK_SIZE_M,
                    )
                    expert_token_idx = tl.load(
                        gather_indices_ptr + indices_to_gather,
                        mask = indices_to_gather < TOTAL_TOKENS,
                    )
                    expert_token_offsets = expert_token_idx[:, None]

                    # Masks for permuted load and store

                    row_mask = gather_offsets < m_size
                    row_mask = row_mask[:, None]

                    # row_mask = indices_to_gather < m_end
                    # row_mask = row_mask[:, None]

                # We only take into account the following two cases: (PERMUTE_X and NOT PERMUTE_Y) and (NOT PERMUTE_X and PERMUTE_Y)
                # Hence, we can make the following simplifying assumptions when loading and storing
                # Note the different strides between the two cases: the offsets for loading and storing are flipped and the strides must also be adjusted
                if PERMUTE_X:
                    load_idx = (
                        (expert_token_offsets // TOPK) * K
                    )  # Permute on load from token -> expert order, divide by TOPK to index from original number of tokens
                    store_idx = (
                        indices_to_gather[:, None] * N
                    )  # Store in contiguous order
                else:
                    # off_am is only defined on this branch; the descriptor load
                    # of x in the K loop relies on it, i.e. on PERMUTE_X being off.
                    off_am = tile_m_idx * BLOCK_SIZE_M
                    if not PERMUTE_Y:
                        # These will already be computed if permuting y
                        offs_am = off_am + m_block_range
                        row_mask = offs_am[:, None] < m_size
                        row_idx = m_start + offs_am[:, None]
                        store_idx = row_idx * N
                        if not USE_TMA_LOAD_X:
                            load_idx = row_idx * K

                if PERMUTE_Y:
                    if not USE_TMA_LOAD_X:
                        load_idx = (
                            indices_to_gather[:, None] * K
                        )  # Load in contiguous order (no permutation on load)
                    # offs_am = off_am + m_block_range
                    # row_mask = offs_am[:, None] < m_size
                    store_idx = (
                        expert_token_offsets * N
                    )  # Permute on store from expert -> token order

                # We always load topk weights in expert order
                # In the pre-multiplication case, we multiply permuted hidden states by weights before the first gemm
                # In the post-multiplication case, we multiply permuted hidden states by weights after the second gemm
                # In either case, the hidden states are grouped by expert, so we always permute on load of topk weights
                if SHOULD_FUSE_MUL:
                    topk_load_idx = expert_token_offsets

                accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype = acc_dtype)

                offs_k = tl.arange(0, BLOCK_SIZE_K)

                # Pointer paths: build the first K-slab's pointers here and
                # advance them by BLOCK_SIZE_K at the end of each K iteration.
                if not USE_TMA_LOAD_X:
                    x_ptrs = x_ptr + load_idx + offs_k[None, :]

                if not USE_TMA_LOAD_W:
                    offs_bn = tile_n_idx * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
                    # `% N` keeps every weight row index inside [0, N), so the
                    # `offs_bn < N` mask on the weight load below is always true.
                    offs_bn = tl.max_contiguous(
                        tl.multiple_of(offs_bn % N, BLOCK_SIZE_N), BLOCK_SIZE_N
                    )
                    w_ptrs = w_ptr + (n_start + offs_bn[:, None]) * K + offs_k[None, :]

                for k_offset in range(0, K, BLOCK_SIZE_K):
                    if not USE_TMA_LOAD_X:
                        x = tl.load(x_ptrs, mask = row_mask)
                    else:
                        x = x_desc.load([m_start + off_am, k_offset])

                    if FUSE_MUL_PRE:
                        # Check for correct broadcasting
                        # NOTE(review): this load does not depend on k_offset, so it
                        # is repeated unchanged on every K iteration.
                        topk_weights = tl.load(
                            topk_weights_ptr + topk_load_idx, mask = row_mask
                        )
                        x *= topk_weights.to(x.dtype)

                    if not USE_TMA_LOAD_W:
                        w = tl.load(w_ptrs, mask = offs_bn[:, None] < N)
                    else:
                        w = w_desc.load(
                            [expert_idx, tile_n_idx * BLOCK_SIZE_N, k_offset]
                        )
                        # Drop the leading expert dimension of size 1.
                        w = tl.reshape(w, (BLOCK_SIZE_N, BLOCK_SIZE_K))

                    x = x.to(w.dtype)
                    accumulator += tl.dot(x, w.T)

                    if not USE_TMA_LOAD_X:
                        x_ptrs += BLOCK_SIZE_K

                    if not USE_TMA_LOAD_W:
                        w_ptrs += BLOCK_SIZE_K

                y = accumulator.to(output_dtype)

                # NOTE: order of fusing multiplication is important
                # Fusing before accumulator dtype conversion results in numerical diffs
                if FUSE_MUL_POST:
                    # Check for correct broadcasting
                    topk_weights = tl.load(
                        topk_weights_ptr + topk_load_idx, mask = row_mask
                    )
                    y *= topk_weights.to(output_dtype)

                # Recomputed without the `% N` wrap so that columns past N are
                # masked out of the pointer store.
                offs_bn = tile_n_idx * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
                store_mask = row_mask & (offs_bn[None, :] < N)

                if USE_TMA_STORE:
                    # The descriptor store writes contiguous rows at m_start +
                    # offset_m and does not use store_idx / store_mask.
                    offset_m = tile_m_idx * BLOCK_SIZE_M  # .to(tl.int32)
                    offset_n = tile_n_idx * BLOCK_SIZE_N  # .to(tl.int32)
                    y_desc.store([m_start + offset_m, offset_n], y)
                else:
                    tl.store(
                        y_ptr + store_idx + offs_bn[None, :],
                        y,
                        mask = store_mask,
                    )
                # Move to this program's next tile id.
                tidx += NUM_SMS

            # Account for this expert's tiles before moving on to the next expert.
            processed_tiles += num_tiles_per_expert


# Autotuned wrapper around _grouped_gemm_forward_kernel.
# Candidate configs come from get_forward_configs() and are passed through
# prune_kernel_configs_fwd via Triton's "early_config_prune" hook.
_autotuned_grouped_gemm_forward_kernel = triton.autotune(
    configs = get_forward_configs(),
    prune_configs_by = {"early_config_prune": prune_kernel_configs_fwd},
    # NOTE: NUM_TOKENS removed from key to avoid recompilation for every sequence length
    # The kernel handles variable token counts via m_sizes and tile-based processing
    key = [
        "NUM_EXPERTS",
        "N",
        "K",
        "PERMUTE_X",
        "PERMUTE_Y",
        "FUSE_MUL_POST",
    ],
)(_grouped_gemm_forward_kernel)


================================================
FILE: unsloth/kernels/moe/grouped_gemm/kernels/tuning.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

"""
Manual tuning utils
"""

from collections import OrderedDict
from dataclasses import asdict, dataclass, fields
from itertools import product
from typing import Optional

import pandas as pd
import torch
import triton
from triton.runtime.errors import OutOfResources

from .autotuning import (
    BOOLS,
    DEFAULT_K_BLOCK_SIZES,
    DEFAULT_M_BLOCK_SIZES,
    DEFAULT_N_BLOCK_SIZES,
    DEFAULT_NUM_STAGES,
    DEFAULT_NUM_WARPS,
)


@dataclass
class DeviceProperties:
    """Subset of the device properties reported by the Triton driver.

    Populated by ``get_device_properties`` from the driver's property dict.
    """

    # Value of the driver's "multiprocessor_count" entry.
    NUM_SM: int
    # Value of the driver's "max_num_regs" entry.
    NUM_REGS: int
    # Value of the driver's "max_shared_mem" entry.
    SIZE_SMEM: int
    # Value of the driver's "warpSize" entry.
    WARP_SIZE: int


# Process-wide cache filled on the first call to get_device_properties().
_DEVICE_PROPERTIES: Optional[DeviceProperties] = None


def get_device_properties():
    """Return the cached ``DeviceProperties`` of the current CUDA device.

    The Triton driver is queried only on the first call; the result is stored
    in the module-level ``_DEVICE_PROPERTIES`` and returned as-is on later calls.
    """
    global _DEVICE_PROPERTIES
    if _DEVICE_PROPERTIES is not None:
        return _DEVICE_PROPERTIES

    device_index = torch.cuda.current_device()
    raw = triton.runtime.driver.active.utils.get_device_properties(device_index)
    _DEVICE_PROPERTIES = DeviceProperties(
        NUM_SM = raw["multiprocessor_count"],
        NUM_REGS = raw["max_num_regs"],
        SIZE_SMEM = raw["max_shared_mem"],
        WARP_SIZE = raw["warpSize"],
    )
    return _DEVICE_PROPERTIES


@dataclass
class KernelConfig:
    """Tuning parameters and feature flags shared by the kernel config variants."""

    BLOCK_SIZE_M: int = 32
    BLOCK_SIZE_N: int = 32
    BLOCK_SIZE_K: int = 32
    num_warps: int = 4
    num_stages: int = 2
    flatten: bool = True
    permute_x: bool = False
    permute_y: bool = False
    fuse_mul_post: bool = False
    use_tma_store: bool = False

    def to_string(self, include_tuning_params: bool = False, include_tma: bool = False):
        """Return a comma-separated summary of this config.

        The enabled permute flags always come first. With
        ``include_tuning_params`` the block sizes, warps, stages and flatten
        setting follow; with ``include_tma`` the name of every enabled
        ``use_tma_*`` field (subclass fields included) is appended.
        """
        parts = [flag for flag in ("permute_x", "permute_y") if getattr(self, flag)]
        if include_tuning_params:
            parts.append(
                f"BLOCK_SIZE_M={self.BLOCK_SIZE_M},BLOCK_SIZE_N={self.BLOCK_SIZE_N},BLOCK_SIZE_K={self.BLOCK_SIZE_K},num_warps={self.num_warps},num_stages={self.num_stages},flatten={self.flatten}"
            )
        if include_tma:
            parts.extend(
                spec.name
                for spec in fields(self)
                if spec.name.startswith("use_tma_") and getattr(self, spec.name)
            )
        return ",".join(parts)


@dataclass
class KernelConfigForward(KernelConfig):
    """KernelConfig extended with the forward pass's two TMA-load toggles."""

    # Both names start with "use_tma_", so KernelConfig.to_string(include_tma = True)
    # lists them when they are enabled.
    # NOTE(review): presumably these feed the forward kernel's USE_TMA_LOAD_W /
    # USE_TMA_LOAD_X constexprs -- confirm in the interface code.
    use_tma_load_w: bool = False
    use_tma_load_x: bool = False


@dataclass
class KernelConfigBackward_dW(KernelConfig):
    """KernelConfig extended with the dW backward pass's two TMA-load toggles."""

    # Both names start with "use_tma_", so KernelConfig.to_string(include_tma = True)
    # lists them when they are enabled.
    # NOTE(review): presumably these feed the dW kernel's USE_TMA_LOAD_dY /
    # USE_TMA_LOAD_X constexprs -- confirm in the interface code.
    use_tma_load_dy: bool = False
    use_tma_load_x: bool = False


@dataclass
class KernelConfigBackward_dX(KernelConfig):
    """KernelConfig extended with the dX backward pass's two TMA-load toggles."""

    # Both names start with "use_tma_", so KernelConfig.to_string(include_tma = True)
    # lists them when they are enabled.
    # NOTE(review): presumably these feed the dX kernel's TMA-load constexprs --
    # that kernel's signature is not visible here; confirm in the interface code.
    use_tma_load_dy: bool = False
    use_tma_load_w: bool = False


@dataclass
class KernelResult:
    """Timing outcome of one benchmarked kernel configuration."""

    torch_time: float
    triton_time: float
    speedup: float
    kernel_config: "KernelConfig"

    def to_dict(self):
        """Flatten the config fields and the timings into one ordered mapping.

        Config fields come first (in dataclass field order), followed by
        ``torch_time``, ``triton_time`` and ``speedup``.
        """
        return OrderedDict(
            **asdict(self.kernel_config),
            torch_time = self.torch_time,
            triton_time = self.triton_time,
            speedup = self.speedup,
        )

    @staticmethod
    def to_dataframe(
        results: list["KernelResult"], sort_by: str = "speedup", ascending: bool = False
    ):
        """Build a DataFrame with one row per result, sorted by ``sort_by``.

        An empty ``results`` list yields an empty DataFrame. Such a frame has no
        columns, so sorting it by ``sort_by`` would raise ``KeyError`` -- which
        is exactly what happens when every config in a sweep failed.
        """
        df = pd.DataFrame([result.to_dict() for result in results])
        if df.empty:
            return df
        return df.sort_values(by = sort_by, ascending = ascending)

    @staticmethod
    def to_csv(
        results: list["KernelResult"],
        sort_by: str = "speedup",
        ascending: bool = False,
        filename: str = "results.csv",
    ):
        """Write the sorted results table to ``filename`` without the index column."""
        df = KernelResult.to_dataframe(results, sort_by, ascending)
        df.to_csv(filename, index = False)

    @staticmethod
    def print_table(
        results: list["KernelResult"],
        sort_by: str = "speedup",
        ascending: bool = False,
        num_results: int = 10,
    ):
        """Print the first ``num_results`` rows of the sorted results table."""
        df = KernelResult.to_dataframe(results, sort_by, ascending)
        print(df.head(num_results).to_string(index = False))


def get_kernel_configs(
    BLOCK_M = DEFAULT_M_BLOCK_SIZES,
    BLOCK_N = DEFAULT_N_BLOCK_SIZES,
    BLOCK_K = DEFAULT_K_BLOCK_SIZES,
    num_warps = DEFAULT_NUM_WARPS,
    num_stages = DEFAULT_NUM_STAGES,
    use_tma_loads = BOOLS,
    fuse_permute = BOOLS,
):
    """Enumerate forward / dW / dX kernel configs over a tuning grid.

    Every combination of the given iterables produces one candidate per kernel
    kind; each candidate list is then filtered by its matching ``prune_*``
    function. TMA store is always disabled in the generated candidates.

    Returns:
        A ``(forward, backward_dW, backward_dX)`` tuple of pruned config lists.
    """
    # NOTE(review): permute_x and permute_y are both driven by the same flag,
    # and all three pruners reject configs with both set -- so candidates with
    # permute = True appear to be dropped entirely. Confirm this is intended.
    fwd_candidates = []
    dW_candidates = []
    dX_candidates = []

    search_space = product(
        BLOCK_M, BLOCK_N, BLOCK_K, num_warps, num_stages, use_tma_loads, fuse_permute
    )
    for bm, bn, bk, warps, stages, tma_load, permute in search_space:
        # Settings shared by all three config flavours for this grid point.
        shared = dict(
            BLOCK_SIZE_M = bm,
            BLOCK_SIZE_N = bn,
            BLOCK_SIZE_K = bk,
            num_warps = warps,
            num_stages = stages,
            use_tma_store = False,
            permute_x = permute,
            permute_y = permute,
        )
        fwd_candidates.append(
            KernelConfigForward(
                use_tma_load_x = tma_load, use_tma_load_w = tma_load, **shared
            )
        )
        dW_candidates.append(
            KernelConfigBackward_dW(
                use_tma_load_dy = tma_load, use_tma_load_x = tma_load, **shared
            )
        )
        dX_candidates.append(
            KernelConfigBackward_dX(
                use_tma_load_dy = tma_load, use_tma_load_w = tma_load, **shared
            )
        )

    return (
        prune_kernel_configs_fwd(fwd_candidates),
        prune_kernel_configs_backward_dW(dW_candidates),
        prune_kernel_configs_backward_dX(dX_candidates),
    )


def prune_kernel_configs_fwd(configs: list[KernelConfigForward]):
    """Return the forward configs that pass the flag-compatibility rules.

    A config is dropped when it combines TMA load of X with permute_x, sets
    both permute_x and permute_y, or combines TMA store with permute_y.
    Input order is preserved.
    """
    return [
        cfg
        for cfg in configs
        if not (
            (cfg.use_tma_load_x and cfg.permute_x)
            or (cfg.permute_x and cfg.permute_y)
            or (cfg.use_tma_store and cfg.permute_y)
        )
    ]


def prune_kernel_configs_backward_dX(configs: list[KernelConfigBackward_dX]):
    """Return the dX backward configs that pass the flag-compatibility rules.

    A config is dropped when it combines TMA load of dY with permute_y, sets
    both permute_x and permute_y, or combines TMA store with permute_x.
    Input order is preserved.
    """
    return [
        cfg
        for cfg in configs
        if not (
            (cfg.use_tma_load_dy and cfg.permute_y)
            or (cfg.permute_x and cfg.permute_y)
            or (cfg.use_tma_store and cfg.permute_x)
        )
    ]


def prune_kernel_configs_backward_dW(configs: list[KernelConfigBackward_dW]):
    """Return the dW backward configs that pass the flag-compatibility rules.

    A config is dropped when it combines TMA load of dY with permute_y,
    combines TMA load of X with permute_x, or sets both permute_x and
    permute_y. Input order is preserved.
    """
    return [
        cfg
        for cfg in configs
        if not (
            (cfg.use_tma_load_dy and cfg.permute_y)
            or (cfg.use_tma_load_x and cfg.permute_x)
            or (cfg.permute_x and cfg.permute_y)
        )
    ]


class TritonTuningContext:
    """Context manager that records whether a kernel config ran successfully.

    Exceptions raised inside the ``with`` body are reported with ``print`` and
    suppressed, so a tuning sweep can move on to the next config; ``success``
    is set to ``False`` in that case. Exceptions that do not derive from
    ``Exception`` (``KeyboardInterrupt``, ``SystemExit``, ...) are never
    suppressed, so the sweep can still be interrupted.
    """

    def __init__(self, kernel_config: "KernelConfig"):
        self.kernel_config = kernel_config
        self.success = True

    def __enter__(self):
        # Setup code can be added here if needed
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            # Body finished normally; the return value is irrelevant here.
            return True

        self.success = False

        if not issubclass(exc_type, Exception):
            # Interrupts and interpreter-exit requests must reach the caller.
            return False

        if issubclass(exc_type, OutOfResources):
            name = exc_value.name
            required = exc_value.required
            limit = exc_value.limit
            print(
                f"Kernel config {self.kernel_config} failed: {name}, required: {required}, limit: {limit}"
            )
        else:
            print(
                f"Error running Triton grouped GEMM for kernel config: {self.kernel_config}: {exc_value}"
            )
        # Returning True suppresses the (already reported) exception.
        return True


================================================
FILE: unsloth/kernels/moe/grouped_gemm/reference/__init__.py
================================================


================================================
FILE: unsloth/kernels/moe/grouped_gemm/reference/layers/llama4_moe.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

from dataclasses import dataclass
from typing import Tuple

import torch
import torch.nn.functional as F
from transformers.models.llama4 import Llama4TextConfig
from transformers.models.llama4.modeling_llama4 import Llama4TextMoe

from ...interface import grouped_gemm
from ...kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from ..moe_ops import (
    get_routing_indices,
    permute,
    torch_grouped_gemm,
    unpermute,
)

"""
Reference implementation of Llama4 MoE block using triton grouped gemm.

`Llama4GroupedGemmTextMoe` is the HF `Llama4TextMoe` block implemented with a torch-native grouped gemm.
`Llama4TritonTextMoe` is the HF `Llama4TextMoe` implemented with triton grouped gemm.
"""


@dataclass
class Llama4MoeResult:
    """Bundle of tensors captured from one Llama4 MoE block evaluation.

    NOTE(review): the code that fills this in is not visible here; the
    per-field notes below are inferred from the field names -- confirm against
    the producer.
    """

    # Routing information (presumably per-expert token counts, the gather
    # permutation and the selected top-k routing weights).
    token_counts_by_expert: torch.Tensor
    gather_indices: torch.Tensor
    topk_weights: torch.Tensor
    # Intermediate activations, listed in what appears to be pipeline order.
    hidden_states_after_weight_merge: torch.Tensor
    first_gemm: torch.Tensor
    intermediate: torch.Tensor
    second_gemm: torch.Tensor
    hidden_states_unpermute: torch.Tensor
    # Outputs.
    shared_expert_out: torch.Tensor
    final_out: torch.Tensor
    # Optional: defaults to None even though it is annotated as a Tensor.
    router_logits: torch.Tensor = None


class Llama4GroupedGemmTextMoe(Llama4TextMoe):
    """HF `Llama4TextMoe` block whose routed experts run through `torch_grouped_gemm`.

    On construction the expert weights are re-strided in place to the transposed
    shapes (`gate_up_proj`: [E, 2N, K], `down_proj`: [E, K, N]); the storage is only
    reinterpreted, so `copy_weights` must be used to load HF weights afterwards.

    Args:
        config: Llama4 text config forwarded to `Llama4TextMoe`.
        overlap_router_shared: run the shared expert on a separate CUDA stream.
        verbose: print shape / stride information while re-striding and copying.
        debug: make `forward` return a `Llama4MoeResult` with all intermediates.
    """

    EXPERT_WEIGHT_NAMES = ["experts.gate_up_proj", "experts.down_proj"]

    def __init__(
        self,
        config: Llama4TextConfig,
        overlap_router_shared = False,
        verbose = False,
        debug = False,
    ):
        super().__init__(config)
        self.overlap_router_shared = overlap_router_shared
        self.verbose = verbose
        self.debug = debug

        E, K, N = self.num_experts, self.hidden_dim, self.experts.expert_dim

        # (attribute on self.experts, shape expected from HF, new shape, new stride)
        layouts = (
            ("gate_up_proj", [E, K, 2 * N], [E, 2 * N, K], [2 * N * K, K, 1]),
            ("down_proj", [E, N, K], [E, K, N], [K * N, N, 1]),
        )
        for weight_name, hf_shape, permuted_shape, permuted_stride in layouts:
            weight = getattr(self.experts, weight_name)
            assert weight.shape == torch.Size(
                hf_shape
            ), f"{weight.shape} != {hf_shape}"
            if verbose:
                print(
                    f"Changing {weight_name} from {weight.size()}:{weight.stride()} to {permuted_shape}:{permuted_stride}"
                )
            # In-place reinterpretation of the parameter storage; kept out of autograd.
            with torch.no_grad():
                weight.as_strided_(permuted_shape, permuted_stride)
            if verbose:
                print(f"{weight.shape}:{weight.stride()}")

        if overlap_router_shared:
            self.shared_expert_stream = torch.cuda.Stream()
            self.default_event = torch.cuda.Event()
            self.shared_expert_end_event = torch.cuda.Event()

    @torch.no_grad
    def copy_weights(self, other: Llama4TextMoe):
        """Copy every parameter of `other` into this block and return `self`.

        Parameters whose name contains an entry of `EXPERT_WEIGHT_NAMES` are
        permuted with `(0, 2, 1)` first so they match the re-strided layout.
        """
        for name, source in other.named_parameters():
            if self.verbose:
                print(f"Copying {name} with shape {source.shape}")
            target = self.get_parameter(name)

            is_expert_weight = any(key in name for key in self.EXPERT_WEIGHT_NAMES)
            if is_expert_weight:
                source = source.permute(0, 2, 1)

            assert target.shape == source.shape, f"{target.shape} != {source.shape}"
            target.copy_(source)

        return self

    def check_weights(self, other: Llama4TextMoe):
        """Assert that all parameters equal those of `other` (experts compared transposed) and are contiguous."""
        for name, reference in other.named_parameters():
            if any(key in name for key in self.EXPERT_WEIGHT_NAMES):
                reference = reference.permute(0, 2, 1)
            own = self.get_parameter(name)
            assert own.equal(reference), f"Param {name} not equal!"
            assert own.is_contiguous(), f"{name} not contiguous!"

    def act_and_mul(self, x: torch.Tensor) -> torch.Tensor:
        """Split the last dim of `x` into gate / up halves and return `act_fn(gate) * up`."""
        split = self.experts.expert_dim
        assert x.shape[-1] == 2 * split
        gate_half = x[..., :split]
        up_half = x[..., split:]
        return self.experts.act_fn(gate_half) * up_half

    def run_router(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Return `(router_logits, routing_weights, selected_experts)` for the given hidden states.

        The routing weights are the sigmoid (computed in float32) of the top-k
        logits, cast back to the dtype of the hidden states.
        """
        # router_logits: (batch * sequence_length, n_experts)
        flat_states = hidden_states.view(-1, self.hidden_dim)
        router_logits = self.router(flat_states)
        top = torch.topk(router_logits, self.top_k, dim = -1)
        routing_weights = F.sigmoid(top.values.float()).to(flat_states.dtype)
        return router_logits, routing_weights, top.indices

    def get_token_counts_and_gather_indices(
        self, selected_experts: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the per-expert token counts and gather indices from `get_routing_indices`."""
        routing = get_routing_indices(selected_experts, self.num_experts)
        token_counts_by_expert, gather_indices = routing
        # Auxiliary index tensors must stay outside the autograd graph.
        assert not token_counts_by_expert.requires_grad
        assert not gather_indices.requires_grad
        return token_counts_by_expert, gather_indices

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Run router, shared expert and routed experts on a (batch, seq, hidden) input.

        Returns `(final_out, routing_weights)`, or a `Llama4MoeResult` holding all
        intermediates when `self.debug` is set.

        NOTE(review): the `view(num_tokens, top_k, hidden_dim)` of the flattened
        input below only has a matching element count when `top_k == 1`.
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        num_tokens = batch_size * sequence_length
        total_tokens = num_tokens * self.top_k
        hidden_states = hidden_states.view(-1, hidden_dim)

        overlap = self.overlap_router_shared
        if overlap:
            # Marker for all prior ops on default stream
            self.default_event.record()

        router_logits, routing_weights, selected_experts = self.run_router(
            hidden_states
        )
        expected_weights_shape = (num_tokens, self.top_k)
        assert (
            routing_weights.shape == expected_weights_shape
        ), f"{routing_weights.shape} != {expected_weights_shape}"

        if not overlap:
            shared_expert_out = self.shared_expert(hidden_states)
        else:
            side_stream = self.shared_expert_stream
            with torch.cuda.stream(side_stream):
                # Ensure prior kernels on default stream complete
                self.default_event.wait()

                shared_expert_out = self.shared_expert(hidden_states)
                # Ensure hidden states remains valid on this stream
                hidden_states.record_stream(side_stream)

                self.shared_expert_end_event.record()

            # Ensure shared expert still valid on default stream
            shared_expert_out.record_stream(torch.cuda.current_stream())
            self.shared_expert_end_event.wait()

        # Router weights are multiplied into the expert inputs up front.
        weighted = hidden_states.view(num_tokens, self.top_k, hidden_dim)
        weighted = weighted * routing_weights[..., None]
        if self.top_k > 1:
            weighted = weighted.sum(dim = 1)
        hidden_states_after_weight_merge = weighted.view(-1, hidden_dim)

        # 1. Compute tokens per expert and indices for gathering tokens from token order to expert order
        # NOTE: these are auxiliary data structs which don't need to be recorded in autograd graph
        token_counts_by_expert, gather_indices = (
            self.get_token_counts_and_gather_indices(selected_experts)
        )

        # 2. Permute tokens from token order to expert order
        permuted = permute(hidden_states_after_weight_merge, gather_indices, self.top_k)
        assert permuted.shape == (total_tokens, hidden_dim)

        # Expert computation: gate/up projection -> activation -> down projection
        expert_dim = self.experts.expert_dim
        first_gemm = torch_grouped_gemm(
            X = permuted, W = self.experts.gate_up_proj, m_sizes = token_counts_by_expert
        )
        assert first_gemm.shape == (total_tokens, 2 * expert_dim)

        intermediate = self.act_and_mul(first_gemm)
        assert intermediate.shape == (total_tokens, expert_dim)

        second_gemm = torch_grouped_gemm(
            X = intermediate, W = self.experts.down_proj, m_sizes = token_counts_by_expert
        )
        assert second_gemm.shape == (total_tokens, hidden_dim)

        # Post-processing: back to token order, then add the shared expert
        hidden_states_unpermute = unpermute(second_gemm, gather_indices)
        assert hidden_states_unpermute.shape == (total_tokens, hidden_dim)

        final_out = hidden_states_unpermute + shared_expert_out

        if not self.debug:
            return final_out, routing_weights

        return Llama4MoeResult(
            token_counts_by_expert = token_counts_by_expert,
            gather_indices = gather_indices,
            topk_weights = routing_weights,
            hidden_states_after_weight_merge = hidden_states_after_weight_merge,
            first_gemm = first_gemm,
            intermediate = intermediate,
            second_gemm = second_gemm,
            hidden_states_unpermute = hidden_states_unpermute,
            shared_expert_out = shared_expert_out,
            final_out = final_out,
            router_logits = router_logits,
        )


class Llama4TritonTextMoe(Llama4GroupedGemmTextMoe):
    """HF `Llama4TextMoe` block whose routed experts run through the triton `grouped_gemm`.

    Weight handling (`copy_weights`, `check_weights`), routing (`run_router`,
    `get_token_counts_and_gather_indices`) and the activation (`act_and_mul`)
    are inherited unchanged from `Llama4GroupedGemmTextMoe`.

    Args:
        config: Llama4 text config forwarded to the parent block.
        overlap_router_shared: run the shared expert on a separate CUDA stream.
        permute_x: must be False; router weights are pre-multiplied into the
            expert inputs, so the input permutation cannot be passed to the kernel.
        permute_y: passed to the second grouped gemm; when False, `forward`
            calls `unpermute` explicitly afterwards.
        autotune: passed through to `grouped_gemm`; when False all three kernel
            configs must be provided.
        kernel_config_fwd / kernel_config_bwd_dW / kernel_config_bwd_dX:
            manual kernel configs passed through to `grouped_gemm`.
        dW_only / dX_only: passed through to `grouped_gemm`.
        verbose: print shape / stride information while re-striding and copying.
    """

    def __init__(
        self,
        config: Llama4TextConfig,
        overlap_router_shared = False,
        permute_x: bool = False,
        permute_y: bool = True,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
        dW_only: bool = False,
        dX_only: bool = False,
        verbose = False,
    ):
        # `verbose` has to be forwarded explicitly; otherwise the parent stores its
        # default (False) and the flag passed by the caller is silently ignored.
        super().__init__(
            config,
            overlap_router_shared = overlap_router_shared,
            verbose = verbose,
        )
        assert not permute_x, "Llama4 triton grouped gemm does not support permute x due to pre-multiplication of router weights"
        self.permute_x = permute_x
        self.permute_y = permute_y
        self.autotune = autotune
        if not autotune:
            assert (
                kernel_config_fwd is not None
                and kernel_config_bwd_dW is not None
                and kernel_config_bwd_dX is not None
            ), "Kernel configs must be provided if autotune is False"
        self.kernel_config_fwd = kernel_config_fwd
        self.kernel_config_bwd_dW = kernel_config_bwd_dW
        self.kernel_config_bwd_dX = kernel_config_bwd_dX
        self.dW_only = dW_only
        self.dX_only = dX_only

    def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run router, shared expert and triton grouped-gemm experts.

        Args:
            hidden_states: input of shape (batch, seq, hidden).

        Returns:
            `(hidden_states, routing_weights)`: the routed-expert output with the
            shared-expert output added, and the sigmoid top-k routing weights.

        NOTE(review): the `view(num_tokens, top_k, hidden_dim)` of the flattened
        input below only has a matching element count when `top_k == 1`.
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        num_tokens = batch_size * sequence_length
        total_tokens = num_tokens * self.top_k
        hidden_states = hidden_states.view(-1, hidden_dim)

        if self.overlap_router_shared:
            # Marker for all prior ops on default stream
            self.default_event.record()

        router_logits, routing_weights, selected_experts = self.run_router(
            hidden_states
        )
        assert routing_weights.shape == (
            num_tokens,
            self.top_k,
        ), f"{routing_weights.shape} != {(num_tokens, self.top_k)}"

        if self.overlap_router_shared:
            with torch.cuda.stream(self.shared_expert_stream):
                # Ensure prior kernels on default stream complete
                self.default_event.wait()

                shared_expert_out = self.shared_expert(hidden_states)
                # Ensure hidden states remains valid on this stream
                hidden_states.record_stream(self.shared_expert_stream)

                self.shared_expert_end_event.record()

            # Ensure shared expert still valid on default stream
            shared_expert_out.record_stream(torch.cuda.current_stream())
            self.shared_expert_end_event.wait()
        else:
            shared_expert_out = self.shared_expert(hidden_states)

        # Router weights are multiplied into the expert inputs up front.
        hidden_states = (
            hidden_states.view(num_tokens, self.top_k, hidden_dim)
            * routing_weights[..., None]
        )

        if self.top_k > 1:
            hidden_states = hidden_states.sum(dim = 1)
        hidden_states = hidden_states.view(-1, hidden_dim)

        # 1. Compute tokens per expert and indices for gathering tokens from token order to expert order
        # NOTE: these are auxiliary data structs which don't need to be recorded in autograd graph
        token_counts_by_expert, gather_indices = (
            self.get_token_counts_and_gather_indices(selected_experts)
        )

        # 2. Permute tokens from token order to expert order
        hidden_states = permute(hidden_states, gather_indices, self.top_k)
        assert hidden_states.shape == (total_tokens, hidden_dim)

        # Start expert computation
        hidden_states = grouped_gemm(
            X = hidden_states,
            W = self.experts.gate_up_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = self.permute_x,
            permute_y = False,  # output of first grouped gemm should never be permuted
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = True,
            dW_only = self.dW_only,
            dX_only = self.dX_only,
        )
        hidden_states = self.act_and_mul(hidden_states)
        hidden_states = grouped_gemm(
            X = hidden_states,
            W = self.experts.down_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = False,
            permute_y = self.permute_y,
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = False,
            dW_only = self.dW_only,
            dX_only = self.dX_only,
        )

        # Post-processing
        # 1. Unpermute from expert order to token order (skipped when the second
        #    grouped gemm was already asked to do it via permute_y)
        if not self.permute_y:
            hidden_states = unpermute(hidden_states, gather_indices)
        hidden_states += shared_expert_out

        return hidden_states, routing_weights


================================================
FILE: unsloth/kernels/moe/grouped_gemm/reference/layers/qwen3_moe.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

from dataclasses import dataclass
from typing import Tuple

import torch
import torch.nn.functional as F
from transformers.models.qwen3_moe.configuration_qwen3_moe import Qwen3MoeConfig
from transformers.models.qwen3_moe.modeling_qwen3_moe import (
    ACT2FN,
    Qwen3MoeSparseMoeBlock,
)

from ...interface import grouped_gemm
from ...kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from ..moe_ops import (
    get_routing_indices,
    permute,
    torch_grouped_gemm,
    unpermute,
)

"""
Reference implementation of HF Qwen3 MoE block using grouped gemm.

The Qwen3MoeGroupedGEMMBlock is a reference torch-native implementation.
Qwen3MoeFusedGroupedGEMMBlock is a version using the triton grouped gemm kernel.

NOTE: This is NOT to be used for production as it contains many extra checks and saves all intermediate results for debugging.
"""


@dataclass
class GroupedGEMMResult:
    """Intermediate tensors returned by `Qwen3MoeGroupedGEMMBlock.forward` for debugging."""

    # Both values come straight from `get_routing_indices(selected_experts, num_experts)`.
    token_counts_by_expert: torch.Tensor
    gather_indices: torch.Tensor
    # Top-k routing weights returned by `run_router`, in the hidden-state dtype.
    topk_weights: torch.Tensor
    # Output of the grouped GEMM against `gate_up_proj`.
    first_gemm: torch.Tensor
    # `act_and_mul(first_gemm)`, i.e. act_fn(gate half) * up half.
    intermediate: torch.Tensor
    # Output of the grouped GEMM against `down_proj`.
    second_gemm: torch.Tensor
    # `unpermute(second_gemm, gather_indices)`.
    hidden_states_unpermute: torch.Tensor
    hidden_states: torch.Tensor  # final output


class Qwen3MoeGroupedGEMMBlock(torch.nn.Module):
    """Torch-native grouped-GEMM reference for the HF Qwen3 sparse MoE block.

    Args:
        config: object exposing `num_experts`, `num_experts_per_tok`,
            `norm_topk_prob`, `hidden_size`, `moe_intermediate_size`, `hidden_act`.
        gate: router weight of shape (num_experts, hidden_size).
        gate_up_proj: fused gate/up expert weights of shape
            (num_experts, 2 * moe_intermediate_size, hidden_size).
        down_proj: expert down-projection weights of shape
            (num_experts, hidden_size, moe_intermediate_size).
    """

    def __init__(
        self,
        config,
        gate: torch.Tensor,
        gate_up_proj: torch.Tensor,
        down_proj: torch.Tensor,
    ):
        super().__init__()
        n_experts = config.num_experts
        self.num_experts = n_experts
        self.top_k = config.num_experts_per_tok
        self.norm_topk_prob = config.norm_topk_prob
        hidden = config.hidden_size
        self.hidden_size = hidden
        inter = config.moe_intermediate_size
        self.moe_intermediate_size = inter

        # Validate all incoming weights before registering any parameter.
        assert gate.shape == (n_experts, hidden)
        assert gate_up_proj.shape == (n_experts, 2 * inter, hidden)
        assert down_proj.shape == (n_experts, hidden, inter)

        # gating
        self.gate = torch.nn.Parameter(gate)

        # experts
        self.gate_up_proj = torch.nn.Parameter(gate_up_proj, requires_grad = True)
        self.down_proj = torch.nn.Parameter(down_proj, requires_grad = True)
        self.act_fn = ACT2FN[config.hidden_act]

    @staticmethod
    def extract_hf_weights(moe_block: Qwen3MoeSparseMoeBlock):
        """Return `(gate, gate_up_proj, down_proj)` stacked from the per-expert HF modules.

        `gate_up_proj` is the concatenation of the stacked `gate_proj` and
        `up_proj` weights along dim 1.
        """
        config: Qwen3MoeConfig = moe_block.experts[0].config
        expert_ids = range(config.num_experts)

        def stack_weights(proj_name):
            per_expert = [
                getattr(moe_block.experts[i], proj_name).weight.data
                for i in expert_ids
            ]
            return torch.stack(per_expert, dim = 0)

        gate = moe_block.gate.weight.data
        gate_proj = stack_weights("gate_proj")
        up_proj = stack_weights("up_proj")
        down_proj = stack_weights("down_proj")
        gate_up_proj = torch.cat([gate_proj, up_proj], dim = 1)
        return gate, gate_up_proj, down_proj

    @classmethod
    def from_hf(cls, moe_block: Qwen3MoeSparseMoeBlock):
        """Build a block from an HF `Qwen3MoeSparseMoeBlock`, using the config of its first expert."""
        config: Qwen3MoeConfig = moe_block.experts[0].config
        weights = cls.extract_hf_weights(moe_block)
        return cls(config, *weights)

    def check_weights(self, moe_block: Qwen3MoeSparseMoeBlock):
        """Assert that every expert slice equals the corresponding HF expert weights."""
        for expert_idx in range(self.num_experts):
            hf_expert = moe_block.experts[expert_idx]
            hf_gate_up = torch.cat(
                [hf_expert.gate_proj.weight.data, hf_expert.up_proj.weight.data],
                dim = 0,
            )
            assert self.gate_up_proj[expert_idx].equal(hf_gate_up)
            assert self.down_proj[expert_idx].equal(hf_expert.down_proj.weight.data)

    def act_and_mul(self, x: torch.Tensor) -> torch.Tensor:
        """Split the last dim of `x` into gate / up halves and return `act_fn(gate) * up`."""
        split = self.moe_intermediate_size
        assert x.shape[-1] == 2 * split
        gate_half = x[..., :split]
        up_half = x[..., split:]
        return self.act_fn(gate_half) * up_half

    def run_router(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Return `(router_logits, routing_weights, selected_experts)`.

        Routing weights are the top-k of a float32 softmax over the logits,
        optionally renormalised to sum to one, then cast to the input dtype.
        """
        # router_logits: (batch * sequence_length, n_experts)
        router_logits = F.linear(hidden_states, self.gate)

        probs = F.softmax(router_logits, dim = 1, dtype = torch.float)
        routing_weights, selected_experts = torch.topk(probs, self.top_k, dim = -1)
        if self.norm_topk_prob:  # only diff with mixtral sparse moe block!
            routing_weights /= routing_weights.sum(dim = -1, keepdim = True)

        # we cast back to the input dtype
        return router_logits, routing_weights.to(hidden_states.dtype), selected_experts

    def get_token_counts_and_gather_indices(
        self, selected_experts: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return the per-expert token counts and gather indices from `get_routing_indices`."""
        routing = get_routing_indices(selected_experts, self.num_experts)
        token_counts_by_expert, gather_indices = routing
        # Auxiliary index tensors must stay outside the autograd graph.
        assert not token_counts_by_expert.requires_grad
        assert not gather_indices.requires_grad
        return token_counts_by_expert, gather_indices

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Run the MoE block on a (batch, seq, hidden) input.

        Returns a tuple `(GroupedGEMMResult, router_logits)`; the result object
        carries every intermediate tensor plus the final output reshaped to
        (batch, seq, hidden).
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        num_tokens = batch_size * sequence_length
        total_tokens = num_tokens * self.top_k
        inter = self.moe_intermediate_size

        flat_states = hidden_states.view(-1, hidden_dim)

        router_logits, routing_weights, selected_experts = self.run_router(flat_states)

        # 1. Compute tokens per expert and indices for gathering tokens from token order to expert order
        # NOTE: these are auxiliary data structs which don't need to be recorded in autograd graph
        token_counts_by_expert, gather_indices = (
            self.get_token_counts_and_gather_indices(selected_experts)
        )

        # 2. Permute tokens from token order to expert order
        permuted = permute(flat_states, gather_indices, self.top_k)
        assert permuted.shape == (total_tokens, hidden_dim)

        # Expert computation: gate/up projection -> activation -> down projection
        first_gemm = torch_grouped_gemm(
            X = permuted, W = self.gate_up_proj, m_sizes = token_counts_by_expert
        )
        assert first_gemm.shape == (total_tokens, 2 * inter)

        intermediate = self.act_and_mul(first_gemm)
        assert intermediate.shape == (total_tokens, inter)

        second_gemm = torch_grouped_gemm(
            X = intermediate, W = self.down_proj, m_sizes = token_counts_by_expert
        )
        assert second_gemm.shape == (total_tokens, hidden_dim)

        # Post-processing
        # 1. Unpermute from expert order to token order
        hidden_states_unpermute = unpermute(second_gemm, gather_indices)
        assert hidden_states_unpermute.shape == (total_tokens, hidden_dim)

        # 2. Merge topk weights
        per_choice = hidden_states_unpermute.view(num_tokens, self.top_k, hidden_dim)
        merged = (per_choice * routing_weights[..., None]).sum(dim = 1)
        assert merged.shape == (num_tokens, hidden_dim)

        output = merged.view(batch_size, sequence_length, hidden_dim)
        result = GroupedGEMMResult(
            token_counts_by_expert = token_counts_by_expert,
            gather_indices = gather_indices,
            topk_weights = routing_weights,
            first_gemm = first_gemm,
            intermediate = intermediate,
            second_gemm = second_gemm,
            hidden_states_unpermute = hidden_states_unpermute,
            hidden_states = output,
        )
        return result, router_logits


class Qwen3MoeFusedGroupedGEMMBlock(Qwen3MoeGroupedGEMMBlock):
    """Qwen3 MoE block that uses the triton `grouped_gemm` instead of the torch-native one.

    Args:
        config, gate, gate_up_proj, down_proj: see `Qwen3MoeGroupedGEMMBlock`.
        permute_x: passed to the first grouped gemm; when False, `forward`
            calls `permute` explicitly before it.
        permute_y: passed to the second grouped gemm; when False, `forward`
            calls `unpermute` explicitly after it.
        autotune: passed through to `grouped_gemm`; when False all three kernel
            configs must be provided.
        kernel_config_fwd / kernel_config_bwd_dW / kernel_config_bwd_dX:
            manual kernel configs passed through to `grouped_gemm`.
        dW_only / dX_only: passed through to `grouped_gemm`.
    """

    def __init__(
        self,
        config: Qwen3MoeConfig,
        gate: torch.Tensor,
        gate_up_proj: torch.Tensor,
        down_proj: torch.Tensor,
        permute_x: bool = True,
        permute_y: bool = True,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
        dW_only: bool = False,
        dX_only: bool = False,
    ):
        super().__init__(config, gate, gate_up_proj, down_proj)
        self.permute_x = permute_x
        self.permute_y = permute_y
        self.autotune = autotune
        if not autotune:
            assert (
                kernel_config_fwd is not None
                and kernel_config_bwd_dW is not None
                and kernel_config_bwd_dX is not None
            ), "Kernel configs must be provided if autotune is False"
        self.kernel_config_fwd = kernel_config_fwd
        self.kernel_config_bwd_dW = kernel_config_bwd_dW
        self.kernel_config_bwd_dX = kernel_config_bwd_dX
        self.dW_only = dW_only
        self.dX_only = dX_only

    @classmethod
    def from_hf(
        cls,
        moe_block: Qwen3MoeSparseMoeBlock,
        permute_x: bool = True,
        permute_y: bool = True,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
        dW_only: bool = False,
        dX_only: bool = False,
    ):
        """Build a fused block from an HF `Qwen3MoeSparseMoeBlock`, using the config of its first expert."""
        config: Qwen3MoeConfig = moe_block.experts[0].config
        # Resolve through `cls` (as the parent's `from_hf` does) so a subclass
        # overriding `extract_hf_weights` is honoured.
        gate, gate_up_proj, down_proj = cls.extract_hf_weights(moe_block)
        return cls(
            config,
            gate,
            gate_up_proj,
            down_proj,
            permute_x = permute_x,
            permute_y = permute_y,
            autotune = autotune,
            kernel_config_fwd = kernel_config_fwd,
            kernel_config_bwd_dW = kernel_config_bwd_dW,
            kernel_config_bwd_dX = kernel_config_bwd_dX,
            dW_only = dW_only,
            dX_only = dX_only,
        )

    def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run the MoE block on a (batch, seq, hidden) input.

        Returns:
            `(hidden_states, router_logits)`: the top-k weighted expert output
            reshaped to (batch, seq, hidden), and the raw router logits.
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        num_tokens = batch_size * sequence_length

        hidden_states = hidden_states.view(-1, hidden_dim)

        router_logits, routing_weights, selected_experts = self.run_router(
            hidden_states
        )
        # Pre-processing
        # 1. Compute tokens per expert and indices for gathering tokens from token order to expert order
        # NOTE: these are auxiliary data structs which don't need to be recorded in autograd graph
        token_counts_by_expert, gather_indices = (
            self.get_token_counts_and_gather_indices(selected_experts)
        )

        # 2. permute_x -> permutation will be fused in prologue of first grouped gemm
        if not self.permute_x:
            hidden_states = permute(hidden_states, gather_indices, self.top_k)
        # Start expert computation
        hidden_states = grouped_gemm(
            X = hidden_states,
            W = self.gate_up_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = self.permute_x,
            permute_y = False,  # output of first grouped gemm should never be permuted
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = True,
            dW_only = self.dW_only,
            dX_only = self.dX_only,
        )
        hidden_states = self.act_and_mul(hidden_states)
        hidden_states = grouped_gemm(
            X = hidden_states,
            W = self.down_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = False,
            permute_y = self.permute_y,
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = False,
            dW_only = self.dW_only,
            dX_only = self.dX_only,
        )

        # Post-processing
        # 1. Unpermute from expert order to token order (skipped when the second
        #    grouped gemm was already asked to do it via permute_y)
        if not self.permute_y:
            hidden_states = unpermute(hidden_states, gather_indices)

        # 2. Merge topk weights
        hidden_states = (
            hidden_states.view(num_tokens, self.top_k, hidden_dim)
            * routing_weights[..., None]
        )
        hidden_states = hidden_states.sum(dim = 1)

        hidden_states = hidden_states.view(batch_size, sequence_length, hidden_dim)
        return hidden_states, router_logits


================================================
FILE: unsloth/kernels/moe/grouped_gemm/reference/moe_block.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import torch
from transformers.models.qwen3_moe.configuration_qwen3_moe import Qwen3MoeConfig
from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock

from ..interface import grouped_gemm
from ..kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from .moe_ops import (
    Qwen3MoeGroupedGEMMBlock,
    permute,
    unpermute,
)

"""
Reference implementation of MoE block using grouped gemm.

This is the same as the Qwen3MoeGroupedGEMMBlock but with triton grouped gemm in place of torch-native grouped gemm implementation.

NOTE: This is NOT to be used for production as it contains many extra checks and saves all intermediate results for debugging.
"""


class Qwen3MoeFusedGroupedGEMMBlock(Qwen3MoeGroupedGEMMBlock):
    """
    MoE block whose two expert projections are executed by `grouped_gemm`.

    Routing, token bookkeeping and the activation are inherited from
    `Qwen3MoeGroupedGEMMBlock`; this subclass only stores the extra kernel
    options and overrides `forward` to call `grouped_gemm` for the
    gate/up projection and for the down projection.
    """

    def __init__(
        self,
        config: Qwen3MoeConfig,
        gate: torch.Tensor,
        gate_up_proj: torch.Tensor,
        down_proj: torch.Tensor,
        permute_x: bool = True,
        permute_y: bool = True,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
        dW_only: bool = False,
        dX_only: bool = False,
    ):
        """
        Args:
            config, gate, gate_up_proj, down_proj: forwarded unchanged to the
                parent constructor.
            permute_x: forwarded to the first `grouped_gemm` call; when False
                the input is permuted explicitly with `permute` beforehand.
            permute_y: forwarded to the second `grouped_gemm` call; when False
                the output is unpermuted explicitly with `unpermute` afterwards.
            autotune: forwarded to `grouped_gemm`. When False, all three kernel
                configs must be supplied.
            kernel_config_fwd, kernel_config_bwd_dW, kernel_config_bwd_dX:
                explicit kernel configs forwarded to `grouped_gemm`.
            dW_only, dX_only: forwarded to `grouped_gemm`.
        """
        super().__init__(config, gate, gate_up_proj, down_proj)
        self.permute_x = permute_x
        self.permute_y = permute_y
        self.autotune = autotune
        if not autotune:
            # Without autotuning there is nothing to pick the configs for us.
            assert (
                kernel_config_fwd is not None
                and kernel_config_bwd_dW is not None
                and kernel_config_bwd_dX is not None
            ), "Kernel configs must be provided if autotune is False"
        self.kernel_config_fwd = kernel_config_fwd
        self.kernel_config_bwd_dW = kernel_config_bwd_dW
        self.kernel_config_bwd_dX = kernel_config_bwd_dX
        self.dW_only = dW_only
        self.dX_only = dX_only

    @classmethod
    def from_hf(
        cls,
        moe_block: Qwen3MoeSparseMoeBlock,
        permute_x: bool = True,
        permute_y: bool = True,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
        dW_only: bool = False,
        dX_only: bool = False,
    ):
        """
        Build the block from a HuggingFace `Qwen3MoeSparseMoeBlock`.

        The config is read from the first expert and the weights are obtained
        through `Qwen3MoeGroupedGEMMBlock.extract_hf_weights`; every other
        argument is passed straight to the constructor.
        """
        config: Qwen3MoeConfig = moe_block.experts[0].config
        gate, gate_up_proj, down_proj = Qwen3MoeGroupedGEMMBlock.extract_hf_weights(
            moe_block
        )
        return cls(
            config,
            gate,
            gate_up_proj,
            down_proj,
            permute_x = permute_x,
            permute_y = permute_y,
            autotune = autotune,
            kernel_config_fwd = kernel_config_fwd,
            kernel_config_bwd_dW = kernel_config_bwd_dW,
            kernel_config_bwd_dX = kernel_config_bwd_dX,
            dW_only = dW_only,
            dX_only = dX_only,
        )

    def forward(
        self, hidden_states: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """
        Run the MoE block.

        Args:
            hidden_states: tensor of shape [batch_size, sequence_length, hidden_dim].

        Returns:
            A pair `(hidden_states, router_logits)` where `hidden_states` has
            been reshaped back to [batch_size, sequence_length, hidden_dim] and
            `router_logits` is whatever `run_router` produced.
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        num_tokens = batch_size * sequence_length

        hidden_states = hidden_states.view(-1, hidden_dim)

        router_logits, routing_weights, selected_experts = self.run_router(
            hidden_states
        )
        # Pre-processing
        # 1. Compute tokens per expert and indices for gathering tokens from token order to expert order
        # NOTE: these are auxiliary data structs which don't need to be recorded in autograd graph
        token_counts_by_expert, gather_indices = (
            self.get_token_counts_and_gather_indices(selected_experts)
        )

        # 2. Token order -> expert order. With permute_x set, the flag is handed
        #    to the first grouped gemm instead; otherwise permute explicitly here.
        if not self.permute_x:
            hidden_states = permute(hidden_states, gather_indices, self.top_k)
        # Start expert computation
        hidden_states = grouped_gemm(
            X = hidden_states,
            W = self.gate_up_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = self.permute_x,
            permute_y = False,  # output of first grouped gemm should never be permuted
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = True,
            dW_only = self.dW_only,
            dX_only = self.dX_only,
        )
        hidden_states = self.act_and_mul(hidden_states)
        hidden_states = grouped_gemm(
            X = hidden_states,
            W = self.down_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = False,  # input is already in expert order at this point
            permute_y = self.permute_y,
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = False,
            dW_only = self.dW_only,
            dX_only = self.dX_only,
        )

        # Post-processing
        # 1. Unpermute from expert order to token order (skipped when permute_y
        #    was handed to the second grouped gemm)
        if not self.permute_y:
            hidden_states = unpermute(hidden_states, gather_indices)

        # 2. Merge topk weights: scale each of the top_k expert outputs of a
        #    token by its routing weight and sum them
        hidden_states = (
            hidden_states.view(num_tokens, self.top_k, hidden_dim)
            * routing_weights[..., None]
        )
        hidden_states = hidden_states.sum(dim = 1)

        hidden_states = hidden_states.view(batch_size, sequence_length, hidden_dim)
        return hidden_states, router_logits


================================================
FILE: unsloth/kernels/moe/grouped_gemm/reference/moe_ops.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import torch
import torch.nn.functional as F


def permute(X: torch.Tensor, gather_indices: torch.Tensor, topk: int):
    """
    Gather rows of X into the order given by gather_indices.

    Helper for grouped gemm where hidden states need to be ordered by expert.
    X is first flattened to 2-D as [-1, hidden_dim]. Each entry of
    gather_indices is integer-divided by topk to obtain the row of X to read,
    so one row of X may appear up to topk times in the result.

    X: [num_tokens, hidden_dim]
    gather_indices: 1-D, [num_tokens * topk]
    topk: int

    Returns:
        [total_tokens, hidden_dim] where total_tokens == len(gather_indices)
    """
    assert gather_indices.ndim == 1
    rows = X.view(-1, X.shape[-1])
    # With topk == 1 the division is the identity, so skip it.
    source_rows = gather_indices if topk == 1 else gather_indices // topk
    return rows[source_rows]


def unpermute(X: torch.Tensor, gather_indices: torch.Tensor):
    """
    Scatter the rows of X back to the positions named by gather_indices.

    Row i of X is written to row gather_indices[i] of the result. Inputs with
    more than two dimensions are flattened to [-1, X.shape[-1]] first, and the
    result has that flattened shape.
    """
    if X.ndim > 2:
        X = X.view(-1, X.shape[-1])
    # index_copy_ returns the tensor it wrote into, so the calls can be chained.
    restored = torch.empty_like(X).index_copy_(0, gather_indices, X)
    return restored.view_as(X)


def calculate_topk(
    gating_output: torch.Tensor,
    top_k: int,
    use_sigmoid: bool,
    renormalize: bool,
    pre_act: bool = True,
    post_act: bool = False,
):
    """
    Select the top_k entries along dim 1 of gating_output and return
    (topk_weights, topk_ids).

    Exactly one of pre_act / post_act must be True:
      - pre_act: the activation is applied to gating_output BEFORE topk
      - post_act: the activation is applied to the selected values AFTER topk

    The activation is sigmoid when use_sigmoid is True, otherwise softmax over
    dim 1; it is evaluated in float32 and cast back to the input dtype.
    With renormalize the weights are divided by their sum over the last dim.

    post_act exists to align with the triton_bench implementation, whereas most
    models use pre_act (e.g. llama4, deepseek).
    """
    assert pre_act ^ post_act, "only one of pre_act or post_act can be True"

    def _activation(logits: torch.Tensor):
        upcast = logits.to(torch.float32)
        activated = torch.sigmoid(upcast) if use_sigmoid else F.softmax(upcast, dim = 1)
        return activated.to(logits.dtype)

    scores = _activation(gating_output) if pre_act else gating_output

    topk_weights, topk_ids = torch.topk(scores, k = top_k, dim = 1)

    if post_act:
        topk_weights = _activation(topk_weights)

    if renormalize:
        weight_sum = torch.sum(topk_weights, dim = -1, keepdim = True)
        # In-place division, same as the augmented `/=` form.
        topk_weights.div_(weight_sum.to(gating_output.dtype))

    return topk_weights, topk_ids


@torch.no_grad()
def get_routing_indices(
    selected_experts, num_experts, return_scatter_indices: bool = False
):
    """
    Derive the bookkeeping needed to group token slots by expert.

    selected_experts is flattened before use, so every (token, k) slot counts
    as one entry.

    Returns:
        token_counts_by_expert: [num_experts], histogram of the flattened
            expert ids over the range [0, num_experts]
        gather_indices: [selected_experts.numel()], stable argsort of the
            flattened expert ids (slots ordered by expert)
        scatter_indices [Optional] (torch.Tensor):
            argsort of gather_indices, i.e. the indices for unpermuting
            gathered inputs back to token order; only returned when
            return_scatter_indices is True.
    """
    flat_expert_ids = selected_experts.view(-1)

    # One unit-width bin per expert id.
    token_counts_by_expert = torch.histc(
        flat_expert_ids,
        bins = num_experts,
        min = 0,
        max = num_experts,
    )
    # Stable sort keeps slots of the same expert in their original order.
    gather_indices = torch.argsort(flat_expert_ids, stable = True)

    if not return_scatter_indices:
        return token_counts_by_expert, gather_indices

    scatter_indices = gather_indices.argsort()
    return token_counts_by_expert, gather_indices, scatter_indices


def torch_grouped_gemm(X, W, m_sizes, transpose = True):
    """
    Reference grouped GEMM: multiply consecutive row groups of X by per-group weights.

    Rows of X are consumed in order; group g owns the next m_sizes[g] rows and
    is multiplied by W[g] (transposed when `transpose` is True).

    X: [M, K] if forward, else [M, N]
    W: [E, N, K]
    m_sizes: [E], integer row counts per group
    transpose: True for the forward product X_g @ W_g.T, False for X_g @ W_g

    Returns:
        Y: [M, N] if forward (transpose = True), else [M, K].
        Rows not covered by any group stay zero.
    """
    X = X.view(-1, X.shape[-1])
    M = X.shape[0]

    assert m_sizes.ndim == 1
    E = m_sizes.shape[0]

    assert W.ndim == 3
    assert W.shape[0] == E

    # The output width is whichever W dimension is NOT contracted:
    # dim 1 when W_g is transposed, dim 2 otherwise.
    out_features = W.shape[1] if transpose else W.shape[2]

    result = torch.zeros((M, out_features), dtype = X.dtype, device = X.device)

    m_start = 0
    # Pull the group sizes to the host once instead of once per group.
    for g, m_size in enumerate(m_sizes.tolist()):
        if m_size > 0:
            m_end = m_start + m_size

            # [m_size, in_features] rows belonging to this group
            X_g = X[m_start:m_end]
            W_g = W[g].T if transpose else W[g]

            result[m_start:m_end] = X_g @ W_g

            m_start = m_end
    return result


================================================
FILE: unsloth/kernels/moe/requirements.txt
================================================
torch
git+https://github.com/huggingface/transformers.git@main
pytest
pandas
ruff

================================================
FILE: unsloth/kernels/moe/tests/__init__.py
================================================


================================================
FILE: unsloth/kernels/moe/tests/common.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import itertools
from contextlib import contextmanager
from dataclasses import dataclass, field

import torch

from grouped_gemm.kernels.tuning import (
    KernelConfig,
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
    prune_kernel_configs_backward_dW,
    prune_kernel_configs_backward_dX,
    prune_kernel_configs_fwd,
)


def print_delimiter(char = "-", length = 80):
    """Print a horizontal rule made of `char` repeated `length` times."""
    rule = char * length
    print(rule)


@contextmanager
def delimiter_context():
    """
    Context manager that frames the output of its body between two delimiter lines.

    The closing delimiter is only printed when the body finishes without
    raising, because the generator is not resumed normally after an exception.
    """
    # Opening rule, printed on entry.
    print_delimiter()
    yield
    # Closing rule, printed on normal exit.
    print_delimiter()


def make_inputs(M, N, K, E, topk, dtype, requires_grad = False):
    """
    Create random CUDA tensors for the grouped-gemm tests.

    Returns (X1, X2, W1, W2, score) with shapes
        X1:    [M, K]
        X2:    [M * topk, N]
        W1:    [E, 2 * N, K]
        W2:    [E, K, N]
        score: [M, E]
    Every tensor except `score` is a standard-normal sample divided by 10.

    When requires_grad is True, `retain_grad()` is called on every returned
    tensor; the scaled tensors are results of a division rather than leaves,
    so their .grad would otherwise not be kept.
    """

    def _randn(*shape):
        return torch.randn(
            shape, device = "cuda", dtype = dtype, requires_grad = requires_grad
        )

    # Draw in a fixed order so a given seed always yields the same tensors.
    X1 = _randn(M, K) / 10
    X2 = _randn(M * topk, N) / 10
    W1 = _randn(E, 2 * N, K) / 10
    W2 = _randn(E, K, N) / 10
    score = _randn(M, E)

    tensors = (X1, X2, W1, W2, score)
    if requires_grad:
        for tensor in tensors:
            tensor.retain_grad()
    return tensors


@dataclass(kw_only = True)
class DataConfig:
    """Keyword-only description of the input data used by a test case."""

    # Sequence length of the generated input.
    seq_len: int
    # Torch dtype of the generated tensors.
    dtype: torch.dtype
    # Device string; defaults to "cuda".
    device: str = "cuda"
    # Presumably the batch size (defaults to 1) - confirm against the tests that read it.
    bs: int = 1


@dataclass(kw_only = True)
class ModelConfig:
    """
    Keyword-only description of the MoE layer shape and routing options under test.

    `post_mul` cannot be passed to the constructor; it is derived in
    `__post_init__` as the negation of `pre_mul`.
    """

    hidden_size: int
    intermediate_size: int
    num_experts: int
    topk: int
    # NOTE(review): presumably forwarded to the router's top-k computation
    # (activation choice / weight renormalisation) - confirm against callers.
    use_sigmoid: bool
    renormalize: bool
    # NOTE(review): the exact meaning of pre_mul / post_mul is defined by the
    # consumers of this config; only their mutual exclusivity is enforced here.
    pre_mul: bool = False
    post_mul: bool = field(init = False)

    def __post_init__(self):
        # Exactly one of pre_mul / post_mul is True at any time.
        self.post_mul = not self.pre_mul


@dataclass(kw_only = True)
class GroupedGEMMTestConfig:
    """Bundle of one data configuration and one model configuration under a display name."""

    # Label for the test case; defaults to "test".
    name: str = "test"
    data_config: DataConfig
    model_config: ModelConfig


# Per-dtype pair of tolerances, keyed by torch dtype.
# NOTE(review): the tuple order is presumably (atol, rtol) - confirm against the
# tests that unpack it. Note that bfloat16 gets a looser bound than float16.
TOLERANCE = {
    torch.bfloat16: (1e-3, 1e-3),
    torch.float16: (1e-4, 1e-4),
    torch.float32: (1e-5, 1e-5),
}


# from https://github.com/triton-lang/triton/blob/main/bench/triton_bench/testing.py
def assert_equal(ref, tri):
    """
    Assert exact equality of `ref` and `tri`.

    When `ref` is a tensor every element must compare equal; any other type is
    compared with plain `==`.
    """
    if not isinstance(ref, torch.Tensor):
        assert ref == tri, f"ref not equal to tri {ref} != {tri}"
        return
    assert torch.all(ref == tri), f"tensors not equal {ref} != {tri}"


def assert_close(ref, tri, maxtol = None, rmstol = None, description = "--", verbose = True):
    if tri.dtype.itemsize == 1:
        ref_as_type = ref.to(tri.dtype)
        if ref.dtype == tri.dtype:
            assert torch.all(ref_as_type == tri)
            return
        ref = ref_as_type

    if maxtol is None:
        maxtol = 2e-2
    if rmstol is None:
        rmstol = 4e-3
    """
    Compare reference values against obtained values.
    """

    # cast to float32:
    ref = ref.to(torch.float32).detach()
    tri = tri.to(torch.float32).detach()
    assert (
        ref.shape == tri.shape
    ), f"Tensors must have same size {ref.shape = } {tri.shape = }"

    # deal with infinite elements:
    inf_mask_ref = torch.isinf(ref)
    inf_mask_tri = torch.isinf(tri)
    assert torch.equal(
        inf_mask_ref, inf_mask_tri
    ), "Tensor must have same infinite elements"
    refn = torch.where(inf_mask_ref, 0, ref)
    trin = torch.where(inf_mask_tri, 0, tri)

    # normalise so that RMS calculation doesn't overflow:
    eps = 1.0e-30
    multiplier = 1.0 / (torch.max(torch.abs(refn)) + eps)
    refn *= multiplier
    trin *= multiplier

    ref_rms = torch.sqrt(torch.square(refn).mean()) + eps

    rel_err = torch.abs(refn - trin) / torch.maximum(ref_rms, torch.abs(refn))
    max_err = torch.max(rel_err).item()
    rms_err = torch.sqrt(torch.square(rel_err).mean()).item()

    if verbose:
        print(
            "%s maximum relative error = %s (threshold = %s)"
            % (description, max_err, maxtol)
        )
        print(
            "%s RMS relative error = %s (threshold = %s)"
            % (description, rms_err, rmstol)
        )

    if max_err > maxtol:
        bad_idxs = torch.nonzero(rel_err > maxtol)
        num_nonzero = bad_idxs.size(0)
        bad_idxs = bad_idxs[:1000]
        print(
            "%d / %d mismatched elements (shape = %s) at coords %s"
            % (num_nonzero, rel_err.numel(), tuple(rel_err.shape), bad_idxs.tolist())
        )

        bad_idxs = bad_idxs.unbind(-1)
        print("ref values: ", ref[*bad_idxs].cpu())
        print("tri values: ", tri[*bad_idxs].cpu())

    assert max_err <= maxtol
    assert rms_err <= rmstol


def assert_indx_equal(ref, tri):
    """
    Assert that `tri` starts with the entries of `ref` and is padded with -1.

    The first len(ref) entries of `tri` must equal `ref` exactly; every
    remaining entry must be -1.
    """
    valid_len = len(ref)
    head = tri[:valid_len]
    padding = tri[valid_len:]
    assert_equal(ref, head)
    assert torch.all(padding == -1)


def get_kernel_test_configs(
    BLOCK_SIZE_M = 32,
    BLOCK_SIZE_N = 32,
    BLOCK_SIZE_K = 32,
    num_warps = 4,
    num_stages = 2,
) -> tuple[
    list[KernelConfigForward],
    list[KernelConfigBackward_dX],
    list[KernelConfigBackward_dW],
]:
    """
    Enumerate forward / backward-dX / backward-dW kernel configs for the tests.

    One config of each kind is created for every combination of the five
    boolean feature flags (permute_x, permute_y, TMA load of W, TMA load of X,
    TMA store), all sharing the given tile sizes, warp count and stage count.
    Each list is then filtered by its matching `prune_kernel_configs_*` helper.

    Returns:
        (configs_fwd, configs_bwd_dX, configs_bwd_dW)
    """
    tuning = dict(
        BLOCK_SIZE_M = BLOCK_SIZE_M,
        BLOCK_SIZE_N = BLOCK_SIZE_N,
        BLOCK_SIZE_K = BLOCK_SIZE_K,
        num_warps = num_warps,
        num_stages = num_stages,
    )

    configs_fwd = []
    configs_bwd_dX = []
    configs_bwd_dW = []

    # product() varies the right-most iterable fastest, which reproduces the
    # order of the equivalent nested loops (permute_x outermost).
    flag_grid = itertools.product(
        [False, True],  # permute_x
        [False, True],  # permute_y
        [True, False],  # use_tma_load_w
        [True, False],  # use_tma_load_x
        [True, False],  # use_tma_store
    )
    for permute_x, permute_y, use_tma_load_w, use_tma_load_x, use_tma_store in flag_grid:
        shared = dict(
            tuning,
            permute_x = permute_x,
            permute_y = permute_y,
            use_tma_store = use_tma_store,
        )
        configs_fwd.append(
            KernelConfigForward(
                use_tma_load_w = use_tma_load_w,
                use_tma_load_x = use_tma_load_x,
                **shared,
            )
        )
        # The backward configs have a dy load flag instead of one of the
        # forward flags; it reuses the x flag for dX and the w flag for dW.
        configs_bwd_dX.append(
            KernelConfigBackward_dX(
                use_tma_load_dy = use_tma_load_x,
                use_tma_load_w = use_tma_load_w,
                **shared,
            )
        )
        configs_bwd_dW.append(
            KernelConfigBackward_dW(
                use_tma_load_dy = use_tma_load_w,
                use_tma_load_x = use_tma_load_x,
                **shared,
            )
        )

    configs_fwd = prune_kernel_configs_fwd(configs_fwd)
    configs_bwd_dX = prune_kernel_configs_backward_dX(configs_bwd_dX)
    configs_bwd_dW = prune_kernel_configs_backward_dW(configs_bwd_dW)
    return configs_fwd, configs_bwd_dX, configs_bwd_dW


def remove_feature_flags(
    kernel_configs: list[KernelConfig],
    permute_x: bool = True,
    permute_y: bool = True,
    tma_loads: bool = True,
    tma_store: bool = True,
):
    """
    Return the configs that do NOT use any of the selected features.

    Each boolean argument names a feature to strip: when it is True, every
    config that has the feature enabled is dropped. For `tma_loads` the flags
    inspected depend on the config class (forward: w/x, backward dX: dy/w,
    backward dW: dy/x). The input order is preserved.
    """

    def _uses_tma_load(config):
        if isinstance(config, KernelConfigForward) and (
            config.use_tma_load_w or config.use_tma_load_x
        ):
            return True
        if isinstance(config, KernelConfigBackward_dX) and (
            config.use_tma_load_dy or config.use_tma_load_w
        ):
            return True
        if isinstance(config, KernelConfigBackward_dW) and (
            config.use_tma_load_dy or config.use_tma_load_x
        ):
            return True
        return False

    def _keep(config):
        # Permute flags are checked first, then TMA loads, then TMA store.
        if permute_x and config.permute_x:
            return False
        if permute_y and config.permute_y:
            return False
        if tma_loads and _uses_tma_load(config):
            return False
        if tma_store and config.use_tma_store:
            return False
        return True

    return [config for config in kernel_configs if _keep(config)]


# Test Configs
# Module-level parameter grids shared by the tests. Everything below is built
# at import time.

# Values swept for the number of experts selected per token and the expert count.
TOPK = [1, 4]
NUM_EXPERTS = [4, 16]

# (hidden_size, intermediate_size) pairs, unpacked in SMALL_MODEL_CONFIGS below.
TEST_MODEL_SIZES = [
    (32, 32),  # Debug
    (128, 128),  # Small
    (512, 512),  # Medium
]

# Full cross product of TOPK x NUM_EXPERTS x TEST_MODEL_SIZES, all with softmax
# routing flags off (use_sigmoid = False) and no renormalisation.
SMALL_MODEL_CONFIGS = [
    ModelConfig(
        topk = topk,
        num_experts = num_experts,
        hidden_size = model_size[0],
        intermediate_size = model_size[1],
        use_sigmoid = False,
        renormalize = False,
    )
    for topk, num_experts, model_size in itertools.product(
        TOPK, NUM_EXPERTS, TEST_MODEL_SIZES
    )
]
# NOTE(review): presumably mirrors the MoE layer dimensions of a Llama model - confirm.
LLAMA_MODEL_CONFIG = ModelConfig(
    topk = 1,
    num_experts = 16,
    hidden_size = 5120,
    intermediate_size = 8192,
    use_sigmoid = True,
    renormalize = False,
)
# NOTE(review): presumably mirrors the MoE layer dimensions of a Qwen model - confirm.
QWEN_MODEL_CONFIG = ModelConfig(
    topk = 8,
    num_experts = 128,
    hidden_size = 2048,
    intermediate_size = 768,
    use_sigmoid = False,
    renormalize = False,
)

SEQLENS = [128, 1024]
DTYPE = [torch.bfloat16]

# One DataConfig per (sequence length, dtype) combination; device and bs keep
# their defaults.
DATA_CONFIGS = [
    DataConfig(seq_len = seq_len, dtype = dtype)
    for seq_len, dtype in itertools.product(SEQLENS, DTYPE)
]
# Kernel configs generated with the default tile sizes / warps / stages of
# get_kernel_test_configs.
KERNEL_CONFIGS_FWD, KERNEL_CONFIGS_BWD_dX, KERNEL_CONFIGS_BWD_dW = (
    get_kernel_test_configs()
)

# Manual smoke check: print the string form of the first backward-dX config.
if __name__ == "__main__":
    print(
        KERNEL_CONFIGS_BWD_dX[0].to_string(
            include_tuning_params = False, include_tma = False
        )
    )


================================================
FILE: unsloth/kernels/moe/tests/moe_utils.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

from dataclasses import dataclass, fields

import torch
import torch.nn as nn
from huggingface_hub import HfApi
from huggingface_hub.utils import _safetensors
from transformers.models.qwen3_moe.configuration_qwen3_moe import Qwen3MoeConfig
from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock

from grouped_gemm.interface import grouped_gemm
from grouped_gemm.kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from grouped_gemm.reference.layers.qwen3_moe import (
    GroupedGEMMResult,
    Qwen3MoeGroupedGEMMBlock,
)
from grouped_gemm.reference.moe_ops import permute, unpermute


def rebind_experts_to_shared_buffer(
    moe_block: Qwen3MoeSparseMoeBlock, config: Qwen3MoeConfig
):
    """
    Move every expert's projection weights into three stacked buffers.

    Buffers of shape [num_experts, moe_intermediate_size, hidden_size] (up and
    gate) and [num_experts, hidden_size, moe_intermediate_size] (down) are
    allocated with the device and dtype of the first expert's down_proj
    weight. The current weights are copied in, and each expert's `weight`
    attributes are then replaced by new Parameters wrapping the matching
    buffer slice.

    Returns:
        (buffer_up, buffer_gate, buffer_down)
    """
    experts = moe_block.experts
    reference_weight = experts[0].down_proj.weight
    alloc_kwargs = {
        "device": reference_weight.device,
        "dtype": reference_weight.dtype,
    }

    num_experts = config.num_experts
    up_gate_shape = (num_experts, config.moe_intermediate_size, config.hidden_size)
    down_shape = (num_experts, config.hidden_size, config.moe_intermediate_size)

    # Step 1: Allocate the shared buffers
    buffer_up = torch.empty(up_gate_shape, **alloc_kwargs)
    buffer_gate = torch.empty(up_gate_shape, **alloc_kwargs)
    buffer_down = torch.empty(down_shape, **alloc_kwargs)

    # Step 2: Copy existing expert weights into buffers
    for slot, expert in enumerate(experts):
        buffer_up[slot].copy_(expert.up_proj.weight.data)
        buffer_gate[slot].copy_(expert.gate_proj.weight.data)
        buffer_down[slot].copy_(expert.down_proj.weight.data)

    # Step 3: Rebind expert weights to views in shared buffer
    for slot, expert in enumerate(experts):
        expert.up_proj.weight = torch.nn.Parameter(buffer_up[slot])
        expert.gate_proj.weight = torch.nn.Parameter(buffer_gate[slot])
        expert.down_proj.weight = torch.nn.Parameter(buffer_down[slot])

    return buffer_up, buffer_gate, buffer_down


def get_expert_metadata(model_id: str):
    """
    Fetch the safetensors metadata of `model_id` through the Hugging Face Hub API
    and return its `files_metadata` attribute.
    """
    repo_metadata: _safetensors.SafetensorsRepoMetadata = (
        HfApi().get_safetensors_metadata(model_id)
    )
    return repo_metadata.files_metadata


def clone_experts(
    moe_block: Qwen3MoeSparseMoeBlock, config: Qwen3MoeConfig, copy: bool = True
):
    """
    Stack copies of all expert projection weights into three new tensors.

    The tensors are allocated with torch's default device and dtype (none is
    passed to `torch.empty`), so the copy also converts if the experts live
    elsewhere. The `copy` argument is currently not read by the function.

    Returns:
        (gate_projs, up_projs, down_projs) with shapes
        [num_experts, moe_intermediate_size, hidden_size] for gate/up and
        [num_experts, hidden_size, moe_intermediate_size] for down.
    """
    num_experts = config.num_experts
    hidden = config.hidden_size
    intermediate = config.moe_intermediate_size

    down_projs = torch.empty(num_experts, hidden, intermediate)
    up_projs = torch.empty(num_experts, intermediate, hidden)
    gate_projs = torch.empty(num_experts, intermediate, hidden)

    for slot, expert in enumerate(moe_block.experts):
        down_projs[slot].copy_(expert.down_proj.weight.data)
        up_projs[slot].copy_(expert.up_proj.weight.data)
        gate_projs[slot].copy_(expert.gate_proj.weight.data)

    return gate_projs, up_projs, down_projs


@dataclass
class ForwardResult:
    """Outputs of one forward pass as collected by `run_forward`."""

    # Hidden states produced by the block.
    output: torch.Tensor
    router_logits: torch.Tensor
    # The detached, grad-enabled clone of the input that was actually fed to the model.
    X: torch.Tensor
    # Only set for the grouped gemm MoE implementation: the full result object,
    # kept for additional debugging / checking of intermediate results
    grouped_gemm_result: GroupedGEMMResult = None


@dataclass
class BackwardResult:
    """Gradients collected by `run_backward` after one backward pass."""

    # Gradient with respect to the block input.
    X_grad: torch.Tensor
    # Gradient of the router (gate) weight.
    gate_grad: torch.Tensor
    # Expert projection gradients. The name filter in check_expert_grads relies
    # on exactly these three fields containing "proj".
    gate_proj_grad: torch.Tensor
    up_proj_grad: torch.Tensor
    down_proj_grad: torch.Tensor


def check_down_proj_grad(
    moe_block: Qwen3MoeSparseMoeBlock,
    grouped_gemm_block: Qwen3MoeGroupedGEMMBlock,
    atol: float,
    rtol: float,
):
    """
    Compare each expert's down_proj weight gradient with the matching slice of
    the grouped block's stacked down_proj gradient.

    Missing gradients fail an assertion; a mismatch beyond the tolerances is
    only reported on stdout (max absolute difference), it does not raise.
    """
    for i, expert in enumerate(moe_block.experts):
        ref_grad = expert.down_proj.weight.grad
        assert ref_grad is not None
        test_grad = grouped_gemm_block.down_proj.grad[i]
        assert test_grad is not None

        diff = (ref_grad - test_grad).abs().max()
        if torch.allclose(ref_grad, test_grad, atol = atol, rtol = rtol):
            continue
        print(f"expert {i} down_proj_grad_diff: {diff.detach().cpu().item():.6f}")


def check_gate_up_proj_grad(
    moe_block: Qwen3MoeSparseMoeBlock,
    grouped_gemm_block: Qwen3MoeGroupedGEMMBlock,
    atol: float,
    rtol: float,
):
    """
    Compare each expert's gate_proj / up_proj weight gradients with the two
    halves of the grouped block's fused gate_up_proj gradient.

    For expert i, rows [:moe_intermediate_size] of the fused gradient are the
    gate part and the remaining rows are the up part. Missing gradients and
    shape mismatches fail an assertion; value mismatches beyond the tolerances
    are only printed.
    """
    split_at = grouped_gemm_block.moe_intermediate_size

    for i, expert in enumerate(moe_block.experts):
        ref_gate_proj_grad = expert.gate_proj.weight.grad
        ref_up_proj_grad = expert.up_proj.weight.grad
        assert ref_gate_proj_grad is not None
        assert ref_up_proj_grad is not None

        # Split the fused gradient of this expert into its gate / up halves
        test_gate_proj_grad = grouped_gemm_block.gate_up_proj.grad[i, :split_at]
        test_up_proj_grad = grouped_gemm_block.gate_up_proj.grad[i, split_at:]
        assert test_gate_proj_grad is not None
        assert test_up_proj_grad is not None

        # Sanity check shapes
        assert (
            ref_gate_proj_grad.shape == test_gate_proj_grad.shape
        ), f"{ref_gate_proj_grad.shape} != {test_gate_proj_grad.shape}"
        assert (
            ref_up_proj_grad.shape == test_up_proj_grad.shape
        ), f"{ref_up_proj_grad.shape} != {test_up_proj_grad.shape}"

        # Gate half
        diff = (ref_gate_proj_grad - test_gate_proj_grad).abs().max()
        gate_matches = torch.allclose(
            ref_gate_proj_grad, test_gate_proj_grad, atol = atol, rtol = rtol
        )
        if not gate_matches:
            print(f"expert {i} gate_proj_grad_diff: {diff.detach().cpu().item():.6f}")

        # Up half
        diff = (ref_up_proj_grad - test_up_proj_grad).abs().max()
        up_matches = torch.allclose(
            ref_up_proj_grad, test_up_proj_grad, atol = atol, rtol = rtol
        )
        if not up_matches:
            print(f"expert {i} up_proj_grad_diff: {diff.detach().cpu().item():.6f}")


def check_gate_grad(
    moe_block: Qwen3MoeSparseMoeBlock,
    grouped_gemm_block: Qwen3MoeGroupedGEMMBlock,
    atol: float,
    rtol: float,
):
    """
    Compare the router gradient of the reference block (`gate.weight.grad`)
    with that of the grouped block (`gate.grad`).

    Missing gradients fail an assertion; a mismatch beyond the tolerances is
    only printed.
    """
    ref_grad = moe_block.gate.weight.grad
    assert ref_grad is not None
    test_grad = grouped_gemm_block.gate.grad
    assert test_grad is not None

    diff = (ref_grad - test_grad).abs().max()
    if torch.allclose(ref_grad, test_grad, atol = atol, rtol = rtol):
        return
    print(f"gate_grad_diff: {diff.detach().cpu().item():.6f}")


def check_wgrad(
    moe_block: Qwen3MoeSparseMoeBlock,
    grouped_gemm_block: Qwen3MoeGroupedGEMMBlock,
    atol: float,
    rtol: float,
):
    """Run the down_proj, gate/up_proj and router weight-gradient checks, in that order."""
    weight_grad_checks = (
        check_down_proj_grad,
        check_gate_up_proj_grad,
        check_gate_grad,
    )
    for run_check in weight_grad_checks:
        run_check(moe_block, grouped_gemm_block, atol, rtol)


def check_tensor_allclose(
    X_ref: torch.Tensor,
    X_test: torch.Tensor,
    atol: float,
    rtol: float,
    name: str,
    verbose: bool = False,
):
    """
    Assert that X_test matches X_ref within (atol, rtol).

    The maximum absolute difference is printed when `verbose` is set and is
    also used as the assertion message, prefixed with `name`.
    """
    diff = (X_ref - X_test).abs().max()

    def _report():
        # Built lazily: only needed for verbose output or a failing assertion.
        return f"{name} diff: {diff.detach().cpu().item():.6f}"

    if verbose:
        print(_report())

    is_close = torch.allclose(X_ref, X_test, atol = atol, rtol = rtol)
    assert is_close, _report()


def check_expert_grads(
    ref_result: BackwardResult,
    test_result: BackwardResult,
    atol: float,
    rtol: float,
    verbose: bool = False,
):
    """
    Assert that all expert projection gradients of two backward results agree.

    Every BackwardResult field whose name contains "proj" is compared: first
    expert by expert along dim 0 (so a failure names the expert), then once
    more over the whole stacked tensor.
    """
    fields_to_check = [
        result_field.name
        for result_field in fields(BackwardResult)
        if "proj" in result_field.name
    ]
    assert len(fields_to_check) == 3

    for field in fields_to_check:
        ref_grads = getattr(ref_result, field)
        test_grads = getattr(test_result, field)
        assert (
            ref_grads.shape == test_grads.shape
        ), f"{field}: {ref_grads.shape} != {test_grads.shape}"

        # Per-expert comparison
        for i, (ref_grad, test_grad) in enumerate(zip(ref_grads, test_grads)):
            diff = (ref_grad - test_grad).abs().max()
            assert torch.allclose(
                ref_grad, test_grad, atol = atol, rtol = rtol
            ), f"{field}[{i}] diff: {diff.detach().cpu().item():.6f}"

        # Whole-tensor comparison
        diff = (ref_grads - test_grads).abs().max()
        if verbose:
            print(f"{field} diff: {diff.detach().cpu().item():.6f}")
        assert torch.allclose(
            ref_grads, test_grads, atol = atol, rtol = rtol
        ), f"{field} diff: {diff.detach().cpu().item():.6f}"


def check_grads(
    ref_result: BackwardResult,
    test_result: BackwardResult,
    atol: float,
    rtol: float,
    verbose: bool = False,
):
    """
    Assert that two backward results agree: input gradient first, then the
    router gradient, then all expert projection gradients.
    """
    labelled_attrs = (
        ("X_grad", "X.grad"),
        ("gate_grad", "gate.grad"),
    )
    for attr, label in labelled_attrs:
        check_tensor_allclose(
            getattr(ref_result, attr),
            getattr(test_result, attr),
            atol,
            rtol,
            label,
            verbose,
        )
    check_expert_grads(ref_result, test_result, atol, rtol, verbose)


def check_fwd(
    ref_result: ForwardResult,
    test_result: ForwardResult,
    atol: float,
    rtol: float,
    verbose: bool = False,
):
    """
    Assert that two forward results agree within (atol, rtol).

    The hidden-state output is checked first, then the router logits. Each
    check reports the maximum absolute difference under the labels "output"
    and "router_logits" respectively.
    """
    # First check hidden states (output)
    check_tensor_allclose(
        ref_result.output, test_result.output, atol, rtol, "output", verbose
    )

    # Check router logits
    check_tensor_allclose(
        ref_result.router_logits,
        test_result.router_logits,
        atol,
        rtol,
        "router_logits",
        verbose,
    )


def check_grouped_gemm_results(
    grouped_result: GroupedGEMMResult,
    fused_result: GroupedGEMMResult,
    permute_y: bool,
    atol: float,
    rtol: float,
    verbose: bool = False,
):
    """
    Assert that every field of two GroupedGEMMResult objects agrees within
    (atol, rtol), field by field in declaration order.

    The `second_gemm` field is not compared when `permute_y` is set (see the
    inline note below).
    """
    for field in fields(GroupedGEMMResult):
        ref_value = getattr(grouped_result, field.name)
        test_value = getattr(fused_result, field.name)
        diff = (ref_value - test_value).abs().max()

        # second_gemm in torch grouped gemm is not yet unpermuted so comparing the fused unpermuted second_gemm will result in error
        # instead the hidden_states_unpermute should match since hidden_states_unpermute for the fused result is the same as second_gemm
        skip_field = permute_y and field.name == "second_gemm"
        if skip_field:
            continue

        if verbose:
            print(f"{field.name} diff: {diff.detach().cpu().item():.6f}")

        values_match = torch.allclose(ref_value, test_value, atol = atol, rtol = rtol)
        assert values_match, f"{field.name} diff: {diff.detach().cpu().item():.6f}"


def run_forward(model: nn.Module, X: torch.Tensor, is_grouped_gemm: bool = False):
    """
    Run `model` on a fresh, grad-enabled copy of X and wrap the outputs.

    The model is expected to return `(output, router_logits)`. With
    `is_grouped_gemm` the first element is treated as a result object: its
    `hidden_states` attribute becomes `ForwardResult.output` and the object
    itself is kept in `grouped_gemm_result`.
    """
    # Detach + clone so each run owns its own leaf tensor and its own .grad.
    X = X.detach().clone().requires_grad_(True)
    output, router_logits = model(X)

    if not is_grouped_gemm:
        return ForwardResult(output = output, router_logits = router_logits, X = X)

    return ForwardResult(
        output = output.hidden_states,
        router_logits = router_logits,
        X = X,
        grouped_gemm_result = output,
    )


def run_backward(
    model: nn.Module, grad_output: torch.Tensor, output: torch.Tensor, X: torch.Tensor
):
    """
    Backpropagate `grad_output` through `output` and collect the resulting gradients.

    Args:
        model: either a `Qwen3MoeSparseMoeBlock` or a `Qwen3MoeGroupedGEMMBlock`.
        grad_output: upstream gradient passed to `output.backward`.
        output: forward output produced from `X` by `model`.
        X: input tensor whose `.grad` is expected to be populated by the backward pass.

    Returns:
        A `BackwardResult` with the input gradient plus gate / gate_proj / up_proj /
        down_proj gradients.

    Raises:
        AssertionError: if `X` or any model parameter has no gradient after backward.
        ValueError: if `model` is neither of the supported block types.
    """
    output.backward(grad_output)

    assert X.grad is not None
    for name, param in model.named_parameters():
        assert param.grad is not None, f"{name} grad is None"

    def stack_expert_grads(proj_name):
        # One gradient per expert, stacked along a new leading dimension.
        return torch.stack(
            [getattr(expert, proj_name).weight.grad for expert in model.experts]
        )

    if isinstance(model, Qwen3MoeSparseMoeBlock):
        gate_grad = model.gate.weight.grad
        gate_proj_grad = stack_expert_grads("gate_proj")
        up_proj_grad = stack_expert_grads("up_proj")
        down_proj_grad = stack_expert_grads("down_proj")
    elif isinstance(model, Qwen3MoeGroupedGEMMBlock):
        gate_grad = model.gate.grad
        # gate and up projections share one parameter; split its grad in two along dim 1.
        gate_proj_grad, up_proj_grad = model.gate_up_proj.grad.chunk(2, dim = 1)
        down_proj_grad = model.down_proj.grad
    else:
        raise ValueError(f"Unsupported model type: {type(model)}")

    return BackwardResult(
        X_grad = X.grad,
        gate_grad = gate_grad,
        gate_proj_grad = gate_proj_grad,
        up_proj_grad = up_proj_grad,
        down_proj_grad = down_proj_grad,
    )


class Qwen3MoeFusedGroupedGEMMBlock(Qwen3MoeGroupedGEMMBlock):
    """
    Reference implementation of MoE block using grouped gemm.

    This is the same as the Qwen3MoeGroupedGEMMBlock but with triton grouped gemm in place of torch-native grouped gemm implementation.

    NOTE: This is NOT to be used for production as it contains many extra checks and saves all intermediate results for debugging.
    See grouped_gemm/reference/moe_block.py for a cleaner implementation.
    """

    def __init__(
        self,
        config: Qwen3MoeConfig,
        gate: torch.Tensor,
        gate_up_proj: torch.Tensor,
        down_proj: torch.Tensor,
        permute_x: bool = False,
        permute_y: bool = False,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
    ):
        """
        Args:
            config, gate, gate_up_proj, down_proj: forwarded unchanged to the parent constructor.
            permute_x: when True, `forward` skips the explicit `permute` of the hidden states
                and passes `permute_x = True` to the first grouped gemm instead.
            permute_y: when True, `forward` skips the explicit `unpermute` and passes
                `permute_y = True` to the second grouped gemm instead.
            autotune: forwarded to both `grouped_gemm` calls.
            kernel_config_fwd, kernel_config_bwd_dW, kernel_config_bwd_dX: kernel configs
                forwarded to both `grouped_gemm` calls; all three are required when
                `autotune` is False.

        Raises:
            AssertionError: if `autotune` is False and any kernel config is None.
        """
        super().__init__(config, gate, gate_up_proj, down_proj)
        self.permute_x = permute_x
        self.permute_y = permute_y
        self.autotune = autotune
        if not autotune:
            assert (
                kernel_config_fwd is not None
                and kernel_config_bwd_dW is not None
                and kernel_config_bwd_dX is not None
            ), "Kernel configs must be provided if autotune is False"
        self.kernel_config_fwd = kernel_config_fwd
        self.kernel_config_bwd_dW = kernel_config_bwd_dW
        self.kernel_config_bwd_dX = kernel_config_bwd_dX

    @classmethod
    def from_hf(
        cls,
        moe_block: Qwen3MoeSparseMoeBlock,
        permute_x: bool = False,
        permute_y: bool = False,
        autotune: bool = True,
        kernel_config_fwd: KernelConfigForward = None,
        kernel_config_bwd_dW: KernelConfigBackward_dW = None,
        kernel_config_bwd_dX: KernelConfigBackward_dX = None,
    ):
        """
        Build an instance from a HuggingFace `Qwen3MoeSparseMoeBlock`.

        The config is read from the first expert of `moe_block` and the weights are
        obtained via `Qwen3MoeGroupedGEMMBlock.extract_hf_weights`; all remaining
        arguments are passed straight through to the constructor.
        """
        config: Qwen3MoeConfig = moe_block.experts[0].config
        gate, gate_up_proj, down_proj = Qwen3MoeGroupedGEMMBlock.extract_hf_weights(
            moe_block
        )
        return cls(
            config,
            gate,
            gate_up_proj,
            down_proj,
            permute_x = permute_x,
            permute_y = permute_y,
            autotune = autotune,
            kernel_config_fwd = kernel_config_fwd,
            kernel_config_bwd_dW = kernel_config_bwd_dW,
            kernel_config_bwd_dX = kernel_config_bwd_dX,
        )

    def forward(
        self, hidden_states: torch.Tensor, debug: bool = False
    ) -> tuple[GroupedGEMMResult, torch.Tensor]:
        """
        Run the MoE block and return every intermediate alongside the final output.

        Args:
            hidden_states: 3-D input unpacked as (batch_size, sequence_length, hidden_dim).
            debug: not read anywhere in this method.

        Returns:
            A `(GroupedGEMMResult, router_logits)` pair. The result object carries the
            routing data, both grouped gemm outputs, the activation output, the
            token-ordered expert outputs and the final hidden states reshaped back to
            (batch_size, sequence_length, hidden_dim).

        Raises:
            AssertionError: if any intermediate has an unexpected shape.
        """
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        num_tokens = batch_size * sequence_length
        # Each token is routed to top_k experts, so expert-ordered tensors have this many rows.
        total_tokens = num_tokens * self.top_k

        hidden_states = hidden_states.view(-1, hidden_dim)

        router_logits, routing_weights, selected_experts = self.run_router(
            hidden_states
        )
        # Pre-processing
        # 1. Compute tokens per expert and indices for gathering tokens from token order to expert order
        # NOTE: these are auxiliary data structs which don't need to be recorded in autograd graph
        token_counts_by_expert, gather_indices = (
            self.get_token_counts_and_gather_indices(selected_experts)
        )

        # 2. permute_x -> permutation will be fused in prologue of first grouped gemm
        # Otherwise the permutation is done explicitly here, before the kernel is called.
        if not self.permute_x:
            hidden_states = permute(hidden_states, gather_indices, self.top_k)
            assert hidden_states.shape == (total_tokens, hidden_dim)

        # Start expert computation
        first_gemm = grouped_gemm(
            X = hidden_states,
            W = self.gate_up_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = self.permute_x,
            permute_y = False,  # output of first grouped gemm should never be permuted
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = True,
        )
        assert first_gemm.shape == (total_tokens, 2 * self.moe_intermediate_size)
        intermediate = self.act_and_mul(first_gemm)
        assert intermediate.shape == (total_tokens, self.moe_intermediate_size)
        # The input to the second gemm is already in expert order, hence permute_x = False.
        second_gemm = grouped_gemm(
            X = intermediate,
            W = self.down_proj,
            m_sizes = token_counts_by_expert,
            gather_indices = gather_indices,
            topk = self.top_k,
            permute_x = False,
            permute_y = self.permute_y,
            autotune = self.autotune,
            kernel_config_fwd = self.kernel_config_fwd,
            kernel_config_bwd_dW = self.kernel_config_bwd_dW,
            kernel_config_bwd_dX = self.kernel_config_bwd_dX,
            is_first_gemm = False,
        )
        assert second_gemm.shape == (total_tokens, hidden_dim)

        # Post-processing
        # 1. Unpermute from expert order to token order
        # With permute_y the second grouped gemm output is used directly as the token-ordered result.
        if not self.permute_y:
            hidden_states_unpermute = unpermute(second_gemm, gather_indices)
            assert hidden_states_unpermute.shape == (total_tokens, hidden_dim)
        else:
            hidden_states_unpermute = second_gemm

        # 2. Merge topk weights
        # Weight each of a token's top_k expert outputs, then sum them into one row per token.
        hidden_states = (
            hidden_states_unpermute.view(num_tokens, self.top_k, hidden_dim)
            * routing_weights[..., None]
        )
        hidden_states = hidden_states.sum(dim = 1)
        assert hidden_states.shape == (num_tokens, hidden_dim)

        hidden_states = hidden_states.view(batch_size, sequence_length, hidden_dim)
        return GroupedGEMMResult(
            token_counts_by_expert = token_counts_by_expert,
            gather_indices = gather_indices,
            topk_weights = routing_weights,
            first_gemm = first_gemm,
            intermediate = intermediate,
            second_gemm = second_gemm,
            hidden_states_unpermute = hidden_states_unpermute,
            hidden_states = hidden_states,
        ), router_logits


================================================
FILE: unsloth/kernels/moe/tests/run_qwen3_moe_tests.sh
================================================
#!/bin/bash
#
# Runs tests.test_qwen3_moe once for every combination of sequence length,
# dtype, permute_x, permute_y and autotune listed below.
#
# Every combination is executed even if an earlier one fails; a pass/fail line
# is printed per combination, and the script exits with a non-zero status if at
# least one combination failed so that callers (e.g. CI) can detect failures.

set -euo pipefail

SEQLENS=(1024)
DTYPES=(bfloat16)
PERMUTE_X=(false true)
PERMUTE_Y=(false true)
AUTOTUNE=(false true)

# Number of failing combinations seen so far.
FAILED=0

for SEQLEN in "${SEQLENS[@]}"; do
    for DTYPE in "${DTYPES[@]}"; do
        for PX in "${PERMUTE_X[@]}"; do
            for PY in "${PERMUTE_Y[@]}"; do
                for AT in "${AUTOTUNE[@]}"; do

                    # Boolean options are plain flags: present when true, omitted when false.
                    # (A false test in `A && B` does not trip `set -e`.)
                    ARGS=()
                    [[ "$PX" == "true" ]] && ARGS+=("--permute_x")
                    [[ "$PY" == "true" ]] && ARGS+=("--permute_y")
                    [[ "$AT" == "true" ]] && ARGS+=("--autotune")

                    ARGS+=(--seqlen "$SEQLEN" --dtype "$DTYPE")

                    echo "Running with args: ${ARGS[*]}"
                    # Running inside `if` keeps `set -e` from aborting the sweep on a failing test.
                    if ! python -m tests.test_qwen3_moe "${ARGS[@]}"; then
                        echo "❌ Test failed with args: --permute_x=$PX --permute_y=$PY --autotune=$AT --seqlen=$SEQLEN --dtype=$DTYPE" >&2
                        FAILED=$((FAILED + 1))
                    else
                        echo "✅ Test passed with args: --permute_x=$PX --permute_y=$PY --autotune=$AT --seqlen=$SEQLEN --dtype=$DTYPE"
                    fi

                done
            done
        done
    done
done

# Propagate failures through the exit status instead of always exiting 0.
if (( FAILED > 0 )); then
    echo "❌ $FAILED test configuration(s) failed" >&2
    exit 1
fi


================================================
FILE: unsloth/kernels/moe/tests/test_grouped_gemm.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

from dataclasses import asdict

import pytest
import torch

from grouped_gemm.interface import (
    grouped_gemm,
    grouped_gemm_dW,
    grouped_gemm_dX,
    grouped_gemm_forward,
)
from grouped_gemm.kernels.tuning import (
    KernelConfig,
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from grouped_gemm.reference.moe_ops import (
    calculate_topk,
    get_routing_indices,
    permute,
    torch_grouped_gemm,
    unpermute,
)

from .common import (
    DATA_CONFIGS,
    KERNEL_CONFIGS_FWD,
    LLAMA_MODEL_CONFIG,
    QWEN_MODEL_CONFIG,
    SMALL_MODEL_CONFIGS,
    TOLERANCE,
    DataConfig,
    KERNEL_CONFIGS_BWD_dW,
    KERNEL_CONFIGS_BWD_dX,
    ModelConfig,
    make_inputs,
)

SEED = 0


# Only certain combinations of permute_x, permute_y, use_W1 are valid.
# use_W1 => first grouped GEMM in a fused MoE MLP
# use_W2 => second grouped GEMM in a fused MoE MLP
# permute_x => permute the input to the grouped GEMM, only done for the first grouped GEMM
# permute_y => permute the output of the grouped GEMM, only done for the second grouped GEMM
# fuse_mul_post => fuse the multiplication of topk weights in the epilogue of the second grouped GEMM; only used for inference, not currently tested
def check_valid_config(
    permute_x, permute_y, use_W1, fuse_mul_post = False, is_backward = False, verbose = False
):
    """
    Return True if the flag combination describes a supported grouped GEMM test.

    The rules are checked in order and the first violated one decides the outcome:
      1. permute_x and permute_y together
      2. permute_x on the second grouped GEMM (use_W2)
      3. permute_y on the first grouped GEMM (use_W1)
      4. fuse_mul_post on the first grouped GEMM (use_W1)
      5. fuse_mul_post in a backward test

    When `verbose` is truthy, the violated rule is printed before returning False.
    """
    use_W2 = not use_W1

    if permute_x and permute_y:
        reason = f"{permute_x = } {permute_y = }"
    elif use_W2 and permute_x:
        reason = f"{permute_x = } {use_W2 = }"
    elif use_W1 and permute_y:
        reason = f"{permute_y = } {use_W1 = }"
    elif fuse_mul_post and use_W1:
        reason = f"{fuse_mul_post = } {use_W1 = }"
    elif is_backward and fuse_mul_post:
        reason = f"{fuse_mul_post = } {is_backward = }"
    else:
        # No rule violated.
        return True

    if verbose:
        print(f"Skipping test: {reason}")
    return False


"""
grouped_gemm_forward

permute_x: typically in a fused MoE MLP, we can fuse the permutation of hidden states (X) from token order to expert grouped order needed for grouped GEMM by directly loading X in permuted order rather than launching a separate permutation kernel.
permute_y: We can also fuse the unpermutation of tokens after the second grouped GEMM to restore to original token order.  This is fused into the second grouped GEMM by directly storing the output in unpermuted order.
fuse_mul_post: We can also fuse the multiplication of topk weights in the epilogue of the second grouped GEMM.  Note that this is only supported for inference and not training, although this may change in the future.
use_W1 tests the shapes for the first grouped GEMM in a fused MoE MLP
use_W2 = `not use_W1` tests the shapes for the second grouped GEMM in a fused MoE MLP

Given the above, only certain combinations are valid:
- use_W1 is always False when permute_y is True since we only permute the second grouped GEMM
- use_W2 is always False when permute_x is True since we only permute the first grouped GEMM
- only one of permute_x and permute_y can be True
- fuse_mul_post is only True if permute_y is also True

See `check_valid_config` for more details.
"""


def _test_grouped_gemm_forward(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,  # W1 -> first grouped GEMM in a fused MoE MLP, not W1 -> second grouped GEMM in a fused MoE MLP
    fuse_mul_post: bool = False,
    flatten: bool = True,
    # Manually tuned parameters
    use_tma_load_w: bool = False,
    use_tma_load_x: bool = False,
    use_tma_store: bool = False,
    BLOCK_SIZE_M: int = None,
    BLOCK_SIZE_N: int = None,
    BLOCK_SIZE_K: int = None,
    num_warps: int = None,
    num_stages: int = None,
    # Autotuning parameters
    autotune: bool = False,
    num_autotune_configs: int = None,
    # Flag to manually enable TMA store
    allow_tma_store: bool = False,
    use_autograd: bool = False,
):
    """
    Compare the grouped GEMM forward kernel against `torch_grouped_gemm`.

    The reference is always computed on the explicitly permuted input. The kernel
    under test receives the unpermuted input when `permute_x` is set (so that the
    permutation is done by the kernel) and the permuted input otherwise. It is
    invoked either through `grouped_gemm` (`use_autograd = True`) or through
    `grouped_gemm_forward` directly.

    The test is skipped for flag combinations rejected by `check_valid_config`
    and when `use_tma_store` is requested without `allow_tma_store`.

    NOTE: when `autotune` is set and `num_autotune_configs` is given, the config
    list of the imported autotuned forward kernel object is truncated in place;
    the assignment is not undone at the end of the test.

    Raises:
        AssertionError: on any shape mismatch or if the outputs differ by more
            than `TOLERANCE[X.dtype]`.
    """
    if not check_valid_config(
        permute_x, permute_y, use_W1 = use_W1, fuse_mul_post = fuse_mul_post
    ):
        pytest.skip(
            f"Skipping test due to invalid config: {permute_x = } {permute_y = } {use_W1 = } {fuse_mul_post = }"
        )

    if use_tma_store and not allow_tma_store:
        pytest.skip("TMA store needs to be debugged due to non-deterministic behavior")

    X1, X2, W1, W2, gating_output = make_inputs(
        M = data_config.bs * data_config.seq_len,
        N = model_config.intermediate_size,
        K = model_config.hidden_size,
        E = model_config.num_experts,
        topk = model_config.topk,
        dtype = data_config.dtype,
    )
    topk = model_config.topk
    use_sigmoid = model_config.use_sigmoid
    renormalize = model_config.renormalize

    X = X1 if use_W1 else X2
    num_tokens = data_config.bs * data_config.seq_len
    E, K, N = W2.shape  # E = num_experts, K = hidden_size, N = intermediate_size
    assert W1.shape == (E, 2 * N, K)
    W = W1 if use_W1 else W2

    if use_W1:
        assert X.shape == (
            num_tokens,
            K,
        ), f"X.shape: {X.shape}, num_tokens: {num_tokens}, K: {K}"
    else:
        assert X.shape == (
            num_tokens * topk,
            N,
        ), f"X.shape: {X.shape}, num_tokens: {num_tokens}, topk: {topk}, N: {N}"

    total_tokens = num_tokens * topk
    output_shape = (total_tokens, 2 * N) if use_W1 else (total_tokens, K)

    topk_weights, topk_ids = calculate_topk(
        gating_output, topk, use_sigmoid = use_sigmoid, renormalize = renormalize
    )
    topk_weights = topk_weights.view(-1)  # num_tokens * topk
    topk_ids = topk_ids.view(-1)  # num_tokens * topk

    expert_token_counts, gather_indices = get_routing_indices(topk_ids, num_experts = E)
    assert len(gather_indices) == total_tokens
    assert len(expert_token_counts) == E

    atol, rtol = TOLERANCE[X.dtype]

    Xperm = permute(X, gather_indices, topk)

    # The expected shape must be selected *before* the comparison:
    # `a == b if c else d` parses as `(a == b) if c else d`, which would assert a
    # non-empty (always truthy) tuple whenever use_W1 is False.
    expected_xperm_shape = (total_tokens, K) if use_W1 else (total_tokens, N)
    assert (
        Xperm.shape == expected_xperm_shape
    ), f"Xperm.shape: {Xperm.shape}, expected: {expected_xperm_shape}"

    # Reference always consumes the explicitly permuted input.
    ref_output = torch_grouped_gemm(X = Xperm, W = W, m_sizes = expert_token_counts)

    # With permute_x the kernel gathers rows itself, so it is fed the token-ordered input.
    X_test = X if permute_x else Xperm

    # No need to run all configs for tests, otherwise takes too long
    if autotune:
        from grouped_gemm.kernels.forward import _autotuned_grouped_gemm_forward_kernel

        if num_autotune_configs is not None:
            _autotuned_grouped_gemm_forward_kernel.configs = (
                _autotuned_grouped_gemm_forward_kernel.configs[:num_autotune_configs]
            )

    # Use autograd.Function interface
    if use_autograd:
        kernel_config_fwd = KernelConfigForward(
            BLOCK_SIZE_M = BLOCK_SIZE_M,
            BLOCK_SIZE_N = BLOCK_SIZE_N,
            BLOCK_SIZE_K = BLOCK_SIZE_K,
            num_warps = num_warps,
            num_stages = num_stages,
            permute_x = permute_x,
            permute_y = permute_y,
            fuse_mul_post = fuse_mul_post,
            use_tma_load_w = use_tma_load_w,
            use_tma_load_x = use_tma_load_x,
            use_tma_store = use_tma_store,
        )

        test_output = grouped_gemm(
            X = X_test,
            W = W,
            topk = topk,
            m_sizes = expert_token_counts,
            gather_indices = gather_indices,
            topk_weights = topk_weights if fuse_mul_post else None,
            permute_x = permute_x,
            permute_y = permute_y,
            fuse_mul_post = fuse_mul_post,
            kernel_config_fwd = kernel_config_fwd,
            autotune = autotune,
            is_first_gemm = use_W1,
        )
    # Use manual interface
    else:
        test_output = grouped_gemm_forward(
            X = X_test,
            W = W,
            topk = topk,
            m_sizes = expert_token_counts,
            gather_indices = gather_indices,
            topk_weights = topk_weights if fuse_mul_post else None,
            permute_x = permute_x,
            permute_y = permute_y,
            fuse_mul_post = fuse_mul_post,
            use_tma_load_w = use_tma_load_w,
            use_tma_load_x = use_tma_load_x,
            use_tma_store = use_tma_store,
            autotune = autotune,
            BLOCK_SIZE_M = BLOCK_SIZE_M,
            BLOCK_SIZE_N = BLOCK_SIZE_N,
            BLOCK_SIZE_K = BLOCK_SIZE_K,
            num_warps = num_warps,
            num_stages = num_stages,
            flatten = flatten,
        )
    assert ref_output.shape == output_shape
    assert test_output.shape == output_shape

    # With permute_y the kernel stores its output in token order, so bring the reference into the same order.
    if permute_y:
        ref_output = unpermute(ref_output, gather_indices)
    if fuse_mul_post:
        # if we don't permute_y, then test output is permuted with topk weights applied
        # the ref output needs to be unpermuted before multiplying by topk weights since topk weights are in token order
        if not permute_y:
            ref_output = unpermute(ref_output, gather_indices)
            test_output = unpermute(test_output, gather_indices)
        ref_output = ref_output * topk_weights[:, None]

    assert torch.allclose(
        ref_output, test_output, atol = atol, rtol = rtol
    ), f"Grouped gemm forward failed: {(ref_output - test_output).abs().max().item():.6f}"


# NOTE: Fuse multiplication of topk weights is only supported for inference and not training, although this may change in the future; not currently tested.
@pytest.mark.parametrize(
    "kernel_config",
    KERNEL_CONFIGS_FWD,
    ids = lambda x: x.to_string(include_tuning_params = True, include_tma = True),
)
@pytest.mark.parametrize(
    "model_config",
    SMALL_MODEL_CONFIGS + [QWEN_MODEL_CONFIG, LLAMA_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config", DATA_CONFIGS, ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}"
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_forward_manual(
    data_config: DataConfig,
    model_config: ModelConfig,
    kernel_config: KernelConfigForward,
    use_W1: bool,
):
    """Forward test with manually specified kernel configs, calling the forward kernel interface directly."""
    # Every field of the kernel config is forwarded as a keyword argument of the shared test body.
    kernel_kwargs = asdict(kernel_config)
    _test_grouped_gemm_forward(
        use_W1 = use_W1,
        model_config = model_config,
        data_config = data_config,
        **kernel_kwargs,
    )


@pytest.mark.parametrize(
    "kernel_config",
    KERNEL_CONFIGS_FWD,
    ids = lambda x: x.to_string(include_tuning_params = True, include_tma = True),
)
@pytest.mark.parametrize(
    "model_config",
    SMALL_MODEL_CONFIGS + [QWEN_MODEL_CONFIG, LLAMA_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config", DATA_CONFIGS, ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}"
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_forward_manual_autograd(
    data_config: DataConfig,
    model_config: ModelConfig,
    kernel_config: KernelConfigForward,
    use_W1: bool,
):
    """Forward test with manually specified kernel configs, going through the `grouped_gemm` autograd interface."""
    # Every field of the kernel config is forwarded as a keyword argument of the shared test body.
    kernel_kwargs = asdict(kernel_config)
    _test_grouped_gemm_forward(
        use_autograd = True,
        use_W1 = use_W1,
        model_config = model_config,
        data_config = data_config,
        **kernel_kwargs,
    )


@pytest.mark.parametrize(
    "num_autotune_configs", [10], ids = lambda x: f"num_autotune_configs={x}"
)
@pytest.mark.parametrize(
    "permute_x", [True, False], ids = lambda x: "permute_x" if x else ""
)
@pytest.mark.parametrize(
    "permute_y", [True, False], ids = lambda x: "permute_y" if x else ""
)
@pytest.mark.parametrize(
    "model_config",
    [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config", DATA_CONFIGS, ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}"
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_forward_autotune(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    num_autotune_configs: int,
):
    """Forward test with autotuning enabled, calling the forward kernel interface directly."""
    _test_grouped_gemm_forward(
        # Autotuning setup
        autotune = True,
        num_autotune_configs = num_autotune_configs,
        use_autograd = False,
        # Parametrized inputs
        use_W1 = use_W1,
        permute_x = permute_x,
        permute_y = permute_y,
        model_config = model_config,
        data_config = data_config,
    )


@pytest.mark.parametrize(
    "num_autotune_configs", [10], ids = lambda x: f"num_autotune_configs={x}"
)
@pytest.mark.parametrize(
    "permute_x", [True, False], ids = lambda x: "permute_x" if x else ""
)
@pytest.mark.parametrize(
    "permute_y", [True, False], ids = lambda x: "permute_y" if x else ""
)
@pytest.mark.parametrize(
    "model_config",
    [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config", DATA_CONFIGS, ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}"
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_forward_autotune_autograd(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    num_autotune_configs: int,
):
    """Forward test with autotuning enabled, going through the `grouped_gemm` autograd interface."""
    _test_grouped_gemm_forward(
        # Autotuning setup
        autotune = True,
        num_autotune_configs = num_autotune_configs,
        use_autograd = True,
        # Parametrized inputs
        use_W1 = use_W1,
        permute_x = permute_x,
        permute_y = permute_y,
        model_config = model_config,
        data_config = data_config,
    )


"""
grouped_gemm_backward_dX

use_W1 tests the shapes for the first grouped GEMM in a fused MoE MLP
use_W2 = `not use_W1` tests the shapes for the second grouped GEMM in a fused MoE MLP

Only certain combinations of permute_x, permute_y, and fuse_mul are supported.

Typically in a fused MoE MLP, we can fuse the permutation of hidden states (X) from token order to expert grouped order needed for grouped GEMM by directly loading X in permuted order rather than launching a separate permutation kernel.
We can also fuse the unpermutation of tokens after the second grouped GEMM to restore to original token order.  This is fused into the second grouped GEMM by directly storing the output in unpermuted order.

Hence the following conditions:
- If use_W1 there are three cases:
    - permute_x is False and topk > 1:
        - dX_test is still in permuted order and has shape (total_tokens, K)
        - it needs to be unpermuted and summed across topk before comparing to ref_grad
    - permute_x is True:
        - dX_test is already unpermuted and summed across topk with shape (num_tokens, K)
        - no further processing is needed
    - permute_x is False and topk == 1:
        - dX_test needs to be permuted, no need to sum since topk == 1

- If use_W2:
    - permute_x is always False
    - if permute_y:
        - grad_output needs to be unpermuted before passing to grouped_gemm_dX
        - dX_test is permuted and has shape (total_tokens, N)
        - it needs to be unpermuted before comparing to ref_grad or can be compared directly to Xperm.grad
    - if not permute_y:
        - dX_test is not permuted and has shape (total_tokens, N)
        - no further processing is needed
"""


def _test_grouped_gemm_backward_dX(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool = False,
    permute_y: bool = False,
    use_tma_load_dy: bool = False,
    use_tma_load_w: bool = False,
    use_tma_store: bool = False,
    use_W1: bool = True,
    autotune: bool = False,
    num_autotune_configs: int = None,
    BLOCK_SIZE_M: int = None,
    BLOCK_SIZE_N: int = None,
    BLOCK_SIZE_K: int = None,
    num_warps: int = None,
    num_stages: int = None,
    flatten: bool = True,
    allow_tma_store: bool = False,
    use_autograd: bool = False,
    fuse_mul_post: bool = False,
):
    """
    Compare the grouped GEMM input gradient (dX) against torch autograd.

    The reference gradient is obtained by backpropagating a random `grad_output`
    through `torch_grouped_gemm` applied to the explicitly permuted input. The
    kernel under test is exercised either through `grouped_gemm` followed by
    `.backward()` (`use_autograd = True`) or by calling `grouped_gemm_dX`
    directly with the same `grad_output`.

    The test is skipped for flag combinations rejected by `check_valid_config`,
    when `use_tma_store` is requested without `allow_tma_store`, and when
    autotuning is requested for model configs whose hidden and intermediate
    sizes are both <= 128.

    `fuse_mul_post` is accepted but not read by this function.

    NOTE: when `autotune` is set and `num_autotune_configs` is given, the config
    lists of the imported autotuned kernel objects are truncated in place; the
    assignment is not undone at the end of the test.

    Raises:
        AssertionError: on any shape mismatch or if outputs / gradients differ by
            more than `TOLERANCE[X.dtype]`.
    """
    if not check_valid_config(permute_x, permute_y, use_W1 = use_W1, is_backward = True):
        pytest.skip(
            f"Skipping test due to invalid config: {permute_x = } {permute_y = } {use_W1 = }"
        )

    if use_tma_store and not allow_tma_store:
        pytest.skip("TMA store needs to be debugged due to non-deterministic behavior")

    if (
        autotune
        and model_config.intermediate_size <= 128
        and model_config.hidden_size <= 128
    ):
        pytest.skip("Skipping autotuning for small model configs")

    # Prevent OOM for large intermediate sizes
    # NOTE(review): this writes to the `model_config` object that was passed in. If that
    # object is shared between parametrized tests (it appears to come from module-level
    # configs), the reduced sizes persist for every test that runs afterwards. Left as-is
    # because other tests may rely on the reduction — confirm before changing.
    if model_config.intermediate_size > 2048:
        model_config.intermediate_size = 1024
    if model_config.hidden_size > 2048:
        model_config.hidden_size = 1024

    X1, X2, W1, W2, gating_output = make_inputs(
        M = data_config.bs * data_config.seq_len,
        N = model_config.intermediate_size,
        K = model_config.hidden_size,
        E = model_config.num_experts,
        topk = model_config.topk,
        dtype = data_config.dtype,
        requires_grad = True,
    )
    topk = model_config.topk
    num_experts = model_config.num_experts
    use_sigmoid = model_config.use_sigmoid
    renormalize = model_config.renormalize

    X = X1 if use_W1 else X2
    num_tokens = data_config.bs * data_config.seq_len
    total_tokens = num_tokens * topk

    E, K, N = W2.shape  # E = num_experts, K = hidden_size, N = intermediate_size
    assert W1.shape == (E, 2 * N, K)
    W = W1 if use_W1 else W2

    if use_W1:
        assert X.shape == (
            num_tokens,
            K,
        ), f"X.shape: {X.shape}, num_tokens: {num_tokens}, K: {K}"
    else:
        assert X.shape == (
            total_tokens,
            N,
        ), f"X.shape: {X.shape}, total_tokens: {total_tokens}, N: {N}"

    # Separate leaf copy of the weights for the kernel under test.
    W_test = W.detach().clone().requires_grad_(True)

    # Only the expert assignments are needed here; the topk weights are not used.
    _, topk_ids = calculate_topk(
        gating_output, topk, use_sigmoid = use_sigmoid, renormalize = renormalize
    )
    topk_ids = topk_ids.view(-1)  # num_tokens * topk

    expert_token_counts, gather_indices = get_routing_indices(topk_ids, num_experts = E)
    assert len(gather_indices) == total_tokens
    assert len(expert_token_counts) == num_experts

    atol, rtol = TOLERANCE[X.dtype]
    Xperm = permute(X, gather_indices, topk)

    # Need to retain grad otherwise grad is not propagated
    X.retain_grad()
    W.retain_grad()
    Xperm.retain_grad()

    # The expected shape must be selected *before* the comparison:
    # `a == b if c else d` parses as `(a == b) if c else d`, which would assert a
    # non-empty (always truthy) tuple whenever use_W1 is False.
    expected_xperm_shape = (total_tokens, K) if use_W1 else (total_tokens, N)
    assert (
        Xperm.shape == expected_xperm_shape
    ), f"Xperm.shape: {Xperm.shape}, expected: {expected_xperm_shape}"

    output_shape = (total_tokens, 2 * N) if use_W1 else (total_tokens, K)
    ref_output = torch_grouped_gemm(X = Xperm, W = W, m_sizes = expert_token_counts)
    assert (
        ref_output.shape == output_shape
    ), f"ref_output.shape: {ref_output.shape}, output_shape: {output_shape}"

    if permute_y:
        ref_output = unpermute(ref_output, gather_indices)

    grad_output = torch.randn_like(ref_output)
    ref_output.backward(grad_output)

    assert X.grad is not None
    assert W.grad is not None

    # Reference gradient w.r.t. the permuted (expert-ordered) input.
    ref_grad = Xperm.grad

    if autotune:
        # No need to run all configs for autotuning
        from grouped_gemm.kernels.backward import _autotuned_grouped_gemm_dX_kernel

        if num_autotune_configs is not None:
            _autotuned_grouped_gemm_dX_kernel.configs = (
                _autotuned_grouped_gemm_dX_kernel.configs[:num_autotune_configs]
            )

    if use_autograd:
        if autotune:
            # The autograd path also runs the forward kernel, so limit its configs as well
            # (the dX kernel configs were already truncated above).
            from grouped_gemm.kernels.forward import (
                _autotuned_grouped_gemm_forward_kernel,
            )

            if num_autotune_configs is not None:
                _autotuned_grouped_gemm_forward_kernel.configs = (
                    _autotuned_grouped_gemm_forward_kernel.configs[
                        :num_autotune_configs
                    ]
                )

            kernel_config_fwd = None
            kernel_config_bwd_dX = None
        else:
            kernel_config_fwd = KernelConfigForward()
            kernel_config_bwd_dX = KernelConfigBackward_dX(
                use_tma_load_dy = use_tma_load_dy,
                use_tma_load_w = use_tma_load_w,
                use_tma_store = use_tma_store,
                BLOCK_SIZE_M = BLOCK_SIZE_M,
                BLOCK_SIZE_N = BLOCK_SIZE_N,
                BLOCK_SIZE_K = BLOCK_SIZE_K,
                num_warps = num_warps,
                num_stages = num_stages,
            )

        # With permute_x the kernel consumes the token-ordered input; otherwise the permuted one.
        X_ = (
            X.detach().clone().requires_grad_(True)
            if permute_x
            else Xperm.detach().clone().requires_grad_(True)
        )
        test_output = grouped_gemm(
            X = X_,
            W = W_test,
            m_sizes = expert_token_counts,
            gather_indices = gather_indices,
            topk = topk,
            permute_x = permute_x,
            permute_y = permute_y,
            autotune = autotune,
            kernel_config_fwd = kernel_config_fwd,
            kernel_config_bwd_dX = kernel_config_bwd_dX,
            is_first_gemm = use_W1,
            dX_only = True,
        )
        assert (
            test_output.shape == ref_output.shape
        ), f"test_output.shape: {test_output.shape}, ref_output.shape: {ref_output.shape}"
        assert torch.allclose(
            test_output, ref_output, atol = atol, rtol = rtol
        ), f"Grouped gemm backward_dX forward outputs mismatch: {(test_output - ref_output).abs().max().item():.6f}"
        test_output.backward(grad_output)
        assert X_.grad is not None

        # NOTE: need to handle grad differently in this case due to errors arising from how torch autograd handles unpermute and sum reduction
        # the grad of Xperm unpermuted and reduced across topk should match X_.grad
        # However, both will have a numerical difference with that of ref_grad
        # This is due to the fact that torch autograd handles unpermute and sum reduction differently see: https://discuss.pytorch.org/t/permute-unpermute-gradient/219557
        if permute_x and use_W1:
            X_grad_unperm = unpermute(Xperm.grad, gather_indices)
            manual_grad_check = X_grad_unperm.view(num_tokens, topk, K).sum(dim = 1)
            assert (
                manual_grad_check.shape == X_.grad.shape
            ), f"manual_grad_check.shape: {manual_grad_check.shape}, X_.grad.shape: {X_.grad.shape}"
            assert torch.allclose(
                manual_grad_check, X_.grad, atol = atol, rtol = rtol
            ), f"Grouped gemm backward_dX grad mismatch: {(manual_grad_check - X_.grad).abs().max().item():.6f}"
            manual_diff = (X_.grad - manual_grad_check).abs().max().item()
            autograd_diff = (X_.grad - X.grad).abs().max().item()
            print(f"manual_diff: {manual_diff:.6f}, autograd_diff: {autograd_diff:.6f}")
        else:
            assert torch.allclose(
                X_.grad, ref_grad, atol = atol, rtol = rtol
            ), f"Grouped gemm backward_dX grad mismatch: {(X_.grad - ref_grad).abs().max().item():.6f}"
        return

    # Manual interface: call the dX kernel directly with the upstream gradient.
    dX_test = grouped_gemm_dX(
        dY = grad_output,
        W = W_test,
        gather_indices = gather_indices,
        m_sizes = expert_token_counts,
        topk = topk,
        permute_x = permute_x,
        permute_y = permute_y,
        use_tma_load_w = use_tma_load_w,
        use_tma_load_dy = use_tma_load_dy,
        use_tma_store = use_tma_store,
        autotune = autotune,
        BLOCK_SIZE_M = BLOCK_SIZE_M,
        BLOCK_SIZE_N = BLOCK_SIZE_N,
        BLOCK_SIZE_K = BLOCK_SIZE_K,
        num_warps = num_warps,
        num_stages = num_stages,
        flatten = flatten,
    )

    # if permute_x and use_W1 (first grouped GEMM) then the kernel should have unpermuted the dX
    # therefore we need to unpermute the ref_grad to compare to the output of the kernel
    if permute_x and use_W1:
        ref_grad = unpermute(ref_grad, gather_indices)

    assert (
        ref_grad.shape == dX_test.shape
    ), f"Grouped gemm manual backward_dX outputs mismatch: ref_grad: {ref_grad.shape}, dX_test: {dX_test.shape}"
    diff = (ref_grad - dX_test).abs().max().item()

    assert torch.allclose(
        ref_grad, dX_test, atol = atol, rtol = rtol
    ), f"Grouped gemm manual backward_dX outputs mismatch: {diff:.6f}"

    if permute_x and use_W1:
        # Show that reduction results in diffs
        # First calculate X.grad manually by backpropping through unpermuted ref_grad
        dX_ref_check = ref_grad.view(num_tokens, topk, K).sum(dim = 1)
        # Do the same for the actual output of the kernel
        dX_test_check = dX_test.view(num_tokens, topk, K).sum(dim = 1)
        # Show diffs for each combination
        diff_ref_check = (X.grad - dX_ref_check).abs().max().item()
        diff_test_check = (X.grad - dX_test_check).abs().max().item()
        diff_check_test = (dX_ref_check - dX_test_check).abs().max().item()
        print(
            f"diff_ref_check: {diff_ref_check:.6f}, diff_test_check: {diff_test_check:.6f}, diff_check_test: {diff_check_test:.6f}"
        )


# NOTE: We reduce the size of the Llama4 model configs to prevent OOM
# Important to note that for the full model size (5120, 8192), the tests do result in diffs on the order of 1e-2.
@pytest.mark.parametrize(
    "kernel_config",
    KERNEL_CONFIGS_BWD_dX,
    ids = lambda x: x.to_string(include_tuning_params = True, include_tma = True),
)
@pytest.mark.parametrize(
    "model_config",
    SMALL_MODEL_CONFIGS[:1] + [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dX_manual(
    data_config: DataConfig,
    model_config: ModelConfig,
    kernel_config: KernelConfigBackward_dX,
    use_W1: bool,
):
    """Run the shared backward-dX check with an explicit kernel config.

    Every field of `kernel_config` is forwarded as a keyword argument and the
    check is run with `use_autograd = False`.
    """
    # The dataclass fields double as keyword arguments of the shared helper.
    kernel_kwargs = asdict(kernel_config)
    _test_grouped_gemm_backward_dX(
        data_config = data_config,
        model_config = model_config,
        use_W1 = use_W1,
        use_autograd = False,
        **kernel_kwargs,
    )


@pytest.mark.parametrize(
    "kernel_config",
    KERNEL_CONFIGS_BWD_dX,
    ids = lambda x: x.to_string(include_tuning_params = True, include_tma = True),
)
@pytest.mark.parametrize(
    "model_config",
    SMALL_MODEL_CONFIGS[:1] + [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dX_manual_autograd(
    data_config: DataConfig,
    model_config: ModelConfig,
    kernel_config: KernelConfigBackward_dX,
    use_W1: bool,
):
    """Run the shared backward-dX check with an explicit kernel config via autograd.

    Identical to the manual variant except that the helper is invoked with
    `use_autograd = True`.
    """
    # The dataclass fields double as keyword arguments of the shared helper.
    kernel_kwargs = asdict(kernel_config)
    _test_grouped_gemm_backward_dX(
        data_config = data_config,
        model_config = model_config,
        use_W1 = use_W1,
        use_autograd = True,
        **kernel_kwargs,
    )


@pytest.mark.parametrize(
    "num_autotune_configs", [20], ids = lambda x: f"num_autotune_configs={x}"
)
@pytest.mark.parametrize(
    "permute_x", [True, False], ids = lambda x: "permute_x" if x else ""
)
@pytest.mark.parametrize(
    "permute_y", [True, False], ids = lambda x: "permute_y" if x else ""
)
@pytest.mark.parametrize(
    "model_config",
    [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dX_autotune(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    num_autotune_configs: int,
):
    """Run the shared backward-dX check with `autotune = True` and `use_autograd = False`.

    No explicit kernel config is supplied; `num_autotune_configs` is passed
    through to the helper.
    """
    # TMA loads / stores will be autotuned
    helper_kwargs = dict(
        data_config = data_config,
        model_config = model_config,
        permute_x = permute_x,
        permute_y = permute_y,
        use_W1 = use_W1,
        autotune = True,
        use_autograd = False,
        num_autotune_configs = num_autotune_configs,
    )
    _test_grouped_gemm_backward_dX(**helper_kwargs)


@pytest.mark.parametrize(
    "num_autotune_configs", [20], ids = lambda x: f"num_autotune_configs={x}"
)
@pytest.mark.parametrize(
    "permute_x", [True, False], ids = lambda x: "permute_x" if x else ""
)
@pytest.mark.parametrize(
    "permute_y", [True, False], ids = lambda x: "permute_y" if x else ""
)
@pytest.mark.parametrize(
    "model_config",
    [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dX_autotune_autograd(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    num_autotune_configs: int,
):
    """Run the shared backward-dX check with `autotune = True` and `use_autograd = True`.

    No explicit kernel config is supplied; `num_autotune_configs` is passed
    through to the helper.
    """
    # TMA loads / stores will be autotuned
    helper_kwargs = dict(
        data_config = data_config,
        model_config = model_config,
        permute_x = permute_x,
        permute_y = permute_y,
        use_W1 = use_W1,
        autotune = True,
        use_autograd = True,
        num_autotune_configs = num_autotune_configs,
    )
    _test_grouped_gemm_backward_dX(**helper_kwargs)


def _test_grouped_gemm_backward_dW(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    use_tma_load_dy: bool = False,
    use_tma_load_x: bool = False,
    use_tma_store: bool = False,
    BLOCK_SIZE_M: int = None,
    BLOCK_SIZE_N: int = None,
    BLOCK_SIZE_K: int = None,
    num_warps: int = None,
    num_stages: int = None,
    flatten: bool = True,
    autotune: bool = False,
    num_autotune_configs: int = None,
    allow_tma_store: bool = False,
    debug: bool = False,
    fuse_mul_post: bool = False,  # Unused for backward_dW
    use_autograd: bool = False,
):
    """Compare the grouped GEMM weight gradient (dW) against a torch reference.

    The reference gradient is `W.grad` obtained by backpropagating a random
    `grad_output` through `torch_grouped_gemm`. The tested gradient comes
    either from calling `grouped_gemm_dW` directly (`use_autograd = False`) or
    from `W_test.grad` after running the `grouped_gemm` autograd interface
    forward and backward (`use_autograd = True`).

    Args:
        data_config: supplies `bs`, `seq_len` and `dtype` for the inputs.
        model_config: supplies `hidden_size`, `intermediate_size`,
            `num_experts`, `topk`, `use_sigmoid` and `renormalize`.
        permute_x / permute_y: permutation flags forwarded to the kernel. With
            `permute_x` the kernel receives the unpermuted input; with
            `permute_y` the reference output is unpermuted before backward.
        use_W1: test the first GEMM (X1, W1) when True, else the second (X2, W2).
        use_tma_load_dy, use_tma_load_x, use_tma_store, BLOCK_SIZE_*, num_warps,
            num_stages, flatten: kernel parameters forwarded when not autotuning.
        autotune: use the autotuned kernels instead of an explicit config.
        num_autotune_configs: if set, truncate the autotuner config lists to
            this many entries (mutates the module-level autotuner objects).
        allow_tma_store: when False, any config with `use_tma_store` is skipped.
        debug: print per-expert gradient slices; on mismatch also print
            per-expert diffs before failing.
        fuse_mul_post: only used for the config validity check.
        use_autograd: select the autograd interface path (see above).

    Skips (via `pytest.skip`) when `check_valid_config` rejects the flag
    combination or when TMA store is requested but not allowed.
    """
    if not check_valid_config(
        permute_x,
        permute_y,
        fuse_mul_post = fuse_mul_post,
        use_W1 = use_W1,
        is_backward = True,
    ):
        pytest.skip(
            f"Skipping test due to invalid config: {permute_x = } {permute_y = } {use_W1 = }"
        )

    if use_tma_store and not allow_tma_store:
        pytest.skip("TMA store needs to be debugged due to non-deterministic behavior")

    X1, X2, W1, W2, gating_output = make_inputs(
        M = data_config.bs * data_config.seq_len,
        N = model_config.intermediate_size,
        K = model_config.hidden_size,
        E = model_config.num_experts,
        topk = model_config.topk,
        dtype = data_config.dtype,
        requires_grad = True,
    )
    topk = model_config.topk
    num_experts = model_config.num_experts
    use_sigmoid = model_config.use_sigmoid
    renormalize = model_config.renormalize

    X = X1 if use_W1 else X2
    num_tokens = data_config.bs * data_config.seq_len
    E, K, N = W2.shape  # E = num_experts, K = hidden_size, N = intermediate_size
    assert W1.shape == (E, 2 * N, K)
    W = W1 if use_W1 else W2

    if use_W1:
        assert X.shape == (
            num_tokens,
            K,
        ), f"X.shape: {X.shape}, num_tokens: {num_tokens}, K: {K}"
    else:
        assert X.shape == (
            num_tokens * topk,
            N,
        ), f"X.shape: {X.shape}, num_tokens: {num_tokens}, topk: {topk}, N: {N}"

    total_tokens = num_tokens * topk
    output_shape = (total_tokens, 2 * N) if use_W1 else (total_tokens, K)

    # Independent leaf copies for the kernel under test so that the reference
    # and tested gradients accumulate on separate tensors.
    X_test = X.detach().clone().requires_grad_(True)
    W_test = W.detach().clone().requires_grad_(True)

    topk_weights, topk_ids = calculate_topk(
        gating_output, topk, use_sigmoid = use_sigmoid, renormalize = renormalize
    )
    topk_weights = topk_weights.view(-1)  # num_tokens * topk
    topk_ids = topk_ids.view(-1)  # num_tokens * topk

    expert_token_counts, gather_indices = get_routing_indices(topk_ids, num_experts = E)
    assert len(gather_indices) == total_tokens
    assert len(expert_token_counts) == num_experts

    atol, rtol = TOLERANCE[X.dtype]
    Xperm = permute(X, gather_indices, topk)
    Xperm_test = Xperm.detach().clone().requires_grad_(True)

    # Need to retain grad otherwise grad is not propagated
    X.retain_grad()
    W.retain_grad()
    Xperm.retain_grad()
    # The conditional must be parenthesized: without the parentheses the
    # statement parses as `assert (shape == a) if use_W1 else b`, which is
    # vacuously true (non-empty tuple) whenever use_W1 is False.
    expected_xperm_shape = (total_tokens, K) if use_W1 else (total_tokens, N)
    assert (
        Xperm.shape == expected_xperm_shape
    ), f"Xperm.shape: {Xperm.shape}, expected: {expected_xperm_shape}"

    ref_output = torch_grouped_gemm(X = Xperm, W = W, m_sizes = expert_token_counts)
    assert ref_output.shape == output_shape

    # if permute_y then the assumption is that the output of grouped_gemm was unpermuted on store
    # Therefore we have to unpermute before backpropping to ensure proper alignment
    if permute_y:
        ref_output = unpermute(ref_output, gather_indices)

    grad_output = torch.randn_like(ref_output)
    ref_output.backward(grad_output)
    assert X.grad is not None
    assert W.grad is not None

    # Test backward kernel directly
    # With permute_x the kernel is handed the unpermuted input; otherwise it
    # gets the already-permuted copy.
    X_ = X_test if permute_x else Xperm_test

    if debug:
        torch.set_printoptions(precision = 4)
        for i in range(num_experts):
            print(f"Expert {i} weight grad:\n{W.grad[i, :5, :5]}")

    if autotune:
        from grouped_gemm.kernels.backward import _autotuned_grouped_gemm_dW_kernel

        if num_autotune_configs is not None:
            _autotuned_grouped_gemm_dW_kernel.configs = (
                _autotuned_grouped_gemm_dW_kernel.configs[:num_autotune_configs]
            )

    if use_autograd:
        from grouped_gemm.interface import grouped_gemm

        if not autotune:
            kernel_config_fwd = KernelConfigForward(
                # Only care about backward_dW config
                use_tma_load_w = False,
                use_tma_load_x = False,
                use_tma_store = False,
                BLOCK_SIZE_M = BLOCK_SIZE_M,
                BLOCK_SIZE_N = BLOCK_SIZE_N,
                BLOCK_SIZE_K = BLOCK_SIZE_K,
                num_warps = num_warps,
                num_stages = num_stages,
            )
            kernel_config_bwd_dW = KernelConfigBackward_dW(
                use_tma_load_dy = use_tma_load_dy,
                use_tma_load_x = use_tma_load_x,
                use_tma_store = use_tma_store,
                BLOCK_SIZE_M = BLOCK_SIZE_M,
                BLOCK_SIZE_N = BLOCK_SIZE_N,
                BLOCK_SIZE_K = BLOCK_SIZE_K,
                num_warps = num_warps,
                num_stages = num_stages,
            )
        else:
            from grouped_gemm.kernels.forward import (
                _autotuned_grouped_gemm_forward_kernel,
            )

            # The dW autotuner configs were already truncated in the
            # `if autotune:` block above; only the forward kernel, which the
            # autograd path also runs, still needs trimming here.
            if num_autotune_configs is not None:
                _autotuned_grouped_gemm_forward_kernel.configs = (
                    _autotuned_grouped_gemm_forward_kernel.configs[
                        :num_autotune_configs
                    ]
                )
            kernel_config_fwd = None
            kernel_config_bwd_dW = None

        test_output = grouped_gemm(
            X = X_,
            W = W_test,
            m_sizes = expert_token_counts,
            gather_indices = gather_indices,
            topk = topk,
            permute_x = permute_x,
            permute_y = permute_y,
            kernel_config_fwd = kernel_config_fwd,
            kernel_config_bwd_dW = kernel_config_bwd_dW,
            autotune = autotune,
            is_first_gemm = use_W1,
            dW_only = True,
        )
        assert (
            test_output.shape == ref_output.shape
        ), f"Grouped gemm autograd backward_dW outputs mismatch: {test_output.shape} != {ref_output.shape}"
        # Shapes are known to match at this point, so report the numerical
        # difference rather than the shapes.
        fwd_diff = (test_output.detach() - ref_output.detach()).abs().max().item()
        assert torch.allclose(
            test_output, ref_output, atol = atol, rtol = rtol
        ), f"Grouped gemm autograd backward_dW forward outputs mismatch: {fwd_diff:.6f}"
        test_output.backward(grad_output)
        assert W_test.grad is not None
        dW_test = W_test.grad
    else:
        dW_test = grouped_gemm_dW(
            dY = grad_output,
            X = X_,
            m_sizes = expert_token_counts,
            gather_indices = gather_indices,
            topk = topk,
            permute_x = permute_x,
            permute_y = permute_y,
            use_tma_load_dy = use_tma_load_dy,
            use_tma_load_x = use_tma_load_x,
            use_tma_store = use_tma_store,
            BLOCK_SIZE_M = BLOCK_SIZE_M,
            BLOCK_SIZE_N = BLOCK_SIZE_N,
            BLOCK_SIZE_K = BLOCK_SIZE_K,
            num_warps = num_warps,
            num_stages = num_stages,
            flatten = flatten,
            autotune = autotune,
            debug = debug,
        )
    assert (
        W.grad.shape == dW_test.shape
    ), f"Grouped gemm manual backward_dW outputs mismatch: W.grad: {W.grad.shape}, dW_test: {dW_test.shape}"

    if debug:
        with torch.no_grad():
            # Only dump diagnostics and fail when the gradients actually
            # differ; a matching result must pass in debug mode as well.
            if not torch.allclose(W.grad, dW_test, atol = atol, rtol = rtol):
                print(f"Ref Wgrad sum: {W.grad.sum().item():.4f}")
                print(f"Test Wgrad sum: {dW_test.sum().item():.4f}")

                for i in range(num_experts):
                    print(f"Expert {i} weight grad:\n{W.grad[i, :5, :5]}")
                    print(f"Expert {i} dW_test:\n{dW_test[i, :5, :5]}")
                    expert_diff = (
                        (W.grad[i, :, :] - dW_test[i, :, :]).abs().max().item()
                    )
                    print(f"Expert {i} diff: {expert_diff:.6f}")

                diff = (W.grad - dW_test).abs().max().item()
                assert (
                    False
                ), f"Grouped gemm manual backward_dW outputs mismatch: {diff:.6f}"
    else:
        diff = (W.grad - dW_test).abs().max().item()
        assert torch.allclose(
            W.grad, dW_test, atol = atol, rtol = rtol
        ), f"Grouped gemm manual backward_dW outputs mismatch: {diff:.6f}"


@pytest.mark.parametrize(
    "kernel_config",
    KERNEL_CONFIGS_BWD_dW,
    ids = lambda x: x.to_string(include_tuning_params = False, include_tma = True),
)
@pytest.mark.parametrize(
    "model_config",
    SMALL_MODEL_CONFIGS + [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dW_manual(
    data_config: DataConfig,
    model_config: ModelConfig,
    kernel_config: KernelConfig,
    use_W1: bool,
    debug: bool = False,
):
    """Run the shared backward-dW check with an explicit kernel config.

    Every field of `kernel_config` is forwarded as a keyword argument and the
    check is run with `use_autograd = False`. The `debug` parameter is accepted
    but is not forwarded to the helper.
    """
    # The dataclass fields double as keyword arguments of the shared helper.
    kernel_kwargs = asdict(kernel_config)
    _test_grouped_gemm_backward_dW(
        data_config = data_config,
        model_config = model_config,
        use_W1 = use_W1,
        use_autograd = False,
        **kernel_kwargs,
    )


@pytest.mark.parametrize(
    "kernel_config",
    KERNEL_CONFIGS_BWD_dW,
    ids = lambda x: x.to_string(include_tuning_params = False, include_tma = True),
)
@pytest.mark.parametrize(
    "model_config",
    SMALL_MODEL_CONFIGS + [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dW_manual_autograd(
    data_config: DataConfig,
    model_config: ModelConfig,
    kernel_config: KernelConfig,
    use_W1: bool,
    debug: bool = False,
):
    """Run the shared backward-dW check with an explicit kernel config via autograd.

    Every field of `kernel_config` is forwarded as a keyword argument and the
    check is run with `use_autograd = True`. The `debug` parameter is accepted
    but is not forwarded to the helper.
    """
    # The dataclass fields double as keyword arguments of the shared helper.
    kernel_kwargs = asdict(kernel_config)
    _test_grouped_gemm_backward_dW(
        data_config = data_config,
        model_config = model_config,
        use_W1 = use_W1,
        use_autograd = True,
        **kernel_kwargs,
    )


@pytest.mark.parametrize(
    "num_autotune_configs", [20], ids = lambda x: f"num_autotune_configs={x}"
)
@pytest.mark.parametrize(
    "permute_x", [True, False], ids = lambda x: "permute_x" if x else ""
)
@pytest.mark.parametrize(
    "permute_y", [True, False], ids = lambda x: "permute_y" if x else ""
)
@pytest.mark.parametrize(
    "model_config",
    [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dW_autotune(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    num_autotune_configs: int,
):
    """Run the shared backward-dW check with `autotune = True` and `use_autograd = False`.

    No explicit kernel config is supplied; `num_autotune_configs` is passed
    through to the helper.
    """
    helper_kwargs = dict(
        data_config = data_config,
        model_config = model_config,
        use_W1 = use_W1,
        permute_x = permute_x,
        permute_y = permute_y,
        autotune = True,
        use_autograd = False,
        num_autotune_configs = num_autotune_configs,
    )
    _test_grouped_gemm_backward_dW(**helper_kwargs)


@pytest.mark.parametrize(
    "num_autotune_configs", [20], ids = lambda x: f"num_autotune_configs={x}"
)
@pytest.mark.parametrize(
    "permute_x", [True, False], ids = lambda x: "permute_x" if x else ""
)
@pytest.mark.parametrize(
    "permute_y", [True, False], ids = lambda x: "permute_y" if x else ""
)
@pytest.mark.parametrize(
    "model_config",
    [LLAMA_MODEL_CONFIG, QWEN_MODEL_CONFIG],
    ids = lambda x: f"topk={x.topk} num_experts={x.num_experts} hidden_size={x.hidden_size} intermediate_size={x.intermediate_size}",
)
@pytest.mark.parametrize(
    "data_config",
    DATA_CONFIGS,
    ids = lambda x: f"seq_len={x.seq_len} dtype={x.dtype}",
)
@pytest.mark.parametrize("use_W1", [True, False], ids = lambda x: f"use_W1={x}")
def test_grouped_gemm_backward_dW_autotune_autograd(
    data_config: DataConfig,
    model_config: ModelConfig,
    permute_x: bool,
    permute_y: bool,
    use_W1: bool,
    num_autotune_configs: int,
):
    """Run the shared backward-dW check with `autotune = True` and `use_autograd = True`.

    No explicit kernel config is supplied; `num_autotune_configs` is passed
    through to the helper.
    """
    helper_kwargs = dict(
        data_config = data_config,
        model_config = model_config,
        use_W1 = use_W1,
        permute_x = permute_x,
        permute_y = permute_y,
        autotune = True,
        use_autograd = True,
        num_autotune_configs = num_autotune_configs,
    )
    _test_grouped_gemm_backward_dW(**helper_kwargs)


================================================
FILE: unsloth/kernels/moe/tests/test_llama4_moe.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import argparse
import sys
from contextlib import contextmanager
from functools import partial

import pytest
import torch
from transformers import AutoConfig
from transformers.models.llama4 import Llama4Config, Llama4TextConfig
from transformers.models.llama4.modeling_llama4 import Llama4TextMoe

from grouped_gemm.kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from grouped_gemm.reference.layers.llama4_moe import (
    Llama4GroupedGemmTextMoe,
    Llama4TritonTextMoe,
)

# Per-dtype comparison tolerances; each value is unpacked as (atol, rtol).
TOLERANCES = {
    torch.bfloat16: (1e-2, 1e-2),
    torch.float16: (1e-3, 1e-3),
    torch.float: (1e-5, 1e-5),
}

# Hugging Face model id whose text config is loaded for the MoE blocks under test.
LLAMA4_SCOUT_ID = "meta-llama/Llama-4-Scout-17B-16E"
# Seed passed to torch.manual_seed at the start of the test.
SEED = 42
# Values the test is parametrized over ("seqlen" and "dtype").
SEQ_LENS = [1024]
DTYPES = [torch.bfloat16]
# Reduce the number of autotuning configs to prevent excessive runtime
NUM_AUTOTUNE_CONFIGS = 50


@contextmanager
def annotated_context(prelude, epilogue = "Passed!", char = "-", num_chars = 80):
    """Frame the enclosed block's output between two separator lines.

    Prints a separator and `prelude` on entry, then `epilogue` and a second
    separator once the block finishes normally. If the block raises, nothing
    further is printed and the exception propagates.
    """
    separator = char * num_chars
    print(separator)
    print(prelude)
    yield
    print(epilogue)
    print(separator)


def get_text_config(model_id):
    """Load the config for `model_id` with AutoConfig and return its `text_config`."""
    return AutoConfig.from_pretrained(model_id).text_config


def prep_triton_kernel_traits(autotune):
    """Return `(kernel_config_fwd, kernel_config_bwd_dW, kernel_config_bwd_dX)`.

    Without autotuning, default-constructed kernel configs are returned. With
    autotuning, all three entries are None and, as a side effect, the config
    lists of the three autotuned kernels are truncated to
    NUM_AUTOTUNE_CONFIGS entries.
    """
    if autotune:
        from grouped_gemm.kernels.backward import (
            _autotuned_grouped_gemm_dW_kernel,
            _autotuned_grouped_gemm_dX_kernel,
        )
        from grouped_gemm.kernels.forward import _autotuned_grouped_gemm_forward_kernel

        # Hack to reduce number of autotuning configs
        for tuned_kernel in (
            _autotuned_grouped_gemm_forward_kernel,
            _autotuned_grouped_gemm_dW_kernel,
            _autotuned_grouped_gemm_dX_kernel,
        ):
            tuned_kernel.configs = tuned_kernel.configs[:NUM_AUTOTUNE_CONFIGS]

        return None, None, None

    return (
        KernelConfigForward(),
        KernelConfigBackward_dW(),
        KernelConfigBackward_dX(),
    )


def sparse_to_dense(t: torch.Tensor):
    """Sum `t` over its first dimension and flatten the result to 1-D."""
    summed = torch.sum(t, dim = 0)
    return summed.view(-1)


@torch.no_grad()
def _check_diff(
    t1: torch.Tensor,
    t2: torch.Tensor,
    atol,
    rtol,
    precision = ".6f",
    verbose = False,
    msg = "",
):
    t2 = t2.view_as(t1)
    diff = t1.sub(t2).abs().max().item()
    if verbose:
        if msg == "":
            msg = "diff"
        print(f"{msg}: {diff:{precision}}")
    assert torch.allclose(t1, t2, atol = atol, rtol = rtol)


def run_backwards(y: torch.Tensor, grad_output: torch.Tensor, module: torch.nn.Module):
    """Backpropagate `grad_output` through `y` and require a grad on every parameter of `module`."""
    y.backward(grad_output)
    for name, parameter in module.named_parameters():
        has_grad = parameter.grad is not None
        assert has_grad, f"{name} missing grad!"


def _check_grads(
    m1: torch.nn.Module,
    m2: torch.nn.Module,
    atol,
    rtol,
    precision = ".6f",
    verbose = False,
    msg = "",
):
    """Compare the gradient of every parameter of `m1` with the same-named parameter of `m2`.

    Parameters are looked up on `m2` by name via `get_parameter`; each pair of
    gradients is checked with `_check_diff`, labelled `"<msg>:<name>.grad"`.
    """
    for name, ref_param in m1.named_parameters():
        other_grad = m2.get_parameter(name).grad
        _check_diff(
            ref_param.grad,
            other_grad,
            atol = atol,
            rtol = rtol,
            precision = precision,
            verbose = verbose,
            msg = f"{msg}:{name}.grad",
        )


@pytest.fixture
def model_config():
    """Text config of the model identified by LLAMA4_SCOUT_ID."""
    return get_text_config(LLAMA4_SCOUT_ID)


@pytest.mark.parametrize(
    "overlap_router_shared",
    [False, True],
    ids = lambda x: "overlap_router_shared" if x else "no_overlap",
)
@pytest.mark.parametrize(
    "permute_y", [False, True], ids = lambda x: "permute_y" if x else "no_permute_y"
)
@pytest.mark.parametrize(
    "permute_x", [False], ids = lambda x: "permute_x" if x else "no_permute_x"
)  # Llama4 does not support permute_x
@pytest.mark.parametrize(
    "autotune", [True], ids = lambda x: "autotune" if x else "manual"
)
@pytest.mark.parametrize("seqlen", SEQ_LENS, ids = lambda x: f"seqlen={x}")
@pytest.mark.parametrize("dtype", DTYPES, ids = str)
def test_llama4_ref(
    dtype: torch.dtype,
    seqlen,
    autotune: bool,
    permute_x: bool,
    permute_y: bool,
    overlap_router_shared: bool,
    model_config: Llama4TextConfig,  # test fixture
    bs: int = 1,
    device = "cuda",
    precision = ".6f",
    verbose = False,
):
    """Compare three Llama4 MoE implementations on forward outputs and parameter grads.

    The Hugging Face `Llama4TextMoe` is the reference. `Llama4GroupedGemmTextMoe`
    and `Llama4TritonTextMoe` receive a copy of its weights and are checked
    against it for: the block output, the routing output (the reference
    routing is reduced with `sparse_to_dense` first), and the gradient of every
    parameter after backpropagating one shared random upstream gradient.

    Args:
        dtype: dtype for modules and inputs; also selects the tolerances.
        seqlen: sequence length of the random input.
        autotune: whether the Triton block uses autotuned kernels.
        permute_x / permute_y: permutation flags forwarded to the Triton block.
        overlap_router_shared: forwarded to both non-reference blocks.
        model_config: text config used to build all three blocks.
        bs: batch size of the random input.
        device: device on which modules and inputs are created.
        precision: format spec for printed diffs.
        verbose: print the diff of every comparison.
    """
    torch.manual_seed(
        SEED
    )  # Should not be needed when running using pytest -- autouse fixture in conftest.py
    # NOTE: `device` is taken from the argument; it used to be overwritten
    # with "cuda" here, which silently ignored any caller-supplied value.
    hidden_dim = model_config.hidden_size
    atol, rtol = TOLERANCES[dtype]
    check_diff = partial(
        _check_diff, atol = atol, rtol = rtol, precision = precision, verbose = verbose
    )
    check_grads = partial(
        _check_grads, atol = atol, rtol = rtol, precision = precision, verbose = verbose
    )

    # Reference op -- HF
    llama4_ref = Llama4TextMoe(model_config).to(dtype = dtype, device = device)

    # Torch grouped gemm impl
    llama4_gg_ref = Llama4GroupedGemmTextMoe(
        model_config, overlap_router_shared = overlap_router_shared
    ).to(dtype = dtype, device = device)
    llama4_gg_ref.copy_weights(llama4_ref)
    llama4_gg_ref.check_weights(llama4_ref)

    # One random input, cloned into independent leaves so each implementation
    # accumulates its own input gradient.
    x_ref = torch.randn(
        bs, seqlen, hidden_dim, dtype = dtype, device = device, requires_grad = True
    )
    x_torch_gg = x_ref.detach().clone().requires_grad_()
    x_triton = x_ref.detach().clone().requires_grad_()

    y_ref, routing_ref = llama4_ref(x_ref)
    y_torch_gg, routing_torch_gg = llama4_gg_ref(x_torch_gg)
    assert y_ref.shape == y_torch_gg.shape, f"{y_ref.shape} != {y_torch_gg.shape}"
    with annotated_context("Testing torch grouped gemm Llama4TextMoe"):
        check_diff(y_ref, y_torch_gg, msg = "y_torch_gg")
        check_diff(
            sparse_to_dense(routing_ref), routing_torch_gg, msg = "routing_torch_gg"
        )

    kernel_config_fwd, kernel_config_bwd_dW, kernel_config_bwd_dX = (
        prep_triton_kernel_traits(autotune)
    )

    llama4_triton = Llama4TritonTextMoe(
        model_config,
        overlap_router_shared = overlap_router_shared,
        permute_x = permute_x,
        permute_y = permute_y,
        autotune = autotune,
        kernel_config_fwd = kernel_config_fwd,
        kernel_config_bwd_dW = kernel_config_bwd_dW,
        kernel_config_bwd_dX = kernel_config_bwd_dX,
    ).to(device = device, dtype = dtype)
    llama4_triton.copy_weights(llama4_ref)
    llama4_triton.check_weights(llama4_ref)

    y_triton, routing_triton = llama4_triton(x_triton)
    with annotated_context("Testing triton grouped gemm Llama4TextMoe forward"):
        check_diff(y_ref, y_triton, msg = "y_triton")
        check_diff(sparse_to_dense(routing_ref), routing_triton, msg = "routing_triton")

    # The same upstream gradient is fed to all three implementations.
    ref_grad = torch.randn_like(y_ref)
    run_backwards(y_ref, ref_grad, llama4_ref)
    run_backwards(y_torch_gg, ref_grad, llama4_gg_ref)
    with annotated_context("Testing torch group gemm Llama4TextMoe backward"):
        check_grads(llama4_ref, llama4_gg_ref, msg = "torch_gg")

    run_backwards(y_triton, ref_grad, llama4_triton)
    with annotated_context("Testing triton group gemm Llama4TextMoe backward"):
        check_grads(llama4_ref, llama4_triton, msg = "triton")


if __name__ == "__main__":
    # Standalone entry point: runs test_llama4_ref directly (outside pytest)
    # for both values of overlap_router_shared, with verbose diff printing.
    parser = argparse.ArgumentParser()
    parser.add_argument("--seqlen", type = int, default = 1024)
    parser.add_argument(
        "--dtype", type = str, choices = ["bfloat16", "float16"], default = "bfloat16"
    )
    args = parser.parse_args()
    # Resolve the dtype name (restricted by `choices`) to the torch dtype object.
    dtype = getattr(torch, args.dtype)

    text_config: Llama4TextConfig = get_text_config(LLAMA4_SCOUT_ID)
    for overlap in [False, True]:
        test_llama4_ref(
            seqlen = args.seqlen,
            model_config = text_config,
            dtype = dtype,
            autotune = True,
            permute_x = False,
            permute_y = True,
            overlap_router_shared = overlap,
            verbose = True,
        )


================================================
FILE: unsloth/kernels/moe/tests/test_qwen3_moe.py
================================================
# SPDX-License-Identifier: GNU Affero General Public License v3.0
# Copyright 2023-present the Unsloth team. All rights reserved.

import argparse
from contextlib import contextmanager

import pytest
import torch
from transformers import AutoConfig
from transformers.models.qwen3_moe import Qwen3MoeConfig
from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock

from grouped_gemm.kernels.tuning import (
    KernelConfigBackward_dW,
    KernelConfigBackward_dX,
    KernelConfigForward,
)
from grouped_gemm.reference.layers.qwen3_moe import Qwen3MoeGroupedGEMMBlock

from .moe_utils import (
    Qwen3MoeFusedGroupedGEMMBlock,
    check_fwd,
    check_grads,
    check_grouped_gemm_results,
    run_backward,
    run_forward,
)

"""
Qwen3 MoE tests

NOTE: Test this as a module and NOT with pytest as running with pytest results in random numerical errors: python -m tests.test_qwen3_moe --permute_x --permute_y --autotune NOT pytest -sv tests/test_qwen3_moe.py
More specifically, all tests pass when run individually, but some will fail randomly (even with the same seed) when the entire test is run as a parametrized test suite using pytest, likely due to how pytest interacts with triton / autotuning.

See tests/run_qwen3_moe_tests.sh for a script that runs all the tests

The tests run the following:
Huggingface's Qwen3 MoE block (Qwen3MoeSparseMoeBlock)
Torch-native grouped gemm version of MoE block (Qwen3MoeGroupedGEMMBlock), which is the HF block with the expert computation replaced with a torch-native grouped gemm
Triton kernel grouped gemm version of MoE block (Qwen3MoeFusedGroupedGEMMBlock), which is the HF block with the expert computation replaced with the fused triton grouped gemm kernel

The tests check the following:
- HF MoE block vs torch grouped gemm MoE block (sanity check)
- torch grouped gemm MoE block vs fused grouped gemm MoE block -- this allows us to test each of the intermediate results for easier debugging
- HF MoE block vs fused grouped gemm MoE block -- this is the actual test

Both forward and backward passes are tested:
- forward: output of the moe block
- backwards:
    - X: gradient of the input to the moe block
    - gate.weight: gradient of the gate weights (router weights)
    - gate_proj: gradient of concatenated gate projections
    - up_proj: gradient of the concatenated up projections
    - down_proj: gradient of the concatenated down projections

Additionally, for the torch grouped gemm and triton grouped gemm versions, the intermediate outputs of the forward pass are checked:
- first_gemm: output of the first grouped gemm (X @ fused_gate_proj)
- intermediate: output of silu_mul(first_gemm)
- second_gemm: output of the second grouped gemm (intermediate @ down_proj)
- hidden_states_unpermute: output of the second_gemm after unpermuting back to token order (from expert grouped order); in the case where the permutation is fused in the triton kernel, this is the same as second_gemm
- hidden_states: output with the topk_weights applied
"""

# Per-dtype comparison tolerances; each value is unpacked as (atol, rtol).
TOLERANCES = {
    torch.bfloat16: (1e-2, 1e-2),
    torch.float16: (1e-3, 1e-3),
    torch.float: (1e-5, 1e-5),
}


@pytest.fixture(scope = "module")
def model_id():
    """Module-scoped fixture returning the model identifier string used by these tests."""
    hub_id = "Qwen/Qwen3-30B-A3B"
    return hub_id


@pytest.fixture(scope = "module")
def config(model_id: str):
    """Module-scoped fixture returning `AutoConfig.from_pretrained(model_id)`."""
    loaded_config = AutoConfig.from_pretrained(model_id)
    return loaded_config


@contextmanager
def annotated_context(prelude, epilogue = "Passed!", char = "-", num_chars = 80):
    """Frame the enclosed block's output between two separator lines.

    Prints a separator and `prelude` on entry, then `epilogue` and a second
    separator once the block finishes normally. If the block raises, nothing
    further is printed and the exception propagates.
    """
    separator = char * num_chars
    print(separator)
    print(prelude)
    yield
    print(epilogue)
    print(separator)


# Seed passed to torch.manual_seed at the start of the test.
SEED = 42
# Values the test is parametrized over ("seqlen" and "dtype").
SEQ_LENS = [1024]
DTYPES = [torch.bfloat16]

# Reduce the number of autotuning configs to prevent excessive runtime
NUM_AUTOTUNE_CONFIGS = 50


@pytest.mark.parametrize(
    "permute_y", [True], ids = lambda x: "permute_y" if x else "no_permute_y"
)
@pytest.mark.parametrize(
    "permute_x", [True], ids = lambda x: "permute_x" if x else "no_permute_x"
)
@pytest.mark.parametrize(
    "autotune", [True], ids = lambda x: "autotune" if x else "manual"
)
@pytest.mark.parametrize("seqlen", SEQ_LENS, ids = lambda x: f"seqlen={x}")
@pytest.mark.parametrize("dtype", DTYPES, ids = str)
def test_qwen3_moe(
    config: Qwen3MoeConfig,
    seqlen: int,
    dtype: torch.dtype,
    permute_x: bool,
    permute_y: bool,
    autotune: bool,
):
    """
    Compare three implementations of the Qwen3 MoE block on random input.

    The blocks under comparison are:
      1. `Qwen3MoeSparseMoeBlock` built from `config` (the HF reference),
      2. `Qwen3MoeGroupedGEMMBlock.from_hf(...)` (torch grouped gemm, sanity check),
      3. `Qwen3MoeFusedGroupedGEMMBlock.from_hf(...)` (triton grouped gemm, under test).

    Forward outputs and then gradients are compared pairwise with the
    (atol, rtol) pair from `TOLERANCES[dtype]`. Runs on the "cuda" device
    with a batch size of 1.

    Args:
        config: model configuration used to build the reference MoE block.
        seqlen: sequence length of the random input.
        dtype: dtype of the blocks and of the input; must be a key of `TOLERANCES`.
        permute_x: forwarded to the fused block's `from_hf`.
        permute_y: forwarded to the fused block's `from_hf` and to
            `check_grouped_gemm_results`.
        autotune: when False, default `KernelConfig*` objects are passed to
            the fused block; when True, no kernel configs are passed and the
            autotuned kernels' config lists are truncated to
            `NUM_AUTOTUNE_CONFIGS` entries.
    """
    torch.manual_seed(
        SEED
    )  # Should not be needed when running using pytest -- autouse fixture in conftest.py
    device = "cuda"
    hidden_size = config.hidden_size
    bs = 1
    atol, rtol = TOLERANCES[dtype]
    # Reference op -- HF
    moe_block = Qwen3MoeSparseMoeBlock(config).to(device, dtype)

    # Torch-native grouped gemm version of MoE Block -- for sanity checking
    grouped_gemm_block = Qwen3MoeGroupedGEMMBlock.from_hf(moe_block).to(device, dtype)
    grouped_gemm_block.check_weights(moe_block)

    if not autotune:
        # Manual mode: use the default-constructed kernel configurations
        kernel_config_fwd = KernelConfigForward()
        kernel_config_bwd_dW = KernelConfigBackward_dW()
        kernel_config_bwd_dX = KernelConfigBackward_dX()
    else:
        from grouped_gemm.kernels.backward import (
            _autotuned_grouped_gemm_dW_kernel,
            _autotuned_grouped_gemm_dX_kernel,
        )
        from grouped_gemm.kernels.forward import _autotuned_grouped_gemm_forward_kernel

        # Hack to reduce number of autotuning configs
        # NOTE: this rebinds `.configs` on the imported kernel objects, so the
        # truncation stays in effect after this test returns.
        _autotuned_grouped_gemm_forward_kernel.configs = (
            _autotuned_grouped_gemm_forward_kernel.configs[:NUM_AUTOTUNE_CONFIGS]
        )
        _autotuned_grouped_gemm_dW_kernel.configs = (
            _autotuned_grouped_gemm_dW_kernel.configs[:NUM_AUTOTUNE_CONFIGS]
        )
        _autotuned_grouped_gemm_dX_kernel.configs = (
            _autotuned_grouped_gemm_dX_kernel.configs[:NUM_AUTOTUNE_CONFIGS]
        )

        kernel_config_fwd = None
        kernel_config_bwd_dW = None
        kernel_config_bwd_dX = None

    # Triton kernel grouped gemm version of MoE Block -- this is what we're testing
    fused_gemm_block = Qwen3MoeFusedGroupedGEMMBlock.from_hf(
        moe_block,
        permute_x = permute_x,
        permute_y = permute_y,
        autotune = autotune,
        kernel_config_fwd = kernel_config_fwd,
        kernel_config_bwd_dW = kernel_config_bwd_dW,
        kernel_config_bwd_dX = kernel_config_bwd_dX,
    ).to(device, dtype)
    fused_gemm_block.check_weights(moe_block)

    # Random input shared by all three blocks
    X = torch.randn(
        bs, seqlen, hidden_size, dtype = dtype, device = device, requires_grad = True
    )

    # Forward
    ref_result = run_forward(moe_block, X, is_grouped_gemm = False)
    grouped_result = run_forward(grouped_gemm_block, X, is_grouped_gemm = True)
    fused_result = run_forward(fused_gemm_block, X, is_grouped_gemm = True)

    with annotated_context(
        "Testing forward pass",
        epilogue = "Passed forward tests!",
        char = "=",
        num_chars = 100,
    ):
        # Sanity checks

        with annotated_context(
            "Checking HF vs torch grouped gemm MoE forward outputs..."
        ):
            check_fwd(ref_result, grouped_result, atol, rtol, verbose = False)

        with annotated_context(
            "Checking torch grouped gemm MoE vs fused grouped gemm MoE forward outputs..."
        ):
            # We implement a custom check for grouped gemm results to test each of the intermediate results for easier debugging
            check_grouped_gemm_results(
                grouped_result.grouped_gemm_result,
                fused_result.grouped_gemm_result,
                permute_y = permute_y,
                atol = atol,
                rtol = rtol,
                verbose = False,
            )
        # Actual test
        with annotated_context(
            "Checking HF vs fused grouped gemm MoE forward outputs..."
        ):
            check_fwd(ref_result, fused_result, atol, rtol, verbose = True)

    # Backward
    # The same random upstream gradient is fed to all three blocks
    grad_output = torch.randn_like(ref_result.output)
    ref_backward_result = run_backward(
        moe_block, grad_output, output = ref_result.output, X = ref_result.X
    )
    grouped_backward_result = run_backward(
        grouped_gemm_block,
        grad_output,
        output = grouped_result.output,
        X = grouped_result.X,
    )
    fused_backward_result = run_backward(
        fused_gemm_block, grad_output, output = fused_result.output, X = fused_result.X
    )

    with annotated_context(
        "Testing backward pass",
        epilogue = "Passed backward tests!",
        char = "=",
        num_chars = 100,
    ):
        # Sanity checks
        with annotated_context("Checking HF vs torch grouped gemm MoE grads..."):
            check_grads(
                ref_backward_result, grouped_backward_result, atol, rtol, verbose = False
            )
        with annotated_context(
            "Checking torch grouped gemm MoE vs fused grouped gemm MoE grads..."
        ):
            check_grads(
                grouped_backward_result,
                fused_backward_result,
                atol,
                rtol,
                verbose = False,
            )

        # Actual test
        with annotated_context("Checking HF vs fused grouped gemm MoE grads..."):
            check_grads(
                ref_backward_result, fused_backward_result, atol, rtol, verbose = True
            )


# Command-line entry point: runs the same test function outside of pytest.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--seqlen", type = int, default = 1024)
    parser.add_argument(
        "--dtype", type = str, choices = ["bfloat16", "float16"], default = "bfloat16"
    )
    # These flags default to False here, whereas the pytest parametrization
    # above only exercises the True variants.
    parser.add_argument("--permute_x", action = "store_true")
    parser.add_argument("--permute_y", action = "store_true")
    parser.add_argument("--autotune", action = "store_true")
    args = parser.parse_args()
    # Translate the dtype name into the corresponding torch dtype object
    args.dtype = getattr(torch, args.dtype)
    # The parsed option names match the test's keyword parameters
    args_dict = vars(args)

    model_id = "Qwen/Qwen3-30B-A3B"
    config = AutoConfig.from_pretrained(model_id)
    atol, rtol = TOLERANCES[args.dtype]

    print(
        f"Testing {model_id} with seqlen={args.seqlen}, dtype={args.dtype}, permute_x={args.permute_x}, permute_y={args.permute_y}, autotune={args.autotune}, atol={atol}, rtol={rtol}"
    )
    test_qwen3_moe(config, **args_dict)


================================================
FILE: unsloth/kernels/rms_layernorm.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import triton
import triton.language as tl
import torch
from .utils import calculate_settings, torch_gpu_device


@triton.jit
def _rms_layernorm_forward(
    Y,
    Y_row_stride: tl.constexpr,
    X,
    X_row_stride: tl.constexpr,
    W,
    W_row_stride: tl.constexpr,
    r,
    r_row_stride: tl.constexpr,
    n_cols: tl.constexpr,
    eps: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    Fast RMS Layernorm kernel
    Inspiration from a Triton tutorial:
    https://triton-lang.org/main/getting-started/tutorials/05-layer-norm.html

    Each program instance processes the row selected by `tl.program_id(0)`:
    it computes rsqrt(mean(X_row^2) + eps) in float32, stores that scalar
    into `r` for the row, and writes `normed * W_row` into `Y`.

    `W` is indexed by column offset only; `W_row_stride` is accepted but not
    used in the body. `BLOCK_SIZE` lanes are allocated and lanes at or beyond
    `n_cols` are masked out.
    """
    row_idx = tl.program_id(0)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < n_cols

    # Advance the base pointers to this program's row
    Y += row_idx * Y_row_stride
    X += row_idx * X_row_stride
    r += row_idx * r_row_stride

    # X is upcast to float32; W is kept in its stored dtype
    X_row = tl.load(X + col_offsets, mask = mask, other = 0).to(tl.float32)
    W_row = tl.load(W + col_offsets, mask = mask, other = 0)  # .to(tl.float32)

    # Masked lanes were loaded as 0, so they do not contribute to the sum
    row_var = tl.sum(X_row * X_row, axis = 0) / n_cols
    # Explicit float32 scalar to ensure correct type promotion on HIP/ROCm
    eps_f32 = tl.full((), eps, tl.float32)
    inv_var = tl.math.rsqrt(row_var + eps_f32)
    # Saved per row so the backward kernel can reuse it
    tl.store(r, inv_var)
    normed = X_row * inv_var
    normed = normed.to(W_row.dtype)  # Exact copy from HF
    output = normed * W_row
    tl.store(Y + col_offsets, output, mask = mask)


def _rms_layernorm_backward(
    dY,
    dY_row_stride: tl.constexpr,
    dX,
    dX_row_stride: tl.constexpr,
    X,
    X_row_stride: tl.constexpr,
    W,
    W_row_stride: tl.constexpr,
    r,
    r_row_stride: tl.constexpr,
    # dW, dW_row_stride,
    n_cols: tl.constexpr,
    eps: tl.constexpr,
    GEMMA: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    Fast RMS Layernorm kernel for the backward pass
    Inspiration from a Triton tutorial:
    https://triton-lang.org/main/getting-started/tutorials/05-layer-norm.html

    Each program instance processes the row selected by `tl.program_id(0)`
    and computes the gradient with respect to the input:

        dX = inv_var / n_cols * (n_cols * dY_W - normed * sum(dY_W * normed))

    where `inv_var` is the per-row value saved in `r` by the forward kernel,
    `normed = X * inv_var`, and `dY_W` is `dY * W` (or `dY * (W + 1)` when
    `GEMMA` is set).

    When `GEMMA` is set the result is written to the separate `dX` buffer;
    otherwise it overwrites `dY` in place. `W_row_stride` and `eps` are
    accepted but not used in the body. No gradient for `W` is produced.
    """
    row_idx = tl.program_id(0)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < n_cols

    # Advance the base pointers to this program's row
    dY += row_idx * dY_row_stride
    X += row_idx * X_row_stride
    r += row_idx * r_row_stride

    if GEMMA:
        # dX is its own buffer, so step through it with its own row stride.
        # (Using dY's stride is only correct when both layouts coincide.)
        dX += row_idx * dX_row_stride
    else:
        # In-place: the gradient overwrites the (already offset) dY row
        dX = dY

    dY_row = tl.load(dY + col_offsets, mask = mask, other = 0).to(tl.float32)
    X_row = tl.load(X + col_offsets, mask = mask, other = 0).to(tl.float32)
    W_row = tl.load(W + col_offsets, mask = mask, other = 0).to(tl.float32)

    # Get saved row variance
    inv_var = tl.load(r).to(tl.float32)
    normed = X_row * inv_var

    if GEMMA:
        dY_W = dY_row * (W_row + 1.0)
    else:
        dY_W = dY_row * W_row

    rowsum_dY_normed = tl.sum(dY_W * normed, axis = 0)
    output = inv_var / n_cols * (n_cols * dY_W - normed * rowsum_dY_normed)
    tl.store(dX + col_offsets, output, mask = mask)


# Compile the kernel, then wrap it so that GEMMA is coerced to a bool
# from the launch arguments.
_rms_layernorm_backward = triton.jit(_rms_layernorm_backward)
_rms_layernorm_backward = triton.heuristics(
    {
        "GEMMA": lambda args: bool(args["GEMMA"]),
    }
)(_rms_layernorm_backward)


@triton.jit
def _gemma_rms_layernorm_forward(
    Y,
    Y_row_stride: tl.constexpr,
    X,
    X_row_stride: tl.constexpr,
    W,
    W_row_stride: tl.constexpr,
    r,
    r_row_stride: tl.constexpr,
    n_cols: tl.constexpr,
    eps: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    Gemma variant of the RMS Layernorm forward kernel.

    Differs from `_rms_layernorm_forward` in that `W` is also upcast to
    float32, the normalized row is not cast back before the multiply, and
    the scale applied is `(W + 1.0)` rather than `W`. The per-row
    rsqrt(mean(X_row^2) + eps) is stored into `r`. `W_row_stride` is
    accepted but not used in the body.
    """
    # Copies https://github.com/google-deepmind/gemma/blob/main/gemma/layers.py#L31
    # and https://github.com/keras-team/keras-nlp/blob/v0.8.2/keras_nlp/models/gemma/rms_normalization.py#L33
    # exactly. Essentially all in float32!
    row_idx = tl.program_id(0)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    mask = col_offsets < n_cols

    # Advance the base pointers to this program's row
    Y += row_idx * Y_row_stride
    X += row_idx * X_row_stride
    r += row_idx * r_row_stride

    X_row = tl.load(X + col_offsets, mask = mask, other = 0).to(tl.float32)
    W_row = tl.load(W + col_offsets, mask = mask, other = 0).to(tl.float32)

    row_var = tl.sum(X_row * X_row, axis = 0) / n_cols
    # Explicit float32 scalar to ensure correct type promotion on HIP/ROCm
    eps_f32 = tl.full((), eps, tl.float32)
    inv_var = tl.math.rsqrt(row_var + eps_f32)
    # Saved per row so the backward kernel can reuse it
    tl.store(r, inv_var)
    normed = X_row * inv_var
    output = normed * (W_row + 1.0)

    tl.store(Y + col_offsets, output, mask = mask)


class Fast_RMS_Layernorm(torch.autograd.Function):
    """
    Autograd wrapper around the Triton RMS Layernorm kernels.

    The forward pass launches one kernel program per row of the input
    flattened to 2D. The backward pass returns a gradient for `X` only;
    `W`, `eps` and `gemma` receive `None`.
    """

    @staticmethod
    def forward(ctx, X: torch.Tensor, W: torch.Tensor, eps: float, gemma: bool = False):
        """
        Normalize `X` over its last dimension and scale by `W`.

        Args:
            X: input tensor; it is flattened to (-1, last_dim) for the kernel.
            W: scale weights, passed to the kernel unchanged.
            eps: epsilon added before the reciprocal square root.
            gemma: selects `_gemma_rms_layernorm_forward` instead of
                `_rms_layernorm_forward`.

        Returns:
            A tensor with the same shape and dtype as `X`.
        """
        shape = X.shape
        dim: int = shape[-1]
        X = X.reshape(-1, dim)
        n_rows: int
        n_cols: int
        n_rows, n_cols = X.shape
        BLOCK_SIZE: int
        num_warps: int
        BLOCK_SIZE, num_warps = calculate_settings(n_cols)
        device = X.device

        # Output buffer and per-row float32 scratch written by the kernel
        Y = torch.empty((n_rows, n_cols), dtype = X.dtype, device = device)
        r = torch.empty(n_rows, dtype = torch.float32, device = device)

        fx = _gemma_rms_layernorm_forward if gemma else _rms_layernorm_forward
        with torch_gpu_device(device):
            # One program per row
            fx[(n_rows,)](
                Y,
                Y.stride(0),
                X,
                X.stride(0),
                W,
                W.stride(0),
                r,
                r.stride(0),
                n_cols,
                eps,
                BLOCK_SIZE = BLOCK_SIZE,
                num_warps = num_warps,
            )
        # Launch settings are reused by the backward kernel
        ctx.eps = eps
        ctx.BLOCK_SIZE = BLOCK_SIZE
        ctx.num_warps = num_warps
        ctx.GEMMA = gemma
        ctx.save_for_backward(X, W, r)
        return Y.view(*shape)

    @staticmethod
    def backward(ctx, dY: torch.Tensor):
        """
        Compute the gradient with respect to `X`.

        In the non-Gemma case `dX` is the same tensor object as the reshaped
        `dY`, so the kernel writes the result over the incoming gradient; in
        the Gemma case a fresh buffer is allocated.

        Returns:
            `(dX, None, None, None)` with `dX` shaped like the incoming `dY`.
        """
        shape = dY.shape
        dim: int = shape[-1]
        dY = dY.reshape(-1, dim)
        X, W, r = ctx.saved_tensors
        n_rows: int
        n_cols: int
        n_rows, n_cols = dY.shape
        # dW = X
        dX = torch.empty_like(dY) if ctx.GEMMA else dY

        with torch_gpu_device(dY.device):
            # One program per row, with the same block size and warps as forward
            _rms_layernorm_backward[(n_rows,)](
                dY,
                dY.stride(0),
                dX,
                dX.stride(0),
                X,
                X.stride(0),
                W,
                W.stride(0),
                r,
                r.stride(0),
                # dW, dW.stride(0),
                n_cols,
                ctx.eps,
                GEMMA = ctx.GEMMA,
                BLOCK_SIZE = ctx.BLOCK_SIZE,
                num_warps = ctx.num_warps,
            )
        dX = dX.view(*shape)
        return dX, None, None, None


# [TODO] Unsure why RMS Layernorm is not torch.compiling properly
@torch.compiler.disable
def fast_rms_layernorm(layernorm, X: torch.Tensor, gemma: bool = False):
    """
    Apply `Fast_RMS_Layernorm` to `X` using the parameters of `layernorm`.

    The scale comes from `layernorm.weight`. The epsilon comes from
    `layernorm.variance_epsilon` when that attribute exists, otherwise from
    `layernorm.eps`. `gemma` is forwarded to the autograd function.
    """
    weight: torch.Tensor = layernorm.weight
    if hasattr(layernorm, "variance_epsilon"):
        epsilon = layernorm.variance_epsilon
    else:
        epsilon = layernorm.eps
    return Fast_RMS_Layernorm.apply(X, weight, epsilon, gemma)


from transformers.models.llama.modeling_llama import LlamaRMSNorm


class Unsloth_LlamaRMSNorm(LlamaRMSNorm):
    """`LlamaRMSNorm` subclass whose forward pass goes through `fast_rms_layernorm`."""

    def forward(self, X):
        # Non-Gemma variant of the kernel
        return fast_rms_layernorm(self, X, gemma = False)


try:
    from transformers.models.mllama.modeling_mllama import MllamaTextRMSNorm

    class Unsloth_MllamaTextRMSNorm(MllamaTextRMSNorm):
        """`MllamaTextRMSNorm` subclass whose forward pass goes through `fast_rms_layernorm`."""

        def forward(self, X):
            return fast_rms_layernorm(self, X, gemma = False)


except Exception:
    # Mllama support is optional: if the import (or class creation) fails,
    # the subclass is simply not defined. `Exception` rather than a bare
    # `except` so that KeyboardInterrupt / SystemExit are not swallowed.
    pass


def patch_rms_layernorm():
    """
    Replace the RMS norm classes in `transformers` with the Unsloth subclasses.

    `LlamaRMSNorm` in `transformers.models.llama.modeling_llama` is always
    rebound to `Unsloth_LlamaRMSNorm`. Rebinding `MllamaTextRMSNorm` is best
    effort: any ordinary failure (for example the mllama module or the
    Unsloth subclass not being available) is ignored.
    """
    import transformers.models.llama.modeling_llama

    transformers.models.llama.modeling_llama.LlamaRMSNorm = Unsloth_LlamaRMSNorm
    try:
        import transformers.models.mllama.modeling_mllama

        transformers.models.mllama.modeling_mllama.MllamaTextRMSNorm = (
            Unsloth_MllamaTextRMSNorm
        )
    except Exception:
        # Optional patch; `Exception` (not a bare `except`) so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass


def unpatch_rms_layernorm():
    """
    Undo `patch_rms_layernorm` by rebinding the `transformers` attributes.

    `LlamaRMSNorm` in `transformers.models.llama.modeling_llama` is rebound
    to the `LlamaRMSNorm` imported by this module. Rebinding
    `MllamaTextRMSNorm` is best effort: any ordinary failure (for example
    the mllama module not being available) is ignored.
    """
    import transformers.models.llama.modeling_llama

    transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm
    try:
        import transformers.models.mllama.modeling_mllama

        transformers.models.mllama.modeling_mllama.MllamaTextRMSNorm = MllamaTextRMSNorm
    except Exception:
        # Optional unpatch; `Exception` (not a bare `except`) so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass


def test_rms_layernorm(
    dim = 1024,
    eps = 1e-5,
    dtype = torch.float16,
    bsz = 21,
    random_state = 3407,
    seqlen = 3341,
):
    """
    Check the input gradient of `fast_rms_layernorm` against `LlamaRMSNorm`.

    A `LlamaRMSNorm` with uniformly initialised weights is run on a random
    (bsz, seqlen, dim) CUDA tensor and back-propagated with a random upstream
    gradient. The same input and upstream gradient are then run through
    `fast_rms_layernorm`, and the largest absolute difference between the
    two input gradients must not exceed 0.05.
    """
    from transformers.models.llama.modeling_llama import LlamaRMSNorm

    layernorm = LlamaRMSNorm((dim,), eps = eps).to("cuda")
    torch.cuda.manual_seed(random_state)
    torch.manual_seed(random_state)
    torch.nn.init.uniform_(layernorm.weight)
    X = torch.randn((bsz, seqlen, dim), dtype = dtype, device = "cuda")
    # Independent copy so each implementation accumulates its own .grad
    XX = X.clone()
    X.requires_grad_(True)
    XX.requires_grad_(True)
    Y = layernorm(X)
    YY = torch.randn((bsz, seqlen, dim), dtype = dtype, device = "cuda", requires_grad = True)
    Y.backward(YY)
    correct_grad = X.grad.clone()
    # from unsloth.kernels import fast_rms_layernorm
    Y = fast_rms_layernorm(layernorm, XX)
    Y.backward(YY)
    # Compare magnitudes: without the absolute value, an arbitrarily large
    # negative error would still satisfy the bound.
    max_abs_error = torch.amax(torch.abs(correct_grad - XX.grad)).item()
    assert max_abs_error <= 0.05


def testing_suite_layernorm():
    """
    Run `test_rms_layernorm` over a grid of settings.

    The grid covers three hidden sizes, two half-precision dtypes, three
    sequence lengths and two seeds. Each (dim, dtype) pair is executed inside
    a CUDA autocast region for that dtype.
    """
    hidden_sizes = [512, 1024, 2048]
    half_dtypes = [torch.float16, torch.bfloat16]
    sequence_lengths = [3341, 2048, 349]
    seeds = [3407, 42]

    for dim in hidden_sizes:
        for dtype in half_dtypes:
            with torch.autocast(device_type = "cuda", dtype = dtype):
                cases = [(length, seed) for length in sequence_lengths for seed in seeds]
                for seqlen, random_state in cases:
                    test_rms_layernorm(
                        dim = dim,
                        eps = 1e-5,
                        dtype = dtype,
                        bsz = 21,
                        random_state = random_state,
                        seqlen = seqlen,
                    )


================================================
FILE: unsloth/kernels/rope_embedding.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import triton
import triton.language as tl
import torch
from ..device_type import DEVICE_COUNT
from .utils import calculate_settings, torch_gpu_device, torch_device_stream


def _rope_embedding_QK(
    Q,
    Q_batch_stride,
    Q_head_stride,
    Q_seq_stride,
    K,
    K_batch_stride,
    K_head_stride,
    K_seq_stride,
    cos,
    cos_row_stride,
    sin,
    sin_row_stride,
    rope_embedding_indices,
    seqlen,
    head_dim: tl.constexpr,
    n_heads_K: tl.constexpr,
    BACKWARD_PASS: tl.constexpr,
    HAS_ROPE_INDICES: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    Rotate one head of Q (and, where it exists, the same head of K) in place.

    Grid axis 0 is a flattened row index, decomposed below into
    `batch_id = row // seqlen` and `seq_index = row - batch_id * seqlen`;
    grid axis 1 is the head index. The cos/sin row is selected by
    `rope_embedding_indices[row]` when `HAS_ROPE_INDICES` is set, otherwise
    by `row % seqlen`. Only the first `head_dim // 2` columns of cos/sin are
    read. With `BACKWARD_PASS` set, `sin` is negated.

    The element addresses assume a unit stride along the last dimension of
    Q and K (column offsets are added directly to the pointer).
    """
    row_position = tl.program_id(0)
    head_position = tl.program_id(1)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    half_head_dim = head_dim // 2
    mask = col_offsets < half_head_dim

    if HAS_ROPE_INDICES:
        # Explicit per-row position into the cos/sin tables
        rot_position = tl.load(
            rope_embedding_indices + row_position,
            eviction_policy = "evict_first",
        ).to(tl.int32)
    else:
        rot_position = row_position % seqlen

    cos_ptr = cos + rot_position * cos_row_stride
    sin_ptr = sin + rot_position * sin_row_stride
    sin1 = tl.load(
        sin_ptr + col_offsets,
        mask = mask,
        other = 0,
    )
    cos1 = tl.load(
        cos_ptr + col_offsets,
        mask = mask,
        other = 0,
    )
    if BACKWARD_PASS:
        # Negating sin applies the rotation in the opposite direction
        sin1 = -sin1

    batch_id = row_position // seqlen
    seq_index = row_position - batch_id * seqlen

    q_ptr = (
        Q
        + batch_id * Q_batch_stride
        + head_position * Q_head_stride
        + seq_index * Q_seq_stride
    )
    # Both halves are loaded before either is overwritten
    q0 = tl.load(q_ptr + col_offsets, mask = mask, other = 0)
    q1 = tl.load(q_ptr + half_head_dim + col_offsets, mask = mask, other = 0)
    tl.store(q_ptr + col_offsets, q0 * cos1 - q1 * sin1, mask = mask)
    tl.store(q_ptr + half_head_dim + col_offsets, q1 * cos1 + q0 * sin1, mask = mask)

    # The grid spans the Q heads; K is only touched for head indices it has
    if head_position < n_heads_K:
        k_ptr = (
            K
            + batch_id * K_batch_stride
            + head_position * K_head_stride
            + seq_index * K_seq_stride
        )
        k0 = tl.load(k_ptr + col_offsets, mask = mask, other = 0)
        k1 = tl.load(k_ptr + half_head_dim + col_offsets, mask = mask, other = 0)
        tl.store(k_ptr + col_offsets, k0 * cos1 - k1 * sin1, mask = mask)
        tl.store(k_ptr + half_head_dim + col_offsets, k1 * cos1 + k0 * sin1, mask = mask)


# Compile the kernel, then wrap it so that the two flags are coerced to
# bools from the launch arguments.
_rope_embedding_QK = triton.jit(_rope_embedding_QK)
_rope_embedding_QK = triton.heuristics(
    {
        "BACKWARD_PASS": lambda args: bool(args["BACKWARD_PASS"]),
        "HAS_ROPE_INDICES": lambda args: bool(args["HAS_ROPE_INDICES"]),
    }
)(_rope_embedding_QK)


# Number of heads handled per program along grid axis 1 of `_rope_embedding`.
# Used on the host side to size the grid; the kernel sets its own local
# ROPE_GROUP_SIZE to the same value, so the two must be kept in sync.
ROPE_GROUP_SIZE: int = 4


def _rope_embedding(
    Q,
    Q_row_stride: tl.constexpr,
    cos,
    cos_row_stride: tl.constexpr,
    sin,
    sin_row_stride: tl.constexpr,
    seqlen,
    head_dim: tl.constexpr,
    n_heads: tl.constexpr,
    BACKWARD_PASS: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """
    Calculates the RoPE Embedding quickly
    RoPE is Q * cos + rotate_half(Q) * sin
    See our blog post for more info

    Grid axis 0 selects a row of Q (each row holds `n_heads * head_dim`
    contiguous values); grid axis 1 selects a group of up to
    ROPE_GROUP_SIZE consecutive heads. The cos/sin row used is
    `row % seqlen`, and only its first `head_dim // 2` columns are read.
    Q is rotated in place.
    """
    ROPE_GROUP_SIZE = 4
    row_position = tl.program_id(0)
    group_head_position = tl.program_id(1)
    col_offsets = tl.arange(0, BLOCK_SIZE)
    half_head_dim = head_dim // 2
    mask = col_offsets < half_head_dim

    # cos/sin are loaded once and reused for every head in the group
    sin1 = tl.load(
        sin
        + (row_position % seqlen) * sin_row_stride
        + half_head_dim * 0
        + col_offsets,
        mask = mask,
        other = 0,
    )
    cos1 = tl.load(
        cos
        + (row_position % seqlen) * cos_row_stride
        + half_head_dim * 0
        + col_offsets,
        mask = mask,
        other = 0,
    )

    if BACKWARD_PASS:
        # See our blog post for more info.
        sin1 = -sin1

    # [TODO] Autotune ROPE_GROUP_SIZE to be 1, 2, 4, 8
    head_start = group_head_position * ROPE_GROUP_SIZE
    # The last group may be partial when n_heads is not a multiple of the group size
    head_end = min((head_start + ROPE_GROUP_SIZE), n_heads)

    # 10% Faster kernel from [HuyNguyen-hust](https://github.com/unslothai/unsloth/pull/238)
    for k in range(head_start, head_end):
        # Offsets of the first and second half of head k within this row
        offs_q1 = row_position * Q_row_stride + k * head_dim + col_offsets
        offs_q2 = (
            row_position * Q_row_stride + k * head_dim + col_offsets + half_head_dim
        )

        # For Gemma - sometimes RoPE must be done in float32 and not bfloat16
        Q1 = tl.load(Q + offs_q1, mask = mask, other = 0).to(sin1.dtype)
        Q2 = tl.load(Q + offs_q2, mask = mask, other = 0).to(sin1.dtype)

        tl.store(Q + offs_q1, Q1 * cos1 - Q2 * sin1, mask = mask)
        tl.store(Q + offs_q2, Q2 * cos1 + Q1 * sin1, mask = mask)


# Compile the kernel, then wrap it so that BACKWARD_PASS is coerced to a
# bool from the launch arguments.
_rope_embedding = triton.jit(_rope_embedding)
_rope_embedding = triton.heuristics(
    {
        "BACKWARD_PASS": lambda args: bool(args["BACKWARD_PASS"]),
    }
)(_rope_embedding)


class Fast_RoPE_Embedding(torch.autograd.Function):
    """
    Autograd wrapper around the `_rope_embedding` Triton kernel.

    Operates on a single tensor laid out as
    (batch, seq_len, n_heads, head_dim). The backward pass launches the same
    kernel with `BACKWARD_PASS = True` and returns a gradient for `Q` only.
    """

    @staticmethod
    def forward(ctx, Q, cos, sin):
        """
        Apply the rotary embedding to `Q`.

        Args:
            Q: tensor of shape (batch, seq_len, n_heads, head_dim).
            cos, sin: rotary tables; they are squeezed, after which the first
                dimension must be at least `seq_len`.

        Returns:
            The rotated tensor with shape (batch, seq_len, n_heads, head_dim).
            The kernel writes into the reshaped `Q`, so when `reshape`
            returns a view the caller's tensor is modified in place.
        """
        cos, sin = cos.squeeze(), sin.squeeze()
        batch: int
        seq_len: int
        n_heads: int
        head_dim: int
        batch, seq_len, n_heads, head_dim = Q.shape
        Q = Q.reshape(batch * seq_len, n_heads * head_dim)
        n_rows: int
        n_cols: int
        n_rows, n_cols = Q.shape
        assert seq_len <= cos.shape[0]

        # [TODO] Changing blocksize to head_dim//2 seems to have
        # some concurrency / un-deterministic issues.
        BLOCK_SIZE, num_warps = calculate_settings(head_dim // 2)  # (head_dim//2)

        # group_size = 4 # 4 or 8, too large group_size can hurt performance.
        div: int
        mod: int
        div, mod = divmod(n_heads, ROPE_GROUP_SIZE)
        # Ceiling division: one extra group for any leftover heads
        n_groups: int = div + (mod != 0)

        with torch_gpu_device(Q.device):
            _rope_embedding[
                (
                    n_rows,
                    n_groups,
                )
            ](
                Q,
                Q.stride(0),
                cos,
                cos.stride(0),
                sin,
                sin.stride(0),
                seq_len,
                head_dim,
                n_heads,
                BACKWARD_PASS = False,
                BLOCK_SIZE = BLOCK_SIZE,
                num_warps = num_warps,
            )
        # Launch settings and tables are kept on ctx for the backward launch
        ctx.BLOCK_SIZE = BLOCK_SIZE
        ctx.num_warps = num_warps
        ctx.n_groups = n_groups
        ctx.cos = cos
        ctx.sin = sin
        return Q.reshape(batch, seq_len, n_heads, head_dim)

    @staticmethod
    def backward(ctx, dY):
        """
        Rotate the incoming gradient with the sign of `sin` flipped.

        Returns:
            `(dY, None, None)` where `dY` has shape
            (batch, seq_len, n_heads, head_dim). The kernel writes into the
            reshaped `dY`.
        """
        batch: int
        seq_len: int
        n_heads: int
        head_dim: int
        batch, seq_len, n_heads, head_dim = dY.shape
        dY = dY.reshape(batch * seq_len, n_heads * head_dim)
        n_rows: int
        n_cols: int
        n_rows, n_cols = dY.shape

        cos = ctx.cos
        sin = ctx.sin

        with torch_gpu_device(dY.device):
            _rope_embedding[
                (
                    n_rows,
                    ctx.n_groups,
                )
            ](
                dY,
                dY.stride(0),
                cos,
                cos.stride(0),
                sin,
                sin.stride(0),
                seq_len,
                head_dim,
                n_heads,
                BACKWARD_PASS = True,
                BLOCK_SIZE = ctx.BLOCK_SIZE,
                num_warps = ctx.num_warps,
            )
        dY = dY.reshape(batch, seq_len, n_heads, head_dim)
        return (
            dY,
            None,
            None,
        )


# [TODO] Unsure why RoPE Embedding is not torch.compiling properly
@torch.compiler.disable
def fast_rope_embedding(
    Q,
    K,
    cos,
    sin,
    rope_embedding_indices = None,
):
    """
    Apply rotary position embeddings to `Q` and `K`.

    When `rope_embedding_indices` is given, both tensors go through
    `Fast_RoPE_Embedding_QK` in a single call. Otherwise each tensor is
    transposed on dims (1, 2), made contiguous, passed through
    `Fast_RoPE_Embedding`, and transposed back. With more than one device
    the stream of `Q`'s device is synchronized before returning.

    Returns:
        The tuple `(Q_out, K_out)`.
    """
    if rope_embedding_indices is None:

        def _rotate(tensor):
            # Fast_RoPE_Embedding works on the (1, 2)-transposed layout
            swapped = tensor.transpose(1, 2).contiguous()
            return Fast_RoPE_Embedding.apply(swapped, cos, sin).transpose(1, 2)

        Q_out = _rotate(Q)
        K_out = _rotate(K)
    else:
        Q_out, K_out = Fast_RoPE_Embedding_QK.apply(
            Q, K, cos, sin, rope_embedding_indices
        )

    if DEVICE_COUNT > 1:
        torch_device_stream(Q.device).synchronize()
    return Q_out, K_out


class Fast_RoPE_Embedding_QK(torch.autograd.Function):
    """
    Autograd wrapper around the `_rope_embedding_QK` Triton kernel.

    Rotates Q and K together in one launch, with optional explicit position
    indices. Both tensors are laid out as (batch, n_heads, seq_len, head_dim).
    The backward pass launches the same kernel with `BACKWARD_PASS = True`.
    """

    @staticmethod
    def forward(ctx, Q, K, cos, sin, rope_indices):
        """
        Apply the rotary embedding to `Q` and `K`.

        Args:
            Q: tensor of shape (batch, n_heads_Q, seq_len, head_dim).
            K: tensor whose second dimension is the number of K heads.
            cos, sin: rotary tables; squeezed before use.
            rope_indices: optional per-row positions into the tables; when
                `None` the kernel uses `row % seq_len` instead.

        Returns:
            `(Q_out, K_out)`. Contiguous inputs are rotated in place and
            returned as-is; non-contiguous inputs are cloned first.
        """
        has_indices = rope_indices is not None
        cos, sin = cos.squeeze(), sin.squeeze()

        batch, n_heads_Q, seq_len, head_dim = Q.shape
        _, n_heads_K, _, _ = K.shape

        # Inplace rotary embedding is generally fine
        Q_out = Q.clone() if not Q.is_contiguous() else Q
        K_out = K.clone() if not K.is_contiguous() else K

        if has_indices:
            # TRL's rotary indices are always in int32, so casting is just for safety
            rope_ptr = rope_indices.reshape(-1).to(dtype = torch.int32, device = Q.device)
        else:
            # Placeholder argument; the kernel does not read it without indices
            rope_ptr = cos.new_empty(1, dtype = torch.int32)

        BLOCK_SIZE, num_warps = calculate_settings(head_dim)

        Q_batch_stride, Q_head_stride, Q_seq_stride = (
            Q_out.stride(0),
            Q_out.stride(1),
            Q_out.stride(2),
        )
        K_batch_stride, K_head_stride, K_seq_stride = (
            K_out.stride(0),
            K_out.stride(1),
            K_out.stride(2),
        )

        with torch_gpu_device(Q.device):
            # One program per (flattened batch*seq row, Q head)
            _rope_embedding_QK[(batch * seq_len, n_heads_Q)](
                Q_out,
                Q_batch_stride,
                Q_head_stride,
                Q_seq_stride,
                K_out,
                K_batch_stride,
                K_head_stride,
                K_seq_stride,
                cos,
                cos.stride(0),
                sin,
                sin.stride(0),
                rope_ptr,
                seq_len,
                head_dim = head_dim,
                n_heads_K = n_heads_K,
                BACKWARD_PASS = False,
                HAS_ROPE_INDICES = has_indices,
                BLOCK_SIZE = BLOCK_SIZE,
                num_warps = num_warps,
            )

        # Everything the backward launch needs is kept on ctx
        ctx.block_size = BLOCK_SIZE
        ctx.num_warps = num_warps
        ctx.has_indices = has_indices
        ctx.cos = cos
        ctx.sin = sin
        ctx.rope_indices = rope_ptr if has_indices else None
        ctx.seq_len = seq_len
        ctx.n_heads_Q = n_heads_Q
        ctx.n_heads_K = n_heads_K

        return (
            Q_out,
            K_out,
        )

    @staticmethod
    def backward(ctx, dQ, dK):
        """
        Rotate the incoming gradients with the sign of `sin` flipped.

        Returns:
            `(dQ_out, dK_out, None, None, None)`. Contiguous gradients are
            rotated in place; non-contiguous ones are cloned first.
        """
        batch, _, _, head_dim = dQ.shape

        rope_ptr = (
            ctx.rope_indices
            if ctx.has_indices
            else ctx.cos.new_empty(1, dtype = torch.int32)
        )

        # Inplace rotary embedding is generally fine
        dQ_out = dQ.clone() if not dQ.is_contiguous() else dQ
        dK_out = dK.clone() if not dK.is_contiguous() else dK

        Q_batch_stride, Q_head_stride, Q_seq_stride = (
            dQ_out.stride(0),
            dQ_out.stride(1),
            dQ_out.stride(2),
        )
        K_batch_stride, K_head_stride, K_seq_stride = (
            dK_out.stride(0),
            dK_out.stride(1),
            dK_out.stride(2),
        )

        with torch_gpu_device(dQ.device):
            _rope_embedding_QK[(batch * ctx.seq_len, ctx.n_heads_Q)](
                dQ_out,
                Q_batch_stride,
                Q_head_stride,
                Q_seq_stride,
                dK_out,
                K_batch_stride,
                K_head_stride,
                K_seq_stride,
                ctx.cos,
                ctx.cos.stride(0),
                ctx.sin,
                ctx.sin.stride(0),
                rope_ptr,
                ctx.seq_len,
                head_dim = head_dim,
                n_heads_K = ctx.n_heads_K,
                BACKWARD_PASS = True,
                HAS_ROPE_INDICES = ctx.has_indices,
                BLOCK_SIZE = ctx.block_size,
                num_warps = ctx.num_warps,
            )

        return (dQ_out, dK_out, None, None, None)


class Slow_RoPE_Embedding(torch.autograd.Function):
    """
    Pure-PyTorch rotary embedding that updates its argument in place.

    Forward computes `Q * cos + rotate_half(Q) * sin` directly into `Q`;
    backward applies the transposed rotation to the incoming gradient, also
    in place.
    """

    @staticmethod
    def forward(ctx, Q, cos, sin, position_ids):
        """
        Rotate `Q` in place and return it.

        When `position_ids` is given, `cos` and `sin` have their two leading
        dimensions squeezed away, are gathered by `position_ids`, and get a
        broadcast axis inserted at position 2. Otherwise they are used as
        passed. The (possibly gathered) tables are saved for backward.
        """
        if position_ids is not None:
            # [1, 1, seq_len, dim] -> [seq_len, dim] -> [bs, seq_len, 1, dim]
            cos = cos.squeeze(1).squeeze(0)[position_ids].unsqueeze(2)
            sin = sin.squeeze(1).squeeze(0)[position_ids].unsqueeze(2)

        split = Q.shape[-1] // 2
        first_half = Q[..., :split]
        second_half = Q[..., split:]
        # rotate_half(Q): materialised before Q is overwritten below
        rotated = torch.cat((-second_half, first_half), dim = -1)
        Q.mul_(cos)
        Q.addcmul_(rotated, sin)
        ctx.save_for_backward(cos, sin)
        return Q

    @staticmethod
    def backward(ctx, dY):
        """
        Apply the transposed rotation to `dY` in place.

        Returns `(dY, None, None, None)`.
        """
        cos, sin = ctx.saved_tensors
        split = dY.shape[-1] // 2
        first_half = dY[..., :split]
        second_half = dY[..., split:]
        # Transpose of rotate_half: the halves swap with the sign on the other side
        rotated_T = torch.cat((second_half, -first_half), dim = -1)
        dY.mul_(cos)
        dY.addcmul_(rotated_T, sin)
        return dY, None, None, None


def inplace_rope_embedding(Q, K, cos, sin, position_ids):
    """
    Rotate `Q` and then `K` with `Slow_RoPE_Embedding`.

    The stream of the rotated `Q`'s device is synchronized before the pair
    `(Q, K)` is returned.
    """
    rotated_Q = Slow_RoPE_Embedding.apply(Q, cos, sin, position_ids)
    rotated_K = Slow_RoPE_Embedding.apply(K, cos, sin, position_ids)
    stream = torch_device_stream(rotated_Q.device)
    stream.synchronize()
    return rotated_Q, rotated_K


================================================
FILE: unsloth/kernels/swiglu.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import triton
import triton.language as tl
import torch
from .utils import calculate_settings, torch_gpu_device

# signed int32 max is 2**31-1 so num_elements cannot exceed 2**31
NUM_INT32_ELEMENTS = 2**31
# Number of blocks of headroom kept below the int32 limit
SAFE_INT32_BUFFER_MULTIPLIER = 4
# Elements processed per kernel program
BLOCK_SIZE = 1024
# Element counts above this threshold make the launcher select int64 offsets
INT32_SAFETY_BUFFER = NUM_INT32_ELEMENTS - BLOCK_SIZE * SAFE_INT32_BUFFER_MULTIPLIER


@triton.jit
def _fg_kernel(
    e,
    g,
    h,
    n_elements,
    BLOCK_SIZE: tl.constexpr,
    LONG_INDEXING: tl.constexpr,
):
    """
    Elementwise SwiGLU forward: h = (e * sigmoid(e)) * g.

    The three buffers are addressed as flat arrays of `n_elements` values;
    each program handles one block of `BLOCK_SIZE` consecutive offsets.
    `e` is upcast to float32 for the activation and the result is cast to
    `g`'s dtype before the multiply. With `LONG_INDEXING` set, offsets are
    computed in int64.
    """
    block_idx = tl.program_id(0)
    if LONG_INDEXING:
        # 64-bit offsets for element counts near or beyond the int32 range
        offsets = block_idx.to(tl.int64) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE).to(
            tl.int64
        )
        n_elements = tl.cast(n_elements, tl.int64)
    else:
        offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    # Guards the final, possibly partial, block
    mask = offsets < n_elements

    e_row = tl.load(e + offsets, mask = mask, other = 0).to(tl.float32)
    g_row = tl.load(g + offsets, mask = mask, other = 0)  # .to(tl.float32)

    # f = e * sigmoid(e)
    f_row = e_row * tl.sigmoid(e_row)  # e_row / (1 + tl.exp(-e_row))
    f_row = f_row.to(g_row.dtype)  # Exact copy from HF
    # h = f * g
    h_row = f_row * g_row

    # Store h
    tl.store(h + offsets, h_row, mask = mask)


def swiglu_fg_kernel(e, g):
    """
    Launch `_fg_kernel` to compute the SwiGLU forward product for `e` and `g`.

    `e` must be 3D (the shape is unpacked into three dimensions). A new tensor
    with the same shape, dtype and device as `e` is allocated for the result
    and returned. Int64 offsets are enabled once the element count exceeds
    `INT32_SAFETY_BUFFER`.
    """
    batch, seq_len, hd = e.shape
    total = e.numel()
    long_indexing = 1 if total > INT32_SAFETY_BUFFER else 0
    h = torch.empty((batch, seq_len, hd), dtype = e.dtype, device = e.device)

    def grid(meta):
        # One program per BLOCK_SIZE-sized slice of the flattened input.
        return (triton.cdiv(total, meta["BLOCK_SIZE"]),)

    with torch_gpu_device(e.device):
        _fg_kernel[grid](
            e,
            g,
            h,
            total,
            BLOCK_SIZE = BLOCK_SIZE,
            LONG_INDEXING = long_indexing,
        )
    return h


@triton.jit
def _DWf_DW_dfg_kernel(
    DW,
    e,
    g,
    n_elements,
    BLOCK_SIZE: tl.constexpr,
    LONG_INDEXING: tl.constexpr,
):
    """
    Backward SwiGLU kernel. Reference computation:

    e = e.float()
    se = 1.0 / (1.0 + torch.exp(-e))
    f = (se * e).to(dtype)
    h = f * g
    df = DW * f
    dg = DW * g
    de = (dg.float() * se * (1.0 + e * (1.0 - se))).to(dtype)

    The three input buffers are reused as outputs: on exit `DW` holds h,
    `e` holds df and `g` holds de.
    """
    block_idx = tl.program_id(0)
    if LONG_INDEXING:
        # int64 offsets so block_idx * BLOCK_SIZE cannot wrap for huge inputs.
        offsets = block_idx.to(tl.int64) * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE).to(
            tl.int64
        )
        n_elements = tl.cast(n_elements, tl.int64)
    else:
        offsets = block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    # Lanes past the end of the buffers are masked out of every load / store.
    mask = offsets < n_elements

    # All three inputs are read up front; the buffers are overwritten below.
    DW_row = tl.load(DW + offsets, mask = mask, other = 0)  # .to(tl.float32)
    e_row = tl.load(e + offsets, mask = mask, other = 0).to(tl.float32)
    g_row = tl.load(g + offsets, mask = mask, other = 0)  # .to(tl.float32)

    # e = e.float()
    # se = 1.0 / (1.0 + torch.exp(-e))
    se_row = tl.sigmoid(e_row)  # 1.0 / (1.0 + tl.exp(-e_row))
    # f = (se * e).to(dtype)
    f_row = se_row * e_row
    f_row = f_row.to(DW_row.dtype)
    # h = f * g
    h_row = f_row * g_row
    # df = DW * f
    df_row = DW_row * f_row
    # dg = DW * g
    dg_row = DW_row * g_row
    # de = (dg.float() * se * (1.0 + e * (1.0 - se))).to(dtype)
    de_row = dg_row.to(tl.float32) * se_row * (1.0 + e_row * (1.0 - se_row))
    de_row = de_row.to(DW_row.dtype)

    # Store derivatives in buffers
    tl.store(DW + offsets, h_row, mask = mask)  # h  = f * g
    tl.store(e + offsets, df_row, mask = mask)  # df = DW * f
    tl.store(g + offsets, de_row, mask = mask)  # de


def swiglu_DWf_DW_dfg_kernel(DW, e, g):
    """
    Launch `_DWf_DW_dfg_kernel` over `DW`, `e` and `g` and return the same
    three tensors, which the kernel overwrites in place with h, df and de.

    `e` must be 2D (flattened so the first dimension is bsz * seq_len).
    """
    # The unpacking doubles as a check that `e` really is two-dimensional.
    _batch_seq_len, _hd = e.shape
    total = e.numel()
    long_indexing = 1 if total > INT32_SAFETY_BUFFER else 0

    def grid(meta):
        # One program per BLOCK_SIZE-sized slice of the flattened input.
        return (triton.cdiv(total, meta["BLOCK_SIZE"]),)

    with torch_gpu_device(e.device):
        _DWf_DW_dfg_kernel[grid](
            DW,
            e,
            g,
            total,
            BLOCK_SIZE = BLOCK_SIZE,
            LONG_INDEXING = long_indexing,
        )
    return DW, e, g


================================================
FILE: unsloth/kernels/utils.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib
import triton
import ctypes

# Largest BLOCK_SIZE that `calculate_settings` accepts before raising.
MAX_FUSED_SIZE: int = 65536
# Module-level alias of the Triton helper, used by `calculate_settings`.
next_power_of_2 = triton.next_power_of_2
import functools
from typing import Optional

from ..device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
)
from .fp8 import weight_dequant, fp8_linear
import functools

# torch.cuda.amp.custom_fwd is deprecated >= 2.4
import torch

# Module-level alias used in type annotations below.
torch_Tensor = torch.Tensor
from unsloth_zoo.utils import Version

# Fail fast at import time on XPU installs with a torch that is too old.
if DEVICE_TYPE == "xpu" and Version(torch.__version__) < Version("2.6.0"):
    raise RuntimeError(
        "Intel xpu currently supports unsloth with torch.version >= 2.6.0"
    )

# Pick the autocast-aware custom_fwd / custom_bwd decorators matching the
# installed torch: the legacy torch.cuda.amp entry points before 2.4.0, the
# device_type-parameterised torch.amp ones from 2.4.0 on.
if Version(torch.__version__) < Version("2.4.0"):
    torch_amp_custom_fwd = torch.cuda.amp.custom_fwd
    torch_amp_custom_bwd = torch.cuda.amp.custom_bwd
else:
    torch_amp_custom_fwd = torch.amp.custom_fwd(device_type = "cuda")
    torch_amp_custom_bwd = torch.amp.custom_bwd(device_type = "cuda")

# XPU overrides whichever pair was selected above.
if DEVICE_TYPE == "xpu":
    torch_amp_custom_fwd = torch.amp.custom_fwd(device_type = "xpu")
    torch_amp_custom_bwd = torch.amp.custom_bwd(device_type = "xpu")


# tl.math.tanh now is libdevice.tanh
import triton
import triton.language as tl

# Resolve `triton_tanh` and `triton_cast` once at import time so kernels can
# use the same names regardless of the installed Triton version / backend.
if Version(triton.__version__) >= Version("3.0.0"):
    if DEVICE_TYPE == "xpu":
        # Intel backend ships its own libdevice namespace.
        triton_tanh = tl.extra.intel.libdevice.tanh
    else:
        from triton.language.extra import libdevice

        triton_tanh = libdevice.tanh
    triton_cast = tl.cast
else:
    triton_tanh = tl.math.tanh

    # No casting in old Triton versions
    # Fall back to the tensor's own `.to(dtype)` conversion instead.
    @triton.jit
    def triton_cast(x, dtype):
        return x.to(dtype)


@functools.lru_cache(1)
def is_cdna():
    """Detect AMD CDNA data-centre GPUs by Triton target arch (result is cached)."""
    on_hip = is_hip()
    if not on_hip:
        # Short-circuit exactly like `is_hip() and ...`: hand back the falsy value.
        return on_hip
    arch = triton.runtime.driver.active.get_current_target().arch
    cdna_archs = (
        "gfx940",
        "gfx941",
        "gfx942",
        "gfx950",  # CDNA4 (MI350/MI355X)
    )
    return arch in cdna_archs


@functools.lru_cache(1)
def is_rdna():
    """Detect ROCm-supported RDNA consumer/workstation GPUs (RDNA3, RDNA4)."""
    on_hip = is_hip()
    if not on_hip:
        # Short-circuit exactly like `is_hip() and ...`: hand back the falsy value.
        return on_hip
    arch = triton.runtime.driver.active.get_current_target().arch
    rdna_archs = (
        "gfx1100",
        "gfx1101",
        "gfx1200",
        "gfx1201",
    )
    return arch in rdna_archs


def calculate_settings(
    n: int,
) -> (
    int,
    int,
):
    """
    Choose Triton launch settings for a row of `n` elements.

    Returns `(BLOCK_SIZE, num_warps)` where BLOCK_SIZE is `n` rounded up to the
    next power of two and num_warps grows with BLOCK_SIZE (4, 8, 16 or 32).

    Raises RuntimeError when BLOCK_SIZE would exceed `MAX_FUSED_SIZE`.
    """
    block_size = next_power_of_2(n)
    if block_size > MAX_FUSED_SIZE:
        raise RuntimeError(
            f"Cannot launch Triton kernel since n = {n} exceeds "
            f"the maximum CUDA blocksize = {MAX_FUSED_SIZE}."
        )
    # Thresholds are checked from largest to smallest; the first match wins.
    warp_table = ((32768, 32), (8192, 16), (2048, 8))
    for threshold, warps in warp_table:
        if block_size >= threshold:
            return block_size, warps
    return block_size, 4


# Whether the installed bitsandbytes C functions accept a trailing stream
# argument (selects which fast_dequantize / fast_gemv variant is defined).
HAS_CUDA_STREAM = False
import bitsandbytes as bnb

# https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1330/files
HAS_CUDA_STREAM = Version(bnb.__version__) > Version("0.43.3")
# Helper that turns a tensor (or None) into a ctypes pointer for bnb calls.
get_ptr = bnb.functional.get_ptr

# NOTE(review): HAS_XPU_STREAM is only ever defined on XPU. Later checks are
# written as `DEVICE_TYPE == "xpu" and HAS_XPU_STREAM`, so the short-circuit
# keeps other devices from touching the undefined name.
if DEVICE_TYPE == "xpu":
    HAS_XPU_STREAM = True

# `torch_gpu_device(device)` is used as a context manager around kernel
# launches. With several devices it is the real device-switching context;
# with a single device it is a no-op context to avoid the switching overhead.
# NOTE(review): with DEVICE_COUNT > 1 and a DEVICE_TYPE other than
# cuda / hip / xpu, `torch_gpu_device` is left undefined.
if DEVICE_COUNT > 1:
    if DEVICE_TYPE in ("cuda", "hip"):
        torch_gpu_device = torch.cuda.device
    elif DEVICE_TYPE == "xpu":
        torch_gpu_device = torch.xpu.device
else:
    from contextlib import nullcontext

    def torch_gpu_device(device):
        # `device` is accepted for signature parity and ignored.
        return nullcontext()


# Select the private torch binding that returns the raw handle of the current
# stream for a device index; used by `_get_tensor_stream`.
# INTEL GPU Specific Logic
if DEVICE_TYPE == "xpu":
    _gpu_getCurrentRawStream = torch._C._xpu_getCurrentRawStream
# NVIDIA GPU Default Logic
else:
    _gpu_getCurrentRawStream = torch._C._cuda_getCurrentRawStream

# Local alias of the ctypes pointer type used to wrap raw stream handles.
c_void_p = ctypes.c_void_p


def _get_tensor_stream(tensor: torch_Tensor) -> c_void_p:
    """Return the current raw stream of `tensor`'s device wrapped as a `c_void_p`."""
    device_index = tensor.device.index
    raw_stream = _gpu_getCurrentRawStream(device_index)
    return c_void_p(raw_stream)


# Get array of CUDA streams and other buffers
# NOTE: `global` at module scope has no effect; these lines only advertise
# which module-level names the functions below read and mutate.
global CUDA_STREAMS
global XPU_STREAMS
global WEIGHT_BUFFERS
global ABSMAX_BUFFERS

# Build, once at import time, a table indexed by device index holding the raw
# stream handle captured for each device, plus two equally sized lists of
# lazily allocated scratch buffers (filled in by `fast_dequantize`).
# NOTE(review): max() over an empty dict raises ValueError, so this assumes
# DEVICE_COUNT >= 1 — confirm.
# INTEL GPU Specific Logic
if DEVICE_TYPE == "xpu":
    _XPU_STREAMS = {
        (index := torch.xpu.device(i).idx): ctypes.c_void_p(
            torch._C._xpu_getCurrentRawStream(index)
        )
        for i in range(DEVICE_COUNT)
    }
    XPU_STREAMS = [None] * (max(_XPU_STREAMS.keys()) + 1)
    WEIGHT_BUFFERS = [None] * (max(_XPU_STREAMS.keys()) + 1)
    ABSMAX_BUFFERS = [None] * (max(_XPU_STREAMS.keys()) + 1)
    for k, v in _XPU_STREAMS.items():
        XPU_STREAMS[k] = v
    # Frozen into a tuple: the stream table is read-only after import.
    XPU_STREAMS = tuple(XPU_STREAMS)
    del _XPU_STREAMS
else:
    # NVIDIA GPU Default Logic
    _CUDA_STREAMS = {
        (index := torch.cuda.device(i).idx): ctypes.c_void_p(
            torch._C._cuda_getCurrentRawStream(index)
        )
        for i in range(DEVICE_COUNT)
    }
    CUDA_STREAMS = [None] * (max(_CUDA_STREAMS.keys()) + 1)
    WEIGHT_BUFFERS = [None] * (max(_CUDA_STREAMS.keys()) + 1)
    ABSMAX_BUFFERS = [None] * (max(_CUDA_STREAMS.keys()) + 1)
    for k, v in _CUDA_STREAMS.items():
        CUDA_STREAMS[k] = v
    # Frozen into a tuple: the stream table is read-only after import.
    CUDA_STREAMS = tuple(CUDA_STREAMS)
    del _CUDA_STREAMS

# Bitsandbytes operations
# Module-level aliases of the ctypes integer types and of the bitsandbytes C
# entry points called by `fast_dequantize` and `fast_gemv`.
ctypes_c_int = ctypes.c_int
ctypes_c_int32 = ctypes.c_int32
cdequantize_blockwise_fp32 = bnb.functional.lib.cdequantize_blockwise_fp32
cdequantize_blockwise_fp16_nf4 = bnb.functional.lib.cdequantize_blockwise_fp16_nf4
cdequantize_blockwise_bf16_nf4 = bnb.functional.lib.cdequantize_blockwise_bf16_nf4

# The 4-bit inference matmul is bound under the same two names on every
# device; on XPU they point at the library's `cgemv_*` entry points.
if DEVICE_TYPE == "xpu":
    # https://github.com/bitsandbytes-foundation/bitsandbytes/blob/c3b8de268fdb55a88f92feada23fc811a1e6877a/bitsandbytes/backends/xpu/ops.py#L115
    # for xpu, inference gemv using above link
    cgemm_4bit_inference_naive_fp16 = bnb.functional.lib.cgemv_4bit_inference_fp16
    cgemm_4bit_inference_naive_bf16 = bnb.functional.lib.cgemv_4bit_inference_bf16
else:
    cgemm_4bit_inference_naive_fp16 = bnb.functional.lib.cgemm_4bit_inference_naive_fp16
    cgemm_4bit_inference_naive_bf16 = bnb.functional.lib.cgemm_4bit_inference_naive_bf16


# Callable returning the current stream object for a device (XPU or CUDA).
torch_device_stream = (
    torch.xpu.current_stream if DEVICE_TYPE == "xpu" else torch.cuda.current_stream
)

# Module-level aliases of frequently used torch functions and dtypes.
torch_mm = torch.mm
torch_mv = torch.mv
torch_matmul = torch.matmul
torch_addmm = torch.addmm
torch_empty = torch.empty
torch_float32 = torch.float32
torch_float16 = torch.float16
torch_bfloat16 = torch.bfloat16

# Check whether torchao can be imported to get Float8Tensor
# When torchao is missing or does not expose Float8Tensor, the name is bound to
# NoneType so that `isinstance(W, Float8Tensor)` checks are simply False.
if importlib.util.find_spec("torchao") is not None:
    try:
        from torchao.quantization import Float8Tensor
    except Exception:
        # Catch import-time failures only; a bare `except:` would also swallow
        # KeyboardInterrupt / SystemExit raised while importing.
        import torchao

        # Only report the failure on torchao versions from 0.15.0 onwards.
        if Version(torchao.__version__) >= Version("0.15.0"):
            print(
                f"Unsloth: `from torchao.quantization import Float8Tensor` failed on version={torchao.__version__}"
            )
        Float8Tensor = type(None)
else:
    Float8Tensor = type(None)


def QUANT_STATE(W):
    """Return `W.quant_state`, or None when `W` has no such attribute."""
    try:
        return W.quant_state
    except AttributeError:
        return None


def get_lora_parameters(proj):
    """
    Return a 5-tuple of (weight, weight quant_state, lora A, lora B, and lora scale).
    If QAT is enabled, additionally fake quantize the base layer and lora weights.

    `proj` is either a LoRA-wrapped layer (exposing `base_layer`, `lora_A`,
    `lora_B`, `scaling`, ...) or a plain layer. When `proj` has no
    `disable_adapters` attribute, or adapters are disabled or merged, the three
    LoRA entries are returned as None.

    The active adapter name is read from `active_adapters` and, failing that,
    from `active_adapter` (default "default"). It may be either a plain string
    or a sequence of names, in which case the first name is used.
    """
    # For DPO or disabled adapters
    base_layer = getattr(proj, "base_layer", proj)
    W = base_layer.weight

    # Optionally apply fake quantization to base layer weights for QAT
    weight_fake_quantizer = getattr(base_layer, "weight_fake_quantizer", None)
    if weight_fake_quantizer is not None:
        W = weight_fake_quantizer(W)

    # Get quant state for 4bit or FP8
    W_quant = getattr(W, "quant_state", None)
    if W_quant is None:
        W_quant = getattr(base_layer, "weight_scale_inv", None)
        if W_quant is None:
            W_quant = getattr(base_layer, "weight_scale", None)

    if getattr(base_layer, "quant_method", None) == "fp8":
        # we need to somehow store and pass this information :)
        W.block_size = getattr(base_layer, "block_size", [128, 128])
        W_quant.block_size = W.block_size

    if getattr(proj, "disable_adapters", True) or proj.merged:
        return W, W_quant, None, None, None

    adapter = getattr(proj, "active_adapters", None)
    if adapter is None:
        adapter = getattr(proj, "active_adapter", "default")
    # A plain string IS the adapter name; indexing it would yield just its
    # first character (e.g. "d") and break the lora_A / lora_B lookups below.
    if not isinstance(adapter, str):
        adapter = adapter[0]

    # Optionally apply fake quantization to lora weights for QAT
    lora_A_linear = proj.lora_A[adapter]
    lora_B_linear = proj.lora_B[adapter]
    A = lora_A_linear.weight
    B = lora_B_linear.weight
    lora_A_fake_quantizer = getattr(lora_A_linear, "weight_fake_quantizer", None)
    if lora_A_fake_quantizer is not None:
        A = lora_A_fake_quantizer(A)
    lora_B_fake_quantizer = getattr(lora_B_linear, "weight_fake_quantizer", None)
    if lora_B_fake_quantizer is not None:
        B = lora_B_fake_quantizer(B)

    return (
        W,
        W_quant,
        A,
        B,
        proj.scaling[adapter],
    )


def get_lora_parameters_bias(proj):
    """
    Return a 6-tuple of (weight, weight quant_state, lora A, lora B, lora scale,
    bias) for `proj`.

    `proj` is either a LoRA-wrapped layer (exposing `base_layer`, `lora_A`,
    `lora_B`, `scaling`, ...) or a plain layer. When `proj` has no
    `disable_adapters` attribute, or adapters are disabled or merged, the three
    LoRA entries are returned as None.

    The active adapter name is read from `active_adapters` and, failing that,
    from `active_adapter` (default "default"). It may be either a plain string
    or a sequence of names, in which case the first name is used.
    """
    # For DPO or disabled adapters
    base_layer = getattr(proj, "base_layer", proj)
    W = base_layer.weight

    # Get quant state for 4bit or FP8
    W_quant = getattr(W, "quant_state", None)
    if W_quant is None:
        W_quant = getattr(base_layer, "weight_scale_inv", None)
        if W_quant is None:
            W_quant = getattr(base_layer, "weight_scale", None)

    if getattr(proj, "disable_adapters", True) or proj.merged:
        return W, W_quant, None, None, None, base_layer.bias

    # Only reached with active adapters: the FP8 block size is attached to the
    # weight and its scale on this path only.
    if getattr(base_layer, "quant_method", None) == "fp8":
        # we need to somehow store and pass this information :)
        W.block_size = getattr(base_layer, "block_size", [128, 128])
        W_quant.block_size = W.block_size

    adapter = getattr(proj, "active_adapters", None)
    if adapter is None:
        adapter = getattr(proj, "active_adapter", "default")
    # A plain string IS the adapter name; indexing it would yield just its
    # first character (e.g. "d") and break the lora_A / lora_B lookups below.
    if not isinstance(adapter, str):
        adapter = adapter[0]

    return (
        W,
        W_quant,
        proj.lora_A[adapter].weight,
        proj.lora_B[adapter].weight,
        proj.scaling[adapter],
        base_layer.bias,
    )


def _maybe_fake_quantize_activations(
    X: torch.Tensor, proj: torch.nn.Module
) -> torch.Tensor:
    """
    If QAT is enabled, fake quantize the input activations.
    Otherwise, just return the input activations as is.
    Weights are fake quantized separately in `get_lora_parameters`.
    """
    base_layer = getattr(proj, "base_layer", proj)
    activation_fake_quantizer = getattr(base_layer, "activation_fake_quantizer", None)
    if activation_fake_quantizer is not None:
        X = activation_fake_quantizer(X)
    return X


# INTEL GPU Specific Logic
if DEVICE_TYPE == "xpu" and HAS_XPU_STREAM:

    @torch.inference_mode
    def fast_dequantize(W, quant_state = None, out = None, use_global_buffer = False):
        """
        Dequantize `W` (XPU variant; passes the per-device XPU stream to bnb).

        Shortcuts: a `Float8Tensor` returns `W.dequantize()`; a missing
        `quant_state` returns `W` unchanged; a float8_e4m3fn `W` is handed to
        `weight_dequant`. Otherwise the absmax statistics are dequantized
        first, `offset` is added, and the fp16 / bf16 NF4 routine fills `out`.

        With `use_global_buffer = True` the result is a view into a per-device
        scratch buffer shared across calls and any `out` argument is ignored;
        otherwise `out` is used if given (shape and dtype are asserted) or a
        new tensor is allocated. The result is returned transposed when
        `W.shape[0] == 1`.
        """
        # TODO: After adding XPU BNB support, check this function
        if isinstance(W, Float8Tensor):
            return W.dequantize()
        if quant_state is None:
            return W
        if W.dtype == torch.float8_e4m3fn:
            return weight_dequant(W, quant_state)
        if type(quant_state) is not list:
            # New quant_state as a class
            # https://github.com/TimDettmers/bitsandbytes/pull/763/files
            absmax = quant_state.absmax
            shape = quant_state.shape
            dtype = quant_state.dtype
            blocksize = quant_state.blocksize
            offset = quant_state.offset
            state2 = quant_state.state2
            absmax2 = state2.absmax
            code2 = state2.code
            blocksize2 = state2.blocksize
        else:
            # Old quant_state as a list of lists
            absmax, shape, dtype, blocksize, compressed_stats, _, _ = quant_state
            offset, state2 = compressed_stats
            absmax2, code2, blocksize2, _, _, _, _ = state2
        global XPU_STREAMS
        device = W.device
        device_index = device.index
        XPU_STREAM = XPU_STREAMS[device_index]

        n_elements_absmax = absmax.numel()
        # Create weight matrix
        if use_global_buffer:
            # Use same buffers for faster inference
            size = shape[0] * shape[1]
            global WEIGHT_BUFFERS
            global ABSMAX_BUFFERS
            WEIGHT_BUFFER = WEIGHT_BUFFERS[device_index]
            ABSMAX_BUFFER = ABSMAX_BUFFERS[device_index]
            # (Re)allocate both buffers on first use or when the dtype changes.
            if WEIGHT_BUFFER is None or WEIGHT_BUFFER.dtype != dtype:
                WEIGHT_BUFFERS[device_index] = WEIGHT_BUFFER = torch_empty(
                    size, dtype = dtype, device = device, requires_grad = False
                )
                ABSMAX_BUFFERS[device_index] = ABSMAX_BUFFER = torch_empty(
                    n_elements_absmax,
                    dtype = torch.float32,
                    device = device,
                    requires_grad = False,
                )

            # Buffers only ever grow; smaller requests reuse a prefix.
            if size > WEIGHT_BUFFER.numel():
                WEIGHT_BUFFER.resize_(size)
            if n_elements_absmax > ABSMAX_BUFFER.numel():
                ABSMAX_BUFFER.resize_(n_elements_absmax)

            out = WEIGHT_BUFFER[:size].view(shape)
            out_absmax = ABSMAX_BUFFER[:n_elements_absmax]
        else:
            if out is None:
                out = torch_empty(
                    shape, dtype = dtype, device = device, requires_grad = False
                )
            else:
                assert out.shape == shape
                assert out.dtype == dtype
            out_absmax = torch_empty(
                n_elements_absmax,
                dtype = torch_float32,
                device = device,
                requires_grad = False,
            )

        # NF4 dequantization of statistics
        ptr_out_absmax = get_ptr(out_absmax)
        with torch_gpu_device(device):
            cdequantize_blockwise_fp32(
                get_ptr(code2),
                get_ptr(absmax),
                get_ptr(absmax2),
                ptr_out_absmax,
                ctypes_c_int(blocksize2),
                ctypes_c_int(n_elements_absmax),
                XPU_STREAM,
            )
            out_absmax += offset

            # Dequantize W
            # Any dtype other than float16 is routed to the bf16 routine.
            fx = (
                cdequantize_blockwise_fp16_nf4
                if dtype == torch_float16
                else cdequantize_blockwise_bf16_nf4
            )
            fx(
                get_ptr(None),
                get_ptr(W),
                ptr_out_absmax,
                get_ptr(out),
                ctypes_c_int(blocksize),
                ctypes_c_int(out.numel()),
                XPU_STREAM,
            )
        # Careful returning transposed data
        is_transposed = True if W.shape[0] == 1 else False
        return out.t() if is_transposed else out

# NVIDIA GPU Default Logic
elif DEVICE_TYPE in ("cuda", "hip") and HAS_CUDA_STREAM:

    @torch.inference_mode
    def fast_dequantize(W, quant_state = None, out = None, use_global_buffer = False):
        """
        Dequantize `W` (CUDA / HIP variant; passes the per-device stream to bnb).

        Shortcuts: a `Float8Tensor` returns `W.dequantize()`; a missing
        `quant_state` returns `W` unchanged; a float8_e4m3fn `W` is handed to
        `weight_dequant`. Otherwise the absmax statistics are dequantized
        first, `offset` is added, and the fp16 / bf16 NF4 routine fills `out`.

        With `use_global_buffer = True` the result is a view into a per-device
        scratch buffer shared across calls and any `out` argument is ignored;
        otherwise `out` is used if given (shape and dtype are asserted) or a
        new tensor is allocated. The result is returned transposed when
        `W.shape[0] == 1`.
        """
        if isinstance(W, Float8Tensor):
            return W.dequantize()
        if quant_state is None:
            return W
        if W.dtype == torch.float8_e4m3fn:
            return weight_dequant(W, quant_state)
        if type(quant_state) is not list:
            # New quant_state as a class
            # https://github.com/TimDettmers/bitsandbytes/pull/763/files
            absmax = quant_state.absmax
            shape = quant_state.shape
            dtype = quant_state.dtype
            blocksize = quant_state.blocksize
            offset = quant_state.offset
            state2 = quant_state.state2
            absmax2 = state2.absmax
            code2 = state2.code
            blocksize2 = state2.blocksize
        else:
            # Old quant_state as a list of lists
            absmax, shape, dtype, blocksize, compressed_stats, _, _ = quant_state
            offset, state2 = compressed_stats
            absmax2, code2, blocksize2, _, _, _, _ = state2
        pass
        global CUDA_STREAMS
        device = W.device
        device_index = device.index
        CUDA_STREAM = CUDA_STREAMS[device_index]

        n_elements_absmax = absmax.numel()

        # Create weight matrix
        if use_global_buffer:
            # Use same buffers for faster inference
            size = shape[0] * shape[1]
            global WEIGHT_BUFFERS
            global ABSMAX_BUFFERS
            WEIGHT_BUFFER = WEIGHT_BUFFERS[device_index]
            ABSMAX_BUFFER = ABSMAX_BUFFERS[device_index]
            # (Re)allocate both buffers on first use or when the dtype changes.
            if WEIGHT_BUFFER is None or WEIGHT_BUFFER.dtype != dtype:
                WEIGHT_BUFFERS[device_index] = WEIGHT_BUFFER = torch_empty(
                    size, dtype = dtype, device = device, requires_grad = False
                )
                ABSMAX_BUFFERS[device_index] = ABSMAX_BUFFER = torch_empty(
                    n_elements_absmax,
                    dtype = torch_float32,
                    device = device,
                    requires_grad = False,
                )

            # Buffers only ever grow; smaller requests reuse a prefix.
            if size > WEIGHT_BUFFER.numel():
                WEIGHT_BUFFER.resize_(size)
            if n_elements_absmax > ABSMAX_BUFFER.numel():
                ABSMAX_BUFFER.resize_(n_elements_absmax)

            out = WEIGHT_BUFFER[:size].view(shape)
            out_absmax = ABSMAX_BUFFER[:n_elements_absmax]
        else:
            if out is None:
                out = torch_empty(
                    shape, dtype = dtype, device = device, requires_grad = False
                )
            else:
                assert out.shape == shape
                assert out.dtype == dtype
            out_absmax = torch_empty(
                n_elements_absmax,
                dtype = torch_float32,
                device = device,
                requires_grad = False,
            )
        pass

        # NF4 dequantization of statistics
        ptr_out_absmax = get_ptr(out_absmax)
        with torch_gpu_device(device):
            cdequantize_blockwise_fp32(
                get_ptr(code2),
                get_ptr(absmax),
                get_ptr(absmax2),
                ptr_out_absmax,
                ctypes_c_int(blocksize2),
                ctypes_c_int(n_elements_absmax),
                CUDA_STREAM,
            )
            out_absmax += offset

            # Dequantize W
            # Any dtype other than float16 is routed to the bf16 routine.
            fx = (
                cdequantize_blockwise_fp16_nf4
                if dtype == torch_float16
                else cdequantize_blockwise_bf16_nf4
            )
            fx(
                get_ptr(None),
                get_ptr(W),
                ptr_out_absmax,
                get_ptr(out),
                ctypes_c_int(blocksize),
                ctypes_c_int(out.numel()),
                CUDA_STREAM,
            )
        pass
        # Careful returning transposed data
        is_transposed = True if W.shape[0] == 1 else False
        return out.t() if is_transposed else out

    pass
else:

    @torch.inference_mode
    def fast_dequantize(W, quant_state = None, out = None, use_global_buffer = False):
        """
        Dequantize `W` (fallback variant: the bnb routines are called without
        a stream argument).

        Shortcuts: a `Float8Tensor` returns `W.dequantize()`; a missing
        `quant_state` returns `W` unchanged; a float8_e4m3fn `W` is handed to
        `weight_dequant`. Otherwise the absmax statistics are dequantized
        first, `offset` is added, and the fp16 / bf16 NF4 routine fills `out`.

        `use_global_buffer` is accepted for signature parity but not used in
        this variant: `out` is used if given (shape and dtype are asserted) or
        a new tensor is allocated. The result is returned transposed when
        `W.shape[0] == 1`.
        """
        if isinstance(W, Float8Tensor):
            return W.dequantize()
        if quant_state is None:
            return W
        if W.dtype == torch.float8_e4m3fn:
            return weight_dequant(W, quant_state)
        if type(quant_state) is not list:
            # New quant_state as a class
            # https://github.com/TimDettmers/bitsandbytes/pull/763/files
            absmax = quant_state.absmax
            shape = quant_state.shape
            dtype = quant_state.dtype
            blocksize = quant_state.blocksize
            offset = quant_state.offset
            state2 = quant_state.state2
            absmax2 = state2.absmax
            code2 = state2.code
            blocksize2 = state2.blocksize
        else:
            # Old quant_state as a list of lists
            absmax, shape, dtype, blocksize, compressed_stats, _, _ = quant_state
            offset, state2 = compressed_stats
            absmax2, code2, blocksize2, _, _, _, _ = state2
        pass

        n_elements_absmax = absmax.numel()
        device = W.device

        # Create weight matrix
        if out is None:
            out = torch_empty(shape, dtype = dtype, device = device, requires_grad = False)
        else:
            assert out.shape == shape
            assert out.dtype == dtype
        out_absmax = torch_empty(
            n_elements_absmax, dtype = torch_float32, device = device, requires_grad = False
        )

        # Do dequantization
        # Step 1: dequantize the absmax statistics, then add the stored offset.
        ptr_out_absmax = get_ptr(out_absmax)
        cdequantize_blockwise_fp32(
            get_ptr(code2),
            get_ptr(absmax),
            get_ptr(absmax2),
            ptr_out_absmax,
            ctypes_c_int(blocksize2),
            ctypes_c_int(n_elements_absmax),
        )
        out_absmax += offset

        # Step 2: dequantize W itself; any dtype other than float16 is routed
        # to the bf16 routine.
        fx = (
            cdequantize_blockwise_fp16_nf4
            if dtype == torch_float16
            else cdequantize_blockwise_bf16_nf4
        )
        fx(
            get_ptr(None),
            get_ptr(W),
            ptr_out_absmax,
            get_ptr(out),
            ctypes_c_int(blocksize),
            ctypes_c_int(out.numel()),
        )

        # Careful returning transposed data
        is_transposed = True if W.shape[0] == 1 else False
        return out.t() if is_transposed else out

    pass


# INTEL GPU Specific Logic
if DEVICE_TYPE == "xpu" and HAS_XPU_STREAM:

    def fast_gemv(X, W, quant_state, out = None):
        """
        Compute X @ W for a 4-bit quantized `W` via the bnb 4-bit inference
        routine (XPU variant; passes the per-device XPU stream).

        Without a `quant_state` this is a plain `torch_matmul(X, W, out = out)`.
        `X` must be 3D. When `out` is None a (1, 1, shape[0]) tensor of the
        quant state's dtype is allocated on `W`'s device. Returns `out`.

        NOTE(review): intended for seq_len == 1 per the comment below, but the
        check is commented out — confirm callers guarantee it.
        """
        if quant_state is None:
            return torch_matmul(X, W, out = out)
        # For fast X @ W where seq_len == 1
        # From https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L1469
        _, q_len, hd = X.shape
        # assert(q_len == 1)

        if type(quant_state) is not list:
            # https://github.com/TimDettmers/bitsandbytes/pull/763/files
            absmax = quant_state.absmax
            shape = quant_state.shape
            dtype = quant_state.dtype
            blocksize = quant_state.blocksize
            stats = quant_state.code
            offset = quant_state.offset
            state2 = quant_state.state2
            absmax2 = state2.absmax
            code2 = state2.code
            blocksize2 = state2.blocksize
        else:
            absmax, shape, dtype, blocksize, compressed_stats, quant_type, stats = (
                quant_state
            )
            offset, state2 = compressed_stats
            absmax2, code2, blocksize2, _, _, _, _ = state2
        global XPU_STREAMS
        device = W.device
        device_index = device.index
        XPU_STREAM = XPU_STREAMS[device_index]

        # assert(dtype == X.dtype)
        bout = shape[0]

        if out is None:
            out = torch_empty(
                (
                    1,
                    1,
                    bout,
                ),
                dtype = dtype,
                device = device,
            )
        # else:
        #     assert(out.shape == (1, 1, bout,))
        # pass

        # This function is only defined when DEVICE_TYPE == "xpu", so the
        # first branch is always the one taken here.
        if DEVICE_TYPE == "xpu":
            m = 1
            n = shape[0]
        else:
            n = 1
            m = shape[0]
        k = shape[1]
        lda = shape[0]
        ldc = shape[0]
        ldb = (hd + 1) // 2
        # The C routine takes its dimensions as 32-bit integers.
        m = ctypes_c_int32(m)
        n = ctypes_c_int32(n)
        k = ctypes_c_int32(k)
        lda = ctypes_c_int32(lda)
        ldb = ctypes_c_int32(ldb)
        ldc = ctypes_c_int32(ldc)

        # Dequantize the absmax statistics into a fresh float32 tensor and add
        # the stored offset before running the 4-bit matmul.
        df = torch_empty(absmax.shape, dtype = torch_float32, device = device)
        with torch_gpu_device(device):
            cdequantize_blockwise_fp32(
                get_ptr(code2),
                get_ptr(absmax),
                get_ptr(absmax2),
                get_ptr(df),
                ctypes_c_int(blocksize2),
                ctypes_c_int(df.numel()),
                XPU_STREAM,
            )
            df += offset
            absmax = df

            # Any dtype other than float16 is routed to the bf16 routine.
            fx = (
                cgemm_4bit_inference_naive_fp16
                if dtype == torch_float16
                else cgemm_4bit_inference_naive_bf16
            )

            blocksize = ctypes_c_int32(blocksize)
            fx(
                m,
                n,
                k,
                get_ptr(X),
                get_ptr(W),
                get_ptr(absmax),
                get_ptr(stats),
                get_ptr(out),
                lda,
                ldb,
                ldc,
                blocksize,
                XPU_STREAM,
            )

        return out

elif DEVICE_TYPE in ("cuda", "hip") and HAS_CUDA_STREAM:

    def fast_gemv(X, W, quant_state, out = None):
        """
        Compute X @ W for a 4-bit quantized `W` via the bnb 4-bit inference
        routine (CUDA / HIP variant; passes the per-device stream).

        Without a `quant_state` this is a plain `torch_matmul(X, W, out = out)`.
        `X` must be 3D. When `out` is None a (1, 1, shape[0]) tensor of the
        quant state's dtype is allocated on `W`'s device. Returns `out`.

        NOTE(review): intended for seq_len == 1 per the comment below, but the
        check is commented out — confirm callers guarantee it.
        """
        if quant_state is None:
            return torch_matmul(X, W, out = out)
        # For fast X @ W where seq_len == 1
        # From https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L1469
        _, q_len, hd = X.shape
        # assert(q_len == 1)

        if type(quant_state) is not list:
            # https://github.com/TimDettmers/bitsandbytes/pull/763/files
            absmax = quant_state.absmax
            shape = quant_state.shape
            dtype = quant_state.dtype
            blocksize = quant_state.blocksize
            stats = quant_state.code
            offset = quant_state.offset
            state2 = quant_state.state2
            absmax2 = state2.absmax
            code2 = state2.code
            blocksize2 = state2.blocksize
        else:
            absmax, shape, dtype, blocksize, compressed_stats, quant_type, stats = (
                quant_state
            )
            offset, state2 = compressed_stats
            absmax2, code2, blocksize2, _, _, _, _ = state2
        pass
        global CUDA_STREAMS
        device = W.device
        device_index = device.index
        CUDA_STREAM = CUDA_STREAMS[device_index]

        # assert(dtype == X.dtype)
        bout = shape[0]

        if out is None:
            out = torch_empty(
                (
                    1,
                    1,
                    bout,
                ),
                dtype = dtype,
                device = device,
            )
        # else:
        #     assert(out.shape == (1, 1, bout,))
        # pass

        n = 1
        m = shape[0]
        k = shape[1]
        lda = shape[0]
        ldc = shape[0]
        ldb = (hd + 1) // 2
        # The C routine takes its dimensions as 32-bit integers.
        m = ctypes_c_int32(m)
        n = ctypes_c_int32(n)
        k = ctypes_c_int32(k)
        lda = ctypes_c_int32(lda)
        ldb = ctypes_c_int32(ldb)
        ldc = ctypes_c_int32(ldc)

        # Dequantize the absmax statistics into a fresh float32 tensor and add
        # the stored offset before running the 4-bit matmul.
        df = torch_empty(absmax.shape, dtype = torch_float32, device = device)
        with torch_gpu_device(device):
            cdequantize_blockwise_fp32(
                get_ptr(code2),
                get_ptr(absmax),
                get_ptr(absmax2),
                get_ptr(df),
                ctypes_c_int(blocksize2),
                ctypes_c_int(df.numel()),
                CUDA_STREAM,
            )
            df += offset
            absmax = df

            # Any dtype other than float16 is routed to the bf16 routine.
            fx = (
                cgemm_4bit_inference_naive_fp16
                if dtype == torch_float16
                else cgemm_4bit_inference_naive_bf16
            )

            blocksize = ctypes_c_int32(blocksize)
            fx(
                m,
                n,
                k,
                get_ptr(X),
                get_ptr(W),
                get_ptr(absmax),
                get_ptr(stats),
                get_ptr(out),
                lda,
                ldb,
                ldc,
                blocksize,
                CUDA_STREAM,
            )
        pass

        return out

    pass
else:

    def fast_gemv(X, W, quant_state, out = None):
        """
        Compute X @ W for a 4-bit quantized `W` via the bnb 4-bit inference
        routine (fallback variant: the bnb routines are called without a
        stream argument).

        Without a `quant_state` this is a plain `torch_matmul(X, W, out = out)`.
        `X` must be 3D. When `out` is None a (1, 1, shape[0]) tensor of the
        quant state's dtype is allocated on `W`'s device. Returns `out`.

        NOTE(review): intended for seq_len == 1 per the comment below, but the
        check is commented out — confirm callers guarantee it.
        """
        if quant_state is None:
            return torch_matmul(X, W, out = out)
        # For fast X @ W where seq_len == 1
        # From https://github.com/TimDettmers/bitsandbytes/blob/main/bitsandbytes/functional.py#L1469
        _, q_len, hd = X.shape
        # assert(q_len == 1)

        if type(quant_state) is not list:
            # https://github.com/TimDettmers/bitsandbytes/pull/763/files
            absmax = quant_state.absmax
            shape = quant_state.shape
            dtype = quant_state.dtype
            blocksize = quant_state.blocksize
            stats = quant_state.code
            offset = quant_state.offset
            state2 = quant_state.state2
            absmax2 = state2.absmax
            code2 = state2.code
            blocksize2 = state2.blocksize
        else:
            absmax, shape, dtype, blocksize, compressed_stats, quant_type, stats = (
                quant_state
            )
            offset, state2 = compressed_stats
            absmax2, code2, blocksize2, _, _, _, _ = state2
        pass
        # assert(dtype == X.dtype)
        bout = shape[0]
        device = W.device

        if out is None:
            out = torch_empty(
                (
                    1,
                    1,
                    bout,
                ),
                dtype = dtype,
                device = device,
            )
        # else:
        #     assert(out.shape == (1, 1, bout,))
        # pass

        n = 1
        m = shape[0]
        k = shape[1]
        lda = shape[0]
        ldc = shape[0]
        ldb = (hd + 1) // 2
        # The C routine takes its dimensions as 32-bit integers.
        m = ctypes_c_int32(m)
        n = ctypes_c_int32(n)
        k = ctypes_c_int32(k)
        lda = ctypes_c_int32(lda)
        ldb = ctypes_c_int32(ldb)
        ldc = ctypes_c_int32(ldc)

        # Dequantize the absmax statistics into a fresh float32 tensor and add
        # the stored offset before running the 4-bit matmul.
        df = torch_empty(absmax.shape, dtype = torch_float32, device = device)
        cdequantize_blockwise_fp32(
            get_ptr(code2),
            get_ptr(absmax),
            get_ptr(absmax2),
            get_ptr(df),
            ctypes_c_int(blocksize2),
            ctypes_c_int(df.numel()),
        )
        df += offset
        absmax = df

        # Any dtype other than float16 is routed to the bf16 routine.
        fx = (
            cgemm_4bit_inference_naive_fp16
            if dtype == torch_float16
            else cgemm_4bit_inference_naive_bf16
        )

        blocksize = ctypes_c_int32(blocksize)
        fx(
            m,
            n,
            k,
            get_ptr(X),
            get_ptr(W),
            get_ptr(absmax),
            get_ptr(stats),
            get_ptr(out),
            lda,
            ldb,
            ldc,
            blocksize,
        )

        return out

    pass


def fast_linear_forward(proj, X, temp_lora = None, out = None):
    """
    Apply the (possibly quantized, possibly LoRA-adapted) linear layer `proj` to `X`.

    Args:
        proj: Module whose weight, quantization state, LoRA factors, LoRA scale and
            bias are extracted with `get_lora_parameters_bias`.
        X: 3D input; its shape is unpacked as `(bsz, q_len, in_dim)`.
        temp_lora: Optional preallocated buffer for the intermediate LoRA product
            (only used on the `q_len == 1` path).
        out: Optional preallocated output buffer (only used on the `q_len == 1` path).

    Returns:
        The projected tensor, including the LoRA update and the bias when present.
    """
    W, W_quant, lora_A, lora_B, lora_S, bias = get_lora_parameters_bias(proj)
    bsz, q_len, in_dim = X.shape
    if q_len != 1:
        # Multi-token inputs take the generic path. `matmul_lora` has no bias
        # parameter, so the bias must be applied here; previously it was dropped.
        out = matmul_lora(X, W, W_quant, lora_A, lora_B, lora_S)
        if bias is not None:
            out += bias
        return out

    if W_quant is None:
        # Unquantized weight: plain matmul against W^T.
        out = torch_matmul(X, W.t(), out = out)
    elif W.dtype == torch.float8_e4m3fn:
        # NOTE(review): `bias` is handed to fp8_linear and is also added at the end
        # of this function - confirm fp8_linear does not already apply it.
        out = fp8_linear(X, W, W_quant, bias)
    elif bsz == 1 and q_len == 1:
        # Single token, single sequence: fused quantized matrix-vector kernel.
        out = fast_gemv(X, W, W_quant, out = out)
    else:
        # Batched single-token decode: dequantize into the shared buffer, then matmul.
        W = fast_dequantize(W.t(), W_quant, use_global_buffer = True)
        out = torch_matmul(X, W, out = out)

    # Add in LoRA weights
    if lora_A is not None:
        out_dim = out.shape[2]
        dtype = X.dtype

        # Cache the LoRA factors cast to the activation dtype on the tensors
        # themselves so the cast happens only once.
        if not hasattr(lora_A, "_fast_lora"):
            lora_A._fast_lora = lora_A.to(dtype)
            lora_B._fast_lora = lora_B.to(dtype)

        if bsz == 1:
            # Vector path: out += s * B @ (A @ x)
            out = out.view(out_dim)
            temp_lora = torch_mv(lora_A._fast_lora, X.ravel(), out = temp_lora)
            out.addmv_(lora_B._fast_lora, temp_lora, alpha = lora_S)
        else:
            # Batched path: out += s * (X @ A^T) @ B^T
            out = out.view(bsz, out_dim)
            temp_lora = torch_mm(
                X.view(bsz, in_dim), lora_A._fast_lora.t(), out = temp_lora
            )
            out.addmm_(temp_lora, lora_B._fast_lora.t(), alpha = lora_S)
        out = out.view(bsz, 1, out_dim)

    if bias is not None:
        out += bias

    return out


def matmul_lora(X, W, W_quant, A, B, s, out = None):
    """
    Compute `X @ W^T` and, when LoRA factors are given, add `s * (X @ A^T) @ B^T`.

    Args:
        X: 2D or 3D input. 3D inputs are flattened to 2D for the matmuls and the
            result is viewed back to `(batch, seq_len, -1)`.
        W: Weight; may be a `Float8Tensor`, a raw `float8_e4m3fn` tensor, or
            anything `fast_dequantize` accepts.
        W_quant: Quantization state forwarded to `fp8_linear` / `fast_dequantize`.
        A, B: LoRA factors, or `A = None` to skip the LoRA update.
        s: LoRA scaling used as `alpha` in the fused add.
        out: Optional output buffer for the base matmul (not used on the raw
            FP8 path, which returns the result of `fp8_linear`).

    Returns:
        The result tensor, 3D if `X` was 3D, otherwise 2D.
    """
    compute_dtype = X.dtype

    was_3d = X.dim() == 3
    if was_3d:
        batch, seq_len, _ = X.shape
        X = X.view(-1, X.shape[-1])

    if isinstance(W, Float8Tensor):
        assert W.ndim == 2
        # In the backward pass, rowwise scaled becomes colwise scaled after we
        # transpose the weight tensor. Use this case to detect backward.
        # TODO: would be simpler if we simply don't call `matmul_lora` in backward
        looks_like_backward = (
            W.block_size[0] == W.shape[0] and W.block_size[1] == 1
        )
        W = W.dequantize() if looks_like_backward else W.contiguous()
        out = torch_matmul(X, W.t(), out = out)
    elif W.dtype == torch.float8_e4m3fn:
        out = fp8_linear(X, W, W_quant)
    else:
        W = fast_dequantize(W, W_quant, use_global_buffer = True)
        out = torch_matmul(X, W.t(), out = out)

    # Drop the local reference to the weight once a quantized weight was consumed.
    if W_quant is not None:
        del W

    if A is not None:
        # LoRA is enabled: out += s * (X @ A^T) @ B^T
        lora_down = A.t()
        lora_up = B.t()
        projected = torch_matmul(X, lora_down.to(compute_dtype))
        out.addmm_(projected, lora_up.to(compute_dtype), alpha = s)

    if was_3d:
        return out.view(batch, seq_len, -1)
    return out


================================================
FILE: unsloth/models/__init__.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import FastLlamaModel
from .loader import FastLanguageModel, FastVisionModel, FastTextModel, FastModel
from .mistral import FastMistralModel
from .qwen2 import FastQwen2Model
from .qwen3 import FastQwen3Model
from .qwen3_moe import FastQwen3MoeModel
from .granite import FastGraniteModel
from .sentence_transformer import FastSentenceTransformer

try:
    from .falcon_h1 import FastFalconH1Model
except:
    # transformers_version < 4.53.0 does not have falcon_h1 so silently skip it for now
    pass
from .dpo import PatchDPOTrainer, PatchKTOTrainer
from ._utils import is_bfloat16_supported, is_vLLM_available, __version__
from .rl import PatchFastRL, vLLMSamplingParams


================================================
FILE: unsloth/models/_utils.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "2026.3.8"

__all__ = [
    "SUPPORTS_BFLOAT16",
    "is_bfloat16_supported",
    "is_vLLM_available",
    "prepare_model_for_kbit_training",
    "xformers",
    "xformers_attention",
    "xformers_version",
    "__version__",
    "importlib_version",
    "HAS_FLASH_ATTENTION",
    "HAS_FLASH_ATTENTION_SOFTCAPPING",
    "USE_MODELSCOPE",
    "platform_system",
    "resolve_hip_gpu_stats_name",
    "patch_tokenizer",
    "get_statistics",
    "Unsloth_Offloaded_Gradient_Checkpointer",
    "offload_to_disk",
    "offload_input_embeddings",
    "offload_output_embeddings",
    "unsloth_offloaded_gradient_checkpoint",
    "torch_compile_options",
    "patch_linear_scaling",
    "patch_llama_rope_scaling",
    "create_boolean_mask",
    "torch_amp_custom_fwd",
    "torch_amp_custom_bwd",
    # "accelerate_old_send_to_device",
    # "accelerate_new_send_to_device",
    "patch_gradient_accumulation_fix",
    "patch_compiling_bitsandbytes",
    "patch_regional_compilation",
    "patch_layernorm",
    "patch_torch_compile",
    "patch_model_and_tokenizer",
    "patch_unsloth_gradient_checkpointing",
    "unpatch_unsloth_gradient_checkpointing",
    "patch_gradient_checkpointing",
    "unpatch_gradient_checkpointing",
    "HAS_CUT_CROSS_ENTROPY",
    "EMPTY_LOGITS",
    "fused_linear_cross_entropy",
    "unsloth_fused_ce_loss",
    "patch_unsloth_smart_gradient_checkpointing",
    "unpatch_unsloth_smart_gradient_checkpointing",
    "apply_unsloth_gradient_checkpointing",
    "patch_compiled_autograd",
    "process_vision_info",
    "unsloth_compile_transformers",
    "prefer_flex_attn_if_supported",
    "patch_fast_lora",
    "validate_loftq_config",
    "RaiseUninitialized",
    "fast_inference_setup",
    "patch_peft_fast_inference",
    "error_out_no_vllm",
    "dequantize_module_weight",
    "patch_hf_quantizer",
    "verify_fp8_support_if_applicable",
    "_get_inference_mode_context_manager",
    "hf_login",
    "is_moe_model",
    "get_moe_target_parameters",
    "make_fast_generate_wrapper",
]

import torch
from typing import Union, Optional, List, Any, Callable, Tuple, Iterator
from platform import system as platform_system

platform_system = platform_system()
import numpy as np
import contextlib
import re
from dataclasses import dataclass, field
import functools
import textwrap
import logging
import warnings, subprocess, inspect, psutil, os, math
from unsloth_zoo.utils import Version, get_quant_type
from importlib.metadata import version as importlib_version
from ..device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
)
from ..import_fixes import UNSLOTH_ENABLE_LOGGING
from unsloth_zoo.log import logger
from unsloth_zoo.tokenizer_utils import (
    patch_tokenizer as _patch_tokenizer,
)
from unsloth_zoo.rl_environments import (
    check_python_modules,
    create_locked_down_function,
    execute_with_time_limit,
    Benchmarker,
)
from unsloth_zoo.patching_utils import (
    patch_compiling_bitsandbytes,
    patch_layernorm,
    patch_torch_compile,
    patch_model_and_tokenizer,
    patch_compiled_autograd,
)
from unsloth_zoo.gradient_checkpointing import (
    Unsloth_Offloaded_Gradient_Checkpointer,
    unsloth_offloaded_gradient_checkpoint,
    patch_unsloth_gradient_checkpointing,
    unpatch_unsloth_gradient_checkpointing,
    Unsloth_Gradient_Checkpointer,
    unsloth_gradient_checkpoint,
    patch_gradient_checkpointing,
    unpatch_gradient_checkpointing,
    patch_unsloth_smart_gradient_checkpointing,
    unpatch_unsloth_smart_gradient_checkpointing,
)
from unsloth_zoo.loss_utils import (
    HAS_CUT_CROSS_ENTROPY,
    fused_linear_cross_entropy,
    _unsloth_get_batch_samples,
    unsloth_fused_ce_loss,
)
from unsloth_zoo.vision_utils import (
    process_vision_info,
)
from unsloth_zoo.compiler import (
    get_transformers_model_type,
    unsloth_compile_transformers as _unsloth_compile_transformers,
)
from unsloth_zoo.training_utils import (
    prepare_model_for_training,
)


def resolve_hip_gpu_stats_name(gpu_stats):
    """
    Build a display name (always ending in ". ") for an AMD/HIP GPU.

    Candidates are tried in order: `gpu_stats.name`, then
    `torch.cuda.get_device_name(0)`, then a `gfx...` architecture id found in one
    of several architecture attributes of `gpu_stats`. A trailing parenthetical
    is stripped from names, and the generic "AMD Radeon Graphics" label is
    treated as uninformative. Falls back to "AMD GPU. ".
    """
    generic_labels = ("amd radeon graphics",)
    trailing_parenthetical = r"\s*\([^)]*\)\s*$"

    def _tidy(raw):
        # Stringify, trim, and drop a trailing "(...)" suffix.
        return re.sub(trailing_parenthetical, "", str(raw or "").strip()).strip()

    def _is_informative(candidate):
        key = candidate.lower().strip(". ")
        return bool(key) and key not in generic_labels

    stats_name = _tidy(getattr(gpu_stats, "name", ""))
    if _is_informative(stats_name):
        return stats_name + ". "

    try:
        device_name = _tidy(torch.cuda.get_device_name(0))
    except Exception:
        device_name = ""
    if _is_informative(device_name):
        return device_name + ". "

    # First architecture attribute that is set and non-blank, else "".
    arch_attributes = ("gcnArchName", "gcn_arch_name", "arch_name", "gfx_arch_name")
    arch_name = next(
        (
            str(value).strip()
            for value in (getattr(gpu_stats, key, None) for key in arch_attributes)
            if value is not None and str(value).strip()
        ),
        "",
    )

    if arch_name:
        found = re.search(r"(gfx[0-9a-z]+)", arch_name.strip(), flags = re.I)
        if found:
            return f"AMD {found.group(1).lower()} GPU. "
    return "AMD GPU. "


from unsloth_zoo.temporary_patches import (
    TEMPORARY_PATCHES,
)


def apply_unsloth_gradient_checkpointing(
    use_gradient_checkpointing, max_seq_length, dtype
):
    """
    Patch or unpatch Unsloth's smart gradient checkpointing for the requested mode.

    In "unsloth" mode the offloading variant is only enabled for
    `max_seq_length >= 512`; below that, benchmarks showed the offloading
    overhead is not worth it (crossover around 384-512 tokens), so standard
    gradient checkpointing is selected instead.

    Args:
        use_gradient_checkpointing: "unsloth", True, False, or None
        max_seq_length: The maximum sequence length
        dtype: The model dtype forwarded to the patcher

    Returns:
        The effective setting: True when "unsloth" was downgraded for a short
        sequence length, otherwise the value that was passed in.
    """
    if use_gradient_checkpointing == "unsloth":
        if max_seq_length < 512:
            # Short sequences: fall back to standard gradient checkpointing.
            unpatch_unsloth_smart_gradient_checkpointing()
            return True
        patch_unsloth_smart_gradient_checkpointing(dtype = dtype)
        return "unsloth"

    if use_gradient_checkpointing in (True, False):
        # Explicit True / False: undo any earlier "unsloth" patching.
        unpatch_unsloth_smart_gradient_checkpointing()

    # Any other value (e.g. None) is passed through untouched.
    return use_gradient_checkpointing


def prefer_flex_attn_if_supported(model_class, config):
    """
    Select flex attention for a model when it is available and known to work.

    Returns "flex_attention" (after writing it into `config`, when a config is
    given) or None when flex attention is disabled through
    `UNSLOTH_ENABLE_FLEX_ATTENTION=0`, unavailable, unsupported by
    `model_class`, excluded for the model type, or when any step raises.
    """
    if os.environ.get("UNSLOTH_ENABLE_FLEX_ATTENTION", "1") == "0":
        return None

    # GPT-OSS, Mllama and Gemma3N use eager/sdpa attention during
    # inference since flex attention returns incorrect results or errors out.
    # GPT-OSS: left padding issues cause incorrect outputs.
    # Mllama: _update_causal_mask uses make_flex_block_causal_mask which
    # creates BlockMask with Q_LEN=KV_LEN=total_seq_len, but during
    # decode q_len=1, causing ValueError. Needs transformers update.
    # Gemma3N: timm vision wrappers (eg Gemma3nVisionConfig) do not
    # support flex_attention.
    # NemotronH: hybrid Mamba-2 + Transformer model that does not
    # support flex_attention (raises NotImplementedError from transformers).
    excluded_model_types = ("gpt_oss", "mllama", "nemotron_h")
    excluded_prefix = "gemma3n"

    try:
        from transformers.utils.import_utils import is_torch_flex_attn_available

        if not is_torch_flex_attn_available():
            return None

        class_supports_flex = model_class is not None and getattr(
            model_class, "_supports_flex_attn", False
        )
        if not class_supports_flex:
            return None

        model_type = getattr(config, "model_type", "") if config else ""
        if model_type in excluded_model_types:
            return None
        if str(model_type).startswith(excluded_prefix):
            return None

        if config is not None:
            config._attn_implementation = "flex_attention"
            if hasattr(config, "attn_implementation"):
                config.attn_implementation = "flex_attention"
        return "flex_attention"
    except Exception:
        return None


def _run_temporary_patches(phase):
    """
    Invoke every callable in `TEMPORARY_PATCHES`.

    A patch whose signature has a `phase` parameter receives `phase = phase`;
    all others are called without arguments. If inspecting or calling a patch
    raises ValueError or TypeError, the patch is called once more with no
    arguments (note this also covers such errors raised by the patch itself).
    """
    import inspect

    for patch_fn in TEMPORARY_PATCHES:
        try:
            wants_phase = "phase" in inspect.signature(patch_fn).parameters
            call_kwargs = {"phase": phase} if wants_phase else {}
            patch_fn(**call_kwargs)
        except (ValueError, TypeError):
            patch_fn()


# Run every registered temporary patch for the "init" phase at import time.
_run_temporary_patches("init")

# =============================================
# Disable some warnings which can get annoying
# Each call below silences one warning category for one originating module.
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "torch")
warnings.filterwarnings(action = "ignore", category = FutureWarning, module = "torch")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "huggingface_hub")
warnings.filterwarnings(
    action = "ignore", category = FutureWarning, module = "huggingface_hub"
)
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "trl")
warnings.filterwarnings(action = "ignore", category = FutureWarning, module = "trl")
warnings.filterwarnings(action = "ignore", category = FutureWarning, module = "xformers")
warnings.filterwarnings(action = "ignore", category = RuntimeWarning, module = "subprocess")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "transformers")
warnings.filterwarnings(action = "ignore", category = FutureWarning, module = "accelerate")
warnings.filterwarnings(
    action = "ignore", category = RuntimeWarning, module = "multiprocessing"
)
warnings.filterwarnings(action = "ignore", category = RuntimeWarning, module = "multiprocess")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "triton")
warnings.filterwarnings(action = "ignore", category = UserWarning, module = "bitsandbytes")

# Stop "Special tokens have been added in the vocabulary, ..."
# The threshold is set above CRITICAL, so no standard-level record from this
# logger passes.
logging.getLogger("transformers.tokenization_utils_base").setLevel(logging.CRITICAL + 1)

# Shared error text for code paths that need torchao.
TORCHAO_MSG = "Error: torchao not found, please install with `pip install torchao`"


# Ignore logging messages
class HideLoggingMessage(logging.Filter):
    """
    Logging filter that drops every record whose message contains `text`.

    Attach with `logger.addFilter(HideLoggingMessage("some substring"))`.
    """

    __slots__ = ("text",)

    def __init__(self, text):
        # Substring that marks a record as one to hide.
        self.text = text

    def filter(self, x):
        """Return False (drop the record) when `text` occurs in its message."""
        try:
            message = x.getMessage()
        except Exception:
            # `getMessage` performs `msg % args` and raises when a caller passed
            # mismatched arguments. An exception raised inside a filter would
            # propagate to the logging call site, so fall back to the
            # unformatted message instead of crashing the caller.
            message = str(x.msg)
        return self.text not in message


# Replace warning messages (analogous to HideLoggingMessage but for warnings.warn)
class ReplaceWarningMessage:
    """
    Swap selected `warnings.warn` messages for Unsloth branded text.

    Rules are `(match_text, replacement, category)` tuples stored on the class
    and checked in registration order. The first rule whose `match_text` occurs
    in the warning text, and whose category is None or is exactly the warning's
    category, wins: its replacement is printed and the warning is not shown.
    Warnings matching no rule go to the `warnings.showwarning` that was active
    when the hook was installed.
    """

    _rules = []
    _original_showwarning = None
    _installed = False

    @classmethod
    def add_rule(cls, match_text, replacement, category = None):
        """Register a rule and install the `showwarning` hook on first use."""
        rule = (match_text, replacement, category)
        cls._rules.append(rule)
        if cls._installed:
            return
        cls._install()

    @classmethod
    def _install(cls):
        """Remember the current `warnings.showwarning` and replace it with the hook."""
        cls._original_showwarning = warnings.showwarning
        cls._installed = True

        def _patched_showwarning(
            message, category, filename, lineno, file = None, line = None
        ):
            text = str(message)
            for needle, substitute, wanted_category in cls._rules:
                if needle not in text:
                    continue
                if wanted_category is not None and category is not wanted_category:
                    continue
                print(substitute)
                return
            cls._original_showwarning(message, category, filename, lineno, file, line)

        warnings.showwarning = _patched_showwarning


# Stop vLLM messages
# Unless logging is explicitly enabled, hide a set of noisy vLLM messages.
# Each import is optional (the module may not exist in the installed vLLM, or
# vLLM may be absent), so failures are ignored. `except Exception` is used
# instead of a bare `except` so KeyboardInterrupt / SystemExit still propagate.
if not UNSLOTH_ENABLE_LOGGING:
    try:
        from vllm.worker.worker import logger as vllm_worker_logger

        vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed"))
        del vllm_worker_logger
    except Exception:
        pass
    try:
        from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger

        vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed"))
        del vllm_gpu_worker_logger
    except Exception:
        pass
    try:
        from vllm.executor.executor_base import logger as vllm_executor_logger

        vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep"))
        vllm_executor_logger.addFilter(HideLoggingMessage("to wake up"))
        vllm_executor_logger.addFilter(HideLoggingMessage("Executor is not sleeping"))
        del vllm_executor_logger
    except Exception:
        pass
    try:
        from vllm.v1.executor.abstract import logger as vllm_v1_executor_logger

        vllm_v1_executor_logger.addFilter(HideLoggingMessage("to fall asleep"))
        vllm_v1_executor_logger.addFilter(HideLoggingMessage("to wake up"))
        vllm_v1_executor_logger.addFilter(
            HideLoggingMessage("Executor is not sleeping")
        )
        del vllm_v1_executor_logger
    except Exception:
        pass
    try:
        from vllm.core.block.prefix_caching_block import (
            logger as vllm_prefix_caching_logger,
        )

        vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache"))
        del vllm_prefix_caching_logger
    except Exception:
        pass
    try:
        from vllm.v1.core.block_pool import logger as vllm_block_pool_logger

        vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache"))
        del vllm_block_pool_logger
    except Exception:
        pass
    try:
        from vllm.lora.models import logger as vllm_lora_model_logger

        vllm_lora_model_logger.addFilter(
            HideLoggingMessage(
                "Regarding multimodal models, vLLM currently only supports adding"
            )
        )
        del vllm_lora_model_logger
    except Exception:
        pass
    try:
        from vllm.attention.utils.fa_utils import (
            logger as vllm_attention_utils_fa_utils_logger,
        )

        vllm_attention_utils_fa_utils_logger.addFilter(
            HideLoggingMessage("Cannot use FA version")
        )
        del vllm_attention_utils_fa_utils_logger
    except Exception:
        pass

# Hide a set of noisy messages from transformers / accelerate / huggingface_hub.
# Optional imports are wrapped in try/except because the module or its logger
# may not exist in the installed version; `except Exception` (rather than a
# bare `except`) keeps KeyboardInterrupt / SystemExit from being swallowed.

# The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here.
from transformers.training_args import logger as transformers_training_args_logger

transformers_training_args_logger.addFilter(HideLoggingMessage("The speedups"))
# torch.distributed process group is initialized, but parallel_mode != ParallelMode.DISTRIBUTED.
transformers_training_args_logger.addFilter(HideLoggingMessage("torch.distributed"))
# average_tokens_across_devices is set to True but it is invalid when world size is1
transformers_training_args_logger.addFilter(
    HideLoggingMessage("average_tokens_across_devices")
)
del transformers_training_args_logger

# No label_names provided for model class
from transformers.trainer import logger as transformers_trainer_logger

transformers_trainer_logger.addFilter(HideLoggingMessage("No label_names"))

# The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config.
transformers_trainer_logger.addFilter(HideLoggingMessage("The tokenizer has new"))
del transformers_trainer_logger

# Using the default loss: `ForCausalLMLoss`.
try:
    from transformers.modeling_utils import logger as transformers_modeling_utils_logger

    transformers_modeling_utils_logger.addFilter(HideLoggingMessage("ForCausalLMLoss"))
    del transformers_modeling_utils_logger
except Exception:
    pass

# The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
try:
    from accelerate.utils.modeling import logger as accelerate_utils_modeling_logger

    accelerate_utils_modeling_logger.addFilter(
        HideLoggingMessage("The model weights are not tied")
    )
    del accelerate_utils_modeling_logger
except Exception:
    pass

# Setting `pad_token_id` to `eos_token_id`
try:
    from transformers.generation.utils import (
        logger as transformers_generation_utils_logger,
    )

    transformers_generation_utils_logger.addFilter(
        HideLoggingMessage("Setting `pad_token_id` to `eos_token_id`")
    )
    # "You have set `compile_config`
    transformers_generation_utils_logger.addFilter(HideLoggingMessage("compile_config"))
    del transformers_generation_utils_logger
except Exception:
    pass

# The following generation flags are not valid and may be ignored:
try:
    from transformers.generation.configuration_utils import (
        logger as configuration_logger,
    )

    configuration_logger.addFilter(HideLoggingMessage("following generation flags"))
    del configuration_logger
except Exception:
    pass

# Gemma3 It is strongly recommended to train Gemma3 models with the `eager`
try:
    from transformers.models.gemma3.modeling_gemma3 import logger as gemma3_logger

    gemma3_logger.addFilter(HideLoggingMessage("strongly recommended"))
    del gemma3_logger
except Exception:
    pass

# Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed.
try:
    from huggingface_hub.file_download import logger as hub_logger

    hub_logger.addFilter(HideLoggingMessage("hf_xet"))
    del hub_logger
except Exception:
    pass

# MXFP4 quantization requires triton >= 3.4.0
try:
    from transformers.quantizers.quantizer_mxfp4 import logger as mxfp4_logger

    mxfp4_logger.addFilter(HideLoggingMessage("requires triton"))
    del mxfp4_logger
except Exception:
    pass

# You passed `quantization_config` or equivalent parameters
try:
    warnings.filterwarnings(
        action = "ignore",
        message = r".*quantization_config.*",
        category = UserWarning,
        append = True,
    )
except Exception:
    pass

# UserWarning: Logical operators 'and' and 'or' are deprecated for non-scalar tensors; please use '&' or '|' instead
# Will be fixed in torch 2.8.1 https://github.com/pytorch/pytorch/issues/158463
try:
    warnings.filterwarnings(
        action = "ignore",
        message = r".*Logical operators 'and' and 'or'.*",
        category = UserWarning,
        append = True,
    )
except Exception:
    pass

# Using a slow image processor as `use_fast`
try:
    from transformers.processing_utils import logger as processing_utils_logger

    processing_utils_logger.addFilter(HideLoggingMessage("`use_fast`"))
    del processing_utils_logger
except Exception:
    pass

# Using a slow image processor as `use_fast` (same message, emitted from the
# auto image-processing module)
try:
    from transformers.models.auto.image_processing_auto import (
        logger as processing_utils_logger,
    )

    processing_utils_logger.addFilter(HideLoggingMessage("`use_fast`"))
    del processing_utils_logger
except Exception:
    pass

# `use_cache=True` is incompatible with gradient checkpointing
try:
    from transformers.trainer import logger as trainer_logger

    trainer_logger.addFilter(HideLoggingMessage("`use_cache=True`"))
    del trainer_logger
except Exception:
    pass

# `use_cache=True` is incompatible with gradient checkpointing (same message,
# emitted from transformers.utils.generic)
try:
    from transformers.utils.generic import logger as trainer_logger

    trainer_logger.addFilter(HideLoggingMessage("`use_cache=True`"))
    del trainer_logger
except Exception:
    pass

# We detected that you are using `from_pretrained` with a meta device context manager or `torch.set_default_device('meta')
try:
    from transformers.modeling_utils import logger as modeling_utils_logger

    modeling_utils_logger.addFilter(HideLoggingMessage("anti-pattern"))
    del modeling_utils_logger
except Exception:
    pass

# Errors out on
# Some weights of Gemma3nForConditionalGeneration were not initialized from the model checkpoint
from transformers.modeling_utils import logger as transformers_logger


class _RaiseUninitialized(logging.Handler):
    """
    Logging handler that turns a "Some weights of ..." record into an exception.

    Records mentioning one of a few head / classifier weight names are ignored,
    and the check is disabled unless `UNSLOTH_WARN_UNINITIALIZED` is "1" (the
    default when the variable is unset).
    """

    def __init__(self):
        super().__init__()

    def emit(self, record):
        """Raise when `record` reports uninitialized weights that are not exempt."""
        record_text = str(record)
        lowered = record_text.lower()

        if "some weights of" not in lowered:
            return

        # Weight names for which an uninitialized report is tolerated.
        exempt_markers = (
            "score.weight",
            "classifier.weight",
            "cls.predictions",
            "predictions.decoder",
        )
        if any(marker in lowered for marker in exempt_markers):
            return

        if os.environ.get("UNSLOTH_WARN_UNINITIALIZED", "1") != "1":
            return

        raise Exception(
            "Unsloth: Critical error since some weights are not initialized.\n"
            "Please try updating Unsloth, transformers and timm via:\n"
            "`pip install --upgrade --force-reinstall --no-cache-dir --no-deps unsloth unsloth_zoo transformers timm`\n"
            + record_text
        )


class RaiseUninitialized:
    """
    Attach a `_RaiseUninitialized` handler to `transformers_logger` on creation.

    Call `remove()` to detach the handler again.
    """

    def __init__(self):
        handler = _RaiseUninitialized()
        self.error_handler = handler
        transformers_logger.addHandler(handler)

    def remove(self):
        """Detach the handler that was attached in `__init__`."""
        transformers_logger.removeHandler(self.error_handler)


# The model is already on multiple devices.
# `except Exception` (not a bare `except`) so KeyboardInterrupt / SystemExit
# raised during import are not swallowed.
try:
    from transformers.trainer import logger as transformers_trainer_logger

    transformers_trainer_logger.addFilter(
        HideLoggingMessage("The model is already on multiple devices.")
    )
except Exception:
    pass

# Hide HF Hub unauthenticated request warnings
try:
    from huggingface_hub.utils._http import logger as hf_http_logger

    hf_http_logger.addFilter(
        HideLoggingMessage("You are sending unauthenticated requests")
    )
    del hf_http_logger
except Exception:
    pass

# Replace PEFT target_parameters warning with Unsloth branded message for MoE models
ReplaceWarningMessage.add_rule(
    match_text = "target_parameters",
    replacement = (
        "Unsloth: PEFT set target_parameters but found no matching parameters.\n"
        "This is expected for MoE models - Unsloth handles MoE expert LoRA targeting separately."
    ),
    category = RuntimeWarning,
)

# Patch get_model_param_count to record correct 4bit / 8bit
from transformers.trainer_pt_utils import is_deepspeed_zero3_enabled


def extract_quant_model_param_count(model):
    """
    Count the parameters of a quantized model.

    Every parameter contributes `numel()`, except those whose class is named
    "Params4bit", which contribute twice their `numel()`. Returns an int.
    """

    def _logical_numel(param):
        factor = 2 if type(param).__name__ == "Params4bit" else 1
        return factor * param.numel()

    return sum(_logical_numel(param) for _, param in model.named_parameters())


def get_model_param_count(model, trainable_only = False):
    """
    Return the model's parameter count.

    With `trainable_only = True` only parameters with `requires_grad` are
    counted. Under DeepSpeed ZeRO-3 a parameter's `ds_numel` is used when it
    has one. When all parameters are counted and the model's config carries a
    `quantization_config`, the total from `extract_quant_model_param_count`
    replaces the plain sum.
    """
    zero3_enabled = is_deepspeed_zero3_enabled()

    def _size(param):
        if zero3_enabled and hasattr(param, "ds_numel"):
            return param.ds_numel
        return param.numel()

    total = 0
    for param in model.parameters():
        if trainable_only and not param.requires_grad:
            continue
        total += _size(param)

    counts_everything = not trainable_only
    if (
        counts_everything
        and hasattr(model, "config")
        and hasattr(model.config, "quantization_config")
    ):
        quantized_total = extract_quant_model_param_count(model)
        if quantized_total is not None:
            total = quantized_total
    return total


# Replace transformers' `get_model_param_count` with the version defined above.
# The name is rebound in both modules: `transformers.trainer_pt_utils`, and
# `transformers.trainer`, which has its own module-level attribute of that name.
import transformers.trainer_pt_utils

transformers.trainer_pt_utils.get_model_param_count = get_model_param_count
import transformers.trainer

transformers.trainer.get_model_param_count = get_model_param_count
# =============================================

# =============================================
# Edits all Config files to enable RoPE Scaling for all models


# Transformers had to update for Mistral Nemo 12b since Attention is (5120, 4096) now.
def patch_mistral_nemo_config(config):
    """
    Add a `head_dim` option to the source text of a Mistral config class.

    `config` is source code as a string. If it already contains "head_dim ("
    it is returned unchanged. Otherwise three textual substitutions are applied
    in order: a docstring entry for `head_dim`, a `head_dim=None` keyword
    argument after `num_key_value_heads=8,`, and an assignment of
    `self.head_dim` after `self.sliding_window`.
    """
    if "head_dim (" in config:
        return config

    default_sentence = "If it is not specified, will default to `8`."
    substitutions = (
        (
            default_sentence,
            "If it is not specified, will default to `8`.\n"
            "        head_dim (`int`, *optional*, defaults to `hidden_size // num_attention_heads`):\n"
            "            The attention head dimension.",
        ),
        (
            "num_key_value_heads=8,",
            "num_key_value_heads=8,\n        head_dim=None,",
        ),
        (
            "self.sliding_window = sliding_window",
            "self.sliding_window = sliding_window\n        self.head_dim = head_dim or hidden_size // num_attention_heads\n",
        ),
    )
    for old_text, new_text in substitutions:
        config = config.replace(old_text, new_text)
    return config


try:
    # Some Config files use layer_type_validation
    # for eg Gemma-2, so we must import it to stop errors.
    from transformers.configuration_utils import layer_type_validation
except:
    pass

try:
    # Transformers 5.0+ uses RotaryEmbeddingConfigMixin as a base class for configs
    from transformers.modeling_rope_utils import RotaryEmbeddingConfigMixin
except:
    pass
from transformers import __version__ as transformers_version

try:
    from transformers import PreTrainedConfig
except:
    from transformers import PretrainedConfig

# Architectures whose transformers config class is checked for a `rope_scaling`
# argument and, when it is missing, re-defined with one added.
model_architectures = [
    "llama",
    "mistral",
    "gemma",
    "gemma2",
    "qwen2",
    "granite",
    "qwen3",
    "qwen3_moe",
    "falcon_h1",
]

for model_name in model_architectures:
    config_filepath = f"transformers.models.{model_name}.configuration_{model_name}"
    # NOTE: `model_filepath` is not used anywhere in this loop.
    model_filepath = f"transformers.models.{model_name}.modeling_{model_name}"
    config_filename = f"{model_name.title().replace('_','')}Config"  # Title-case, then drop underscores: "qwen3_moe" -> "Qwen3MoeConfig"
    # Import the config class into this module's globals; skip architectures
    # the installed transformers does not provide.
    try:
        exec(f"from {config_filepath} import {config_filename}", globals())
    except:
        continue

    # Fetch the class source text so it can be edited and re-executed below.
    try:
        config = inspect.getsource(eval(config_filename))
    except:
        continue
    # The source mentions `RopeParameters`, so that name must be importable
    # here; otherwise skip this architecture.
    if "RopeParameters" in config:
        try:
            exec(f"from {config_filepath} import RopeParameters", globals())
        except:
            continue

    # Nothing to do when the config source already mentions rope_scaling.
    if "rope_scaling" in config:
        continue
    # Rewrite the `**kwargs, ):` tail of the signature (whitespace is optional
    # between the pieces) so that a `rope_scaling=None` parameter precedes
    # `**kwargs` and the body starts by storing it on `self`.
    config = re.sub(
        r"(\*\*kwargs)[\s]{0,}\,[\s]{0,}\)[\s]{0,}\:",
        r"rope_scaling=None,"
        r"\n        **kwargs):\n"
        r"\n        self.rope_scaling = rope_scaling\n",
        config,
    )

    # Just for Mistral Nemo
    if model_name == "mistral":
        if Version(transformers_version) <= Version("4.42.4"):
            config = patch_mistral_nemo_config(config)

    # Define the edited class in this module, then rebind the name inside the
    # transformers configuration module so lookups there get the patched class.
    exec(config, globals())
    exec(f"import {config_filepath}", globals())
    exec(f"{config_filepath}.{config_filename} = {config_filename}", globals())
# =============================================

# =============================================
# torch.cuda.amp.custom_fwd is deprecated >= 2.4
# Pick the `custom_fwd` / `custom_bwd` decorators matching the device type and
# the installed torch version. For any DEVICE_TYPE other than "cuda", "hip" or
# "xpu", neither name is defined by this block.
torch_version = torch.__version__
if DEVICE_TYPE in ("cuda", "hip"):
    if Version(torch_version) < Version("2.4.0"):
        # Older torch: use the torch.cuda.amp decorators directly.
        torch_amp_custom_fwd = torch.cuda.amp.custom_fwd
        torch_amp_custom_bwd = torch.cuda.amp.custom_bwd
    else:
        # Note that "hip" also uses device_type = "cuda" here.
        torch_amp_custom_fwd = torch.amp.custom_fwd(device_type = "cuda")
        torch_amp_custom_bwd = torch.amp.custom_bwd(device_type = "cuda")
elif DEVICE_TYPE == "xpu":
    if Version(torch_version) < Version("2.6.0"):
        raise RuntimeError("torch.xpu currently only supports torch.version >= 2.6.0")
    else:
        torch_amp_custom_fwd = torch.amp.custom_fwd(device_type = "xpu")
        torch_amp_custom_bwd = torch.amp.custom_bwd(device_type = "xpu")
# =============================================

# =============================================
# Fix KeyError: 'Cache only has 0 layers, attempted to access layer with index 0'
# import transformers.cache_utils
# if hasattr(transformers.cache_utils, "DynamicCache") and \
#     transformers.cache_utils.DynamicCache.__getitem__.__name__ != "__cache_utils_getitem__":

#     source = inspect.getsource(transformers.cache_utils.DynamicCache.__getitem__)
#     start = source.find("def")
#     spaces = start*" "
#     source = source.split("\n")
#     source = "\n".join(x[start:] for x in source)
#     where = source.find("raise KeyError")
#     source = source[:where] + \
#         f"if len(self) == 0:\n{spaces}{spaces}"\
#         "    raise RuntimeError('Unsloth: You must call `FastLanguageModel.for_inference(model)` before doing inference for Unsloth models.')\n" + \
#         f"{spaces}{spaces}else:\n{spaces}{spaces}{spaces}" + source[where:]
#     source = source.replace("__getitem__", "__cache_utils_getitem__", 1)
#     exec(source)
#     transformers.cache_utils.DynamicCache.__getitem__ = __cache_utils_getitem__
# pass
# =============================================

# =============================================
# Weird Databricks errors
from transformers.utils import is_openai_available

# transformers reports openai as available: check that it really imports.
if is_openai_available():
    try:
        from openai import OpenAI
    except:
        # The import failed even though the package was detected, so make
        # transformers.utils report openai as unavailable from now on.
        print("Unsloth: OpenAI failed to import - ignoring for now.")
        import transformers.utils

        def _is_openai_available():
            return False

        transformers.utils.is_openai_available = _is_openai_available

# =============================================
# Get Flash Attention v2 if Ampere (RTX 30xx, A100)
import bitsandbytes as bnb

from transformers import AutoTokenizer
from transformers.utils.import_utils import _is_package_available

# Defaults; the per-device branches below switch these on when supported.
SUPPORTS_BFLOAT16 = False
HAS_FLASH_ATTENTION = False
HAS_FLASH_ATTENTION_SOFTCAPPING = False

if DEVICE_TYPE == "cuda":
    major_version, minor_version = torch.cuda.get_device_capability()
    # Memoize the capability query for every later caller
    torch.cuda.get_device_capability = functools.cache(torch.cuda.get_device_capability)

    # bfloat16 and Flash Attention are only considered for capability major >= 8
    if major_version >= 8:
        SUPPORTS_BFLOAT16 = True
        if _is_package_available("flash_attn"):
            # Check for CUDA linking errors "undefined symbol: _ZNK3c106SymIntltEl"
            try:
                # Importing either symbol is the probe that the extension loads
                try:
                    # See https://github.com/unslothai/unsloth/issues/1437
                    from flash_attn.flash_attn_interface import flash_attn_gpu
                except:
                    from flash_attn.flash_attn_interface import flash_attn_cuda
                HAS_FLASH_ATTENTION = True

                # Also check for softcapping
                from flash_attn import __version__ as flash_attn_version

                HAS_FLASH_ATTENTION_SOFTCAPPING = Version(
                    flash_attn_version
                ) >= Version("2.6.3")
                if not HAS_FLASH_ATTENTION_SOFTCAPPING:
                    print(
                        "Unsloth: If you want to finetune Gemma 2, upgrade flash-attn to version 2.6.3 or higher!\n"
                        "Newer versions support faster and less memory usage kernels for Gemma 2's attention softcapping!\n"
                        "To update flash-attn, do the below:\n"
                        '\npip install --no-deps --no-build-isolation --upgrade "flash-attn>=2.6.3"'
                    )
            except:
                # Any failure in the probe above lands here
                print(
                    "Unsloth: Your Flash Attention 2 installation seems to be broken. "
                    "Using Xformers instead. No performance changes will be seen."
                )

                # Stop Flash Attention from importing!
                # Both the import_utils function and its re-export in
                # transformers.utils are overridden.
                import transformers.utils.import_utils

                transformers.utils.import_utils.is_flash_attn_2_available = (
                    lambda *args, **kwargs: False
                )
                import transformers.utils

                transformers.utils.is_flash_attn_2_available = (
                    lambda *args, **kwargs: False
                )

                HAS_FLASH_ATTENTION = False
        else:
            HAS_FLASH_ATTENTION = False
    else:
        # Tri Dao's benchmark shows xformers is faster for now.
        HAS_FLASH_ATTENTION = False
elif DEVICE_TYPE == "hip":
    # Same probe as the CUDA branch, but without the compute-capability gate
    SUPPORTS_BFLOAT16 = True
    if _is_package_available("flash_attn"):
        # Check for CUDA linking errors "undefined symbol: _ZNK3c106SymIntltEl"
        try:
            try:
                # See https://github.com/unslothai/unsloth/issues/1437
                from flash_attn.flash_attn_interface import flash_attn_gpu
            except:
                from flash_attn.flash_attn_interface import flash_attn_cuda
            HAS_FLASH_ATTENTION = True

            # Also check for softcapping
            from flash_attn import __version__ as flash_attn_version

            HAS_FLASH_ATTENTION_SOFTCAPPING = Version(flash_attn_version) >= Version(
                "2.6.3"
            )
            if not HAS_FLASH_ATTENTION_SOFTCAPPING:
                print(
                    "Unsloth: If you want to finetune Gemma 2, upgrade flash-attn to version 2.6.3 or higher!\n"
                    "Newer versions support faster and less memory usage kernels for Gemma 2's attention softcapping!\n"
                    "To update flash-attn, do the below:\n"
                    '\npip install --no-deps --no-build-isolation --upgrade "flash-attn>=2.6.3"'
                )
        except:
            # Any failure in the probe above lands here
            print(
                "Unsloth: Your Flash Attention 2 installation seems to be broken. "
                "Using Xformers instead. No performance changes will be seen."
            )

            # Stop Flash Attention from importing!
            import transformers.utils.import_utils

            transformers.utils.import_utils.is_flash_attn_2_available = (
                lambda *args, **kwargs: False
            )
            import transformers.utils

            transformers.utils.is_flash_attn_2_available = lambda *args, **kwargs: False

            HAS_FLASH_ATTENTION = False
elif DEVICE_TYPE == "xpu":
    # Only the bfloat16 flag is set on XPU; the Flash Attention flags keep their defaults
    SUPPORTS_BFLOAT16 = True

# =============================================
# Get Xformers
# Silence xformers CUDA mismatch warnings before import
# Best effort only: a failure to adjust the logger is ignored
try:
    _xformers_logger = logging.getLogger("xformers")
    _xformers_logger.setLevel(logging.ERROR)
    del _xformers_logger
except:
    pass
# Everything up to the `except` clauses runs inside a single try. The version
# gates below therefore do not abort the import of this module: a raised
# NotImplementedError / ImportError is caught by `except Exception` further
# down, which disables xformers (all three names are set to None).
try:
    from xformers import __version__ as xformers_version

    # Xformers <= 0.0.32.post2 has a broken FA3 dispatch on Blackwell/RTX 50x GPUs.
    # The FA3 check used `capability >= (9, 0)` which matches SM 10.0/11.0/12.0,
    # causing sm_90a kernels to be attempted on non-Hopper GPUs (CUDA error in
    # flash_fwd_launch_template.h:188). Fixed in 0.0.33 with `<= (9, 0)`.
    # See https://github.com/facebookresearch/xformers/issues/1329
    if DEVICE_TYPE == "cuda":
        major_version, minor_version = torch.cuda.get_device_capability()
        if (f"{major_version}.{minor_version}" in ("10.0", "11.0", "12.0")) and (
            Version(xformers_version) <= Version("0.0.32.post2")
        ):
            raise NotImplementedError(
                f"Unsloth: Xformers {xformers_version} has a broken FA3 dispatch on "
                f"SM {major_version}.{minor_version} GPUs. Please upgrade to >= 0.0.33 or build from source via\n"
                "```\n"
                "pip install ninja\n"
                "pip install -v --no-build-isolation -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers\n"
                "```\n"
            )

    # Temporarily disable 0.0.27 and higher - inference issues
    # (the condition is hard-coded to False, so this branch never runs)
    if False:  # Version(xformers_version) >= Version("0.0.27"):
        raise ImportError(
            "Unsloth: If you are in Colab, we updated the top cell install instructions - please change it to below "
            "then press Disconnect Runtime and then Restart it.\n"
            "\n"
            "%%capture\n"
            "# Installs Unsloth, Xformers (Flash Attention) and all other packages!\n"
            '!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"\n'
            '!pip install --no-deps "xformers<=0.0.27" trl peft accelerate bitsandbytes\n'
            "\n"
            f"Otherwise in local machines, your xformers version of {xformers_version} is too new.\n"
            'Please downgrade xformers via `pip install --force-reinstall "xformers<=0.0.27"'
        )

    # torch / xformers compatibility gates (first matching branch raises)
    if Version(torch_version) < Version("2.2.0") and Version(
        xformers_version
    ) >= Version("0.0.24"):
        raise ImportError(
            f"Unsloth: You have torch = {torch_version} but xformers = {xformers_version}.\n"
            f"Please install xformers < 0.0.24 for torch = {torch_version}."
        )
    elif Version(torch_version) < Version("2.3.0") and Version(
        xformers_version
    ) >= Version("0.0.26"):
        raise ImportError(
            f"Unsloth: You have torch = {torch_version} but xformers = {xformers_version}.\n"
            f"Please install xformers < 0.0.26 for torch = {torch_version}."
        )
    elif Version(torch_version) < Version("2.4.0") and Version(
        xformers_version
    ) > Version("0.0.27"):
        raise ImportError(
            f"Unsloth: You have torch = {torch_version} but xformers = {xformers_version}.\n"
            f"Please install xformers <= 0.0.27 for torch = {torch_version}."
        )

    from xformers._cpp_lib import _register_extensions

    try:
        _register_extensions()  # Check if C++ modules are loaded correctly
    except Exception as error:
        raise ImportError(
            "Unsloth: Xformers was not installed correctly.\n"
            "Please install xformers separately first.\n"
            "Then confirm if it's correctly installed by running:\n"
            "python -m xformers.info\n\n"
            "Longer error message:\n" + str(error)
        )
    # From here on the name `xformers` refers to the xformers.ops.fmha submodule
    import xformers.ops.fmha as xformers

    xformers_attention = xformers.memory_efficient_attention
except ModuleNotFoundError:
    # A module could not be found: disable silently
    xformers = None
    xformers_attention = None
    xformers_version = None
except Exception as e:
    # Any other failure, including the gates raised above: disable xformers and
    # only report the reason when UNSLOTH_ENABLE_LOGGING is set.
    if UNSLOTH_ENABLE_LOGGING:
        print(
            "========\nSwitching to PyTorch attention since your Xformers is broken.\n========\n"
        )
        print(str(e))
    xformers = None
    xformers_attention = None
    xformers_version = None

# Check TRL version
from trl import __version__ as trl_version

# Unsloth now supports all TRL versions!
# The gate is kept for reference only: its condition is hard-coded to False,
# so the ImportError below is never raised.
if False:  # Version(trl_version) >= Version("0.9.0"):
    raise ImportError(
        "Unsloth: If you are in Colab, we updated the top cell install instructions - please change it to below "
        "then press Disconnect Runtime and then Restart it.\n"
        "\n"
        "%%capture\n"
        "# Installs Unsloth, Xformers (Flash Attention) and all other packages!\n"
        '!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"\n'
        '!pip install --no-deps "xformers<=0.0.27" trl peft accelerate bitsandbytes\n'
        "\n"
        f"Otherwise in local machines, your TRL version of {trl_version} is too new.\n"
        "Please downgrade TRL via `pip install --force-reinstall trl"
    )

# =============================================
# Fix new Xformers versions TypeError: Multiple dispatch failed for 'torch._ops.aten.to.dtype_layout'
# accelerate_old_send_to_device = None
# accelerate_new_send_to_device = None
# if xformers_version is not None and Version(xformers_version) >= Version("0.0.27"):
#     import accelerate.utils.operations
#     if hasattr(accelerate.utils.operations, "send_to_device") and \
#         accelerate.utils.operations.send_to_device.__name__ != "_fixed_send_to_device":
#         accelerate_old_send_to_device = accelerate.utils.operations.send_to_device
#         from accelerate.utils.operations import *
#         send_to_device = inspect.getsource(accelerate.utils.operations.send_to_device)
#         send_to_device = re.sub(
#             r"([ ]{4,})return tensor\.to\(device\)",
#             r"\1try: return tensor.to(device)\n\1except: return tensor",
#             send_to_device,
#         ).replace("def send_to_device", "def _fixed_send_to_device")
#         exec(send_to_device)
#         # accelerate.utils.operations.send_to_device = _fixed_send_to_device
#         accelerate_new_send_to_device = _fixed_send_to_device
#     pass
# pass

# Transformers 4.46 breaks dynamic caching. This is a hack
import transformers.generation.configuration_utils

# Register "dynamic" only when the registry exists, is a plain list, and does
# not already contain it. The `and` chain short-circuits in the same order as
# the checks are listed.
if (
    hasattr(transformers.generation.configuration_utils, "ALL_CACHE_IMPLEMENTATIONS")
    and type(transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS)
    is list
    and "dynamic"
    not in transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS
):
    transformers.generation.configuration_utils.ALL_CACHE_IMPLEMENTATIONS.append(
        "dynamic"
    )
# =============================================

# =============================================
# Torch compile settings
# Each flag is True only when its environment variable is exactly "1".
# They are forwarded to patch_torch_compile below as debug / O3 / ignore_errors.
# Off unless UNSLOTH_COMPILE_DEBUG=1
UNSLOTH_COMPILE_DEBUG = os.environ.get("UNSLOTH_COMPILE_DEBUG", "0") == "1"
# Off unless UNSLOTH_COMPILE_MAXIMUM=1
UNSLOTH_COMPILE_MAXIMUM = os.environ.get("UNSLOTH_COMPILE_MAXIMUM", "0") == "1"
# On by default; any value other than "1" turns it off
UNSLOTH_COMPILE_IGNORE_ERRORS = (
    os.environ.get("UNSLOTH_COMPILE_IGNORE_ERRORS", "1") == "1"
)
# Just remove max_autotune_gemm warning
from torch._inductor.runtime.hints import DeviceProperties


@functools.lru_cache(None)
def is_big_gpu(index) -> bool:
    """
    Report whether a device has at least the minimum multiprocessor count.

    The threshold is 16 when DEVICE_TYPE is "xpu" and 80 otherwise. Results
    are memoized per `index`.

    Args:
        index: An int device index (wrapped into a torch.device for the active
            backend) or any other value, which is handed to
            DeviceProperties.create unchanged.

    Returns:
        bool: False if the multiprocessor count is below the threshold, else True.
    """
    on_xpu = DEVICE_TYPE == "xpu"
    backend = "xpu" if on_xpu else "cuda"
    required_sms = 16 if on_xpu else 80

    # Only exact ints are treated as indices; anything else is passed through
    device = torch.device(backend, index) if type(index) is int else index
    properties = DeviceProperties.create(device)
    return not (properties.multi_processor_count < required_sms)


import torch._inductor.utils

# Replace inductor's is_big_gpu with the version defined above
torch._inductor.utils.is_big_gpu = is_big_gpu
# Apply the compile patches using the environment-derived flags
patch_torch_compile(
    debug = UNSLOTH_COMPILE_DEBUG,
    O3 = UNSLOTH_COMPILE_MAXIMUM,
    ignore_errors = UNSLOTH_COMPILE_IGNORE_ERRORS,
)

# Shared `options` dict handed to torch.compile elsewhere in this module
torch_compile_options = {
    "epilogue_fusion": True,
    "max_autotune": True,
    "shape_padding": True,
    "trace.enabled": UNSLOTH_COMPILE_DEBUG,  # follows UNSLOTH_COMPILE_DEBUG
    "triton.cudagraphs": False,
}

import accelerate


def torch_compile_kwargs(*args, **kwargs):
    """
    Return the fixed keyword arguments used for torch.compile.

    All positional and keyword arguments are accepted and ignored. A notice is
    printed on every call.

    Returns:
        dict: `dynamic`, `fullgraph` and `options` (the module-level
        torch_compile_options dict, shared rather than copied).
    """
    print("Unsloth: Enabled auto compiling")
    return {
        "dynamic": True,
        "fullgraph": False,
        "options": torch_compile_options,
    }


# Override TorchDynamoPlugin.to_kwargs through each of the three paths
# accelerate exposes the class under.
accelerate.utils.dataclasses.TorchDynamoPlugin.to_kwargs = torch_compile_kwargs
accelerate.utils.TorchDynamoPlugin.to_kwargs = torch_compile_kwargs
accelerate.accelerator.TorchDynamoPlugin.to_kwargs = torch_compile_kwargs
# Remove the `accelerate` name from this module's namespace again
del accelerate


def patch_regional_compilation():
    """
    Replace torch.nn.ModuleList with a factory that torch.compiles list entries.

    Does nothing if the patch is already installed or torch is older than
    2.5.0. Otherwise it sets the UNSLOTH_PATCHED environment variable to "1"
    and rebinds torch.nn.ModuleList to `UnslothModuleList`.
    """
    # Regional torch 2.5 Recompilation - weirdly very slow??
    # Skip when already patched; otherwise only works for torch 2.5
    if torch.nn.ModuleList.__name__ == "UnslothModuleList" or Version(
        torch.__version__
    ) < Version("2.5.0"):
        return

    original_module_list = torch.nn.ModuleList
    os.environ["UNSLOTH_PATCHED"] = "1"

    def _compile_module(module):
        # Compile one entry with the shared module-level options
        return torch.compile(
            module,
            fullgraph = False,
            dynamic = True,
            options = torch_compile_options,
        )

    def UnslothModuleList(*args, **kwargs):
        # Only a single positional argument that is exactly a list is compiled;
        # every other call signature is forwarded untouched.
        single_plain_list = len(args) == 1 and not kwargs and type(args[0]) is list
        if single_plain_list:
            compiled_modules = [_compile_module(module) for module in args[0]]
            args = [original_module_list(compiled_modules)]
        return original_module_list(*args, **kwargs)

    UnslothModuleList.__doc__ = original_module_list.__doc__

    torch.nn.ModuleList = UnslothModuleList
    return


# =============================================


def prepare_model_for_kbit_training(
    model: Any,
    use_gradient_checkpointing: Optional = True,
    use_reentrant: Optional[bool] = True,
) -> Any:
    """
    Call prepare_model_for_training with a fixed set of training flags.

    Args:
        model: Forwarded unchanged.
        use_gradient_checkpointing: Forwarded unchanged.
        use_reentrant: Forwarded unchanged.

    Returns:
        Whatever prepare_model_for_training returns.
    """
    return prepare_model_for_training(
        model = model,
        use_gradient_checkpointing = use_gradient_checkpointing,
        use_reentrant = use_reentrant,
        # Fixed flags for this entry point
        full_finetuning = False,
        train_layernorms = False,
        train_embedding = False,
        train_lm_head = False,
        float32_mixed_precision = True,
    )


# =============================================
# Weirdly LoraLayer.update_layer downcasts PEFT layers to float16??
# For mixed precision, we need it to be in float32 not float16.
from peft import __version__ as peft_version
from peft.utils.integrations import dequantize_module_weight

# The patch is only applied for PEFT versions below 0.12.0
if Version(peft_version) < Version("0.12.0"):
    from peft.tuners.lora.layer import LoraLayer

    try:
        source = inspect.getsource(LoraLayer.update_layer)
        # Locate the span between `if weight is not None:` and
        # `self.to(weight.device)`
        text = "if weight is not None:\n"
        start = source.find(text) + len(text)
        end = source.find("self.to(weight.device)", start)
        # Indentation of the first `break` line found in the source
        spaces = re.findall(r"^([ ]{1,})break", source, flags = re.MULTILINE)[0]
        # Replace that span with indentation only, so `self.to(weight.device)`
        # directly follows the `if` line.
        # NOTE(review): presumably the removed span is the dtype-dependent cast
        # mentioned in the header comment - confirm against the PEFT source.
        source = source.replace(source[start:end], spaces)
        # Dedent every line by the leading whitespace of the source
        spaces = len(re.match(r"[\s]{1,}", source).group(0))
        lines = source.split("\n")
        source = "\n".join(x[spaces:] for x in lines)
        # Qualify `nn.` as `torch.nn.` unless it is already preceded by a dot
        source = re.sub(r"([^\.])nn\.", r"\1torch.nn.", source)
        source = source.replace("def update_layer", "def LoraLayer_update_layer")
        # Defines LoraLayer_update_layer in this module's globals
        exec(source, globals())

        # Fix up incorrect downcasting of LoRA weights
        from peft.tuners.lora.layer import LoraLayer

        LoraLayer.update_layer = LoraLayer_update_layer
        from peft.tuners.lora import LoraLayer

        LoraLayer.update_layer = LoraLayer_update_layer
    except:
        # Any failure leaves PEFT unpatched and is only logged
        logger.warning_once(
            "Unsloth unsuccessfully patched LoraLayer.update_layer. Please file a bug report.\n"
            "Luckily, your training run will still work in the meantime!"
        )

# =============================================
# Import the submodule explicitly: `import importlib` alone does not guarantee
# that `importlib.util` is loaded. This still binds the name `importlib`.
import importlib.util

# UNSLOTH_USE_MODELSCOPE=1 opts in to the ModelScope hub, which requires the
# `modelscope` package to be importable.
USE_MODELSCOPE = os.environ.get("UNSLOTH_USE_MODELSCOPE", "0") == "1"
if USE_MODELSCOPE:
    if importlib.util.find_spec("modelscope") is None:
        raise ImportError(
            "You are using the modelscope hub, please install modelscope by `pip install modelscope -U`"
        )

import socket


@functools.lru_cache(1)
def has_internet(host = "8.8.8.8", port = 53, timeout = 3):
    """
    Best-effort connectivity check using a single TCP connection attempt.

    The result is memoized by lru_cache (one entry).

    Args:
        host: Address to connect to.
        port: TCP port to connect to.
        timeout: Seconds to wait for the connection attempt.

    Returns:
        bool: False when TRANSFORMERS_OFFLINE is "1" or the connection attempt
        fails, True when the connection succeeds.
    """
    if os.environ.get("TRANSFORMERS_OFFLINE", "0") == "1":
        return False
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # Use a per-socket timeout. socket.setdefaulttimeout would change
            # the process-wide default for every socket created afterwards.
            sock.settimeout(timeout)
            sock.connect((host, port))
            return True
        finally:
            sock.close()
    except socket.error:
        return False


import psutil


def _get_statistics(statistics = None, force_download = True):
    """
    Detect the hosting environment (unless `statistics` is given) and download
    the `unslothai/{statistics}` repo from the Hugging Face Hub into a
    temporary directory.

    Nothing is downloaded when has_internet() returns False.

    Args:
        statistics: Repo name suffix to download. When None it is inferred from
            filesystem markers, then environment-variable name prefixes, then
            DMI vendor files, falling back to "other".
        force_download: NOTE(review): not read anywhere in this function;
            snapshot_download is always called with force_download = True.

    Raises:
        TimeoutError: When the time-limited download raises TimeoutError.
        Exception: Whatever the unbounded retry raises after any other failure.
    """
    # We log some basic stats about which environment is being used.
    # We simply download a README.md file from HF - all data is made public.
    # This is simply so we can check if some envs are broken or not.
    # You can disable this by commenting the below out
    n_cpus = psutil.cpu_count(logical = False)
    # Leading "\n" lets the checks below match variable-name prefixes only
    keynames = "\n" + "\n".join(os.environ.keys())
    # Check modelscope for down detection
    global USE_MODELSCOPE
    USE_MODELSCOPE = os.environ.get("UNSLOTH_USE_MODELSCOPE", "0") == "1"

    if statistics is None:
        # Prefer filesystem markers (harder to misidentify) before env-key matching
        try:
            from pathlib import Path

            if Path("/kaggle/working").exists():
                statistics = "kaggle"
            elif Path("/content").exists() and Path("/opt/colab").exists():
                statistics = "colab" if n_cpus == 1 else "colabpro"
            elif Path("/runpod-volume").exists():
                statistics = "runpod"
        except Exception:
            pass

        # Fallback to env-key detection
        if statistics is None:
            if "\nKAGGLE_" in keynames:
                statistics = "kaggle"
            elif "\nCOLAB_" in keynames and n_cpus == 1:
                statistics = "colab"
            elif "\nCOLAB_" in keynames:
                statistics = "colabpro"
            elif "\nRUNPOD_" in keynames:
                statistics = "runpod"
            elif "\nAWS_" in keynames:
                statistics = "aws"
            elif "\nAZURE_" in keynames:
                statistics = "azure"
            # elif "\nK_" in keynames or "\nFUNCTION_" in keynames: statistics = "gcp"
            elif "\nINVOCATION_ID" in keynames:
                statistics = "lambda"
            # else: statistics = "other"
            else:

                def try_vllm_check():
                    # Last resort: look for a cloud vendor name in the DMI files
                    vendor_files = (
                        "/sys/class/dmi/id/product_version",
                        "/sys/class/dmi/id/bios_vendor",
                        "/sys/class/dmi/id/product_name",
                        "/sys/class/dmi/id/chassis_asset_tag",
                        "/sys/class/dmi/id/sys_vendor",
                    )

                    for vendor_file in vendor_files:
                        path = Path(vendor_file)
                        if path.is_file():
                            file_content = path.read_text().lower()
                            if "amazon" in file_content:
                                return "aws"
                            elif "microsoft corporation" in file_content:
                                return "azure"
                            elif "google" in file_content:
                                return "gcp"
                    return "other"

                # Any error while reading the vendor files maps to "other"
                try:
                    statistics = try_vllm_check()
                except Exception:
                    statistics = "other"

    if statistics is not None:
        import tempfile
        from huggingface_hub import snapshot_download
        from unsloth_zoo.rl_environments import execute_with_time_limit

        if has_internet():

            def stats_check():
                # Download into a throwaway directory
                with tempfile.TemporaryDirectory(ignore_cleanup_errors = True) as f:
                    snapshot_download(
                        f"unslothai/{statistics}",
                        force_download = True,
                        cache_dir = f,
                        local_dir = f,
                    )

            # First attempt runs under a limit of 120
            time_limited_stats_check = execute_with_time_limit(120)(stats_check)
            try:
                time_limited_stats_check()
            except TimeoutError:
                # Re-raised with a user-facing hint about the modelscope fallback
                raise TimeoutError(
                    "Unsloth: HuggingFace seems to be down after trying for 120 seconds :(\n"
                    "Check https://status.huggingface.co/ for more details.\n"
                    "As a temporary measure, use modelscope with the same model name ie:\n"
                    "```\n"
                    "pip install modelscope\n"
                    "import os; os.environ['UNSLOTH_USE_MODELSCOPE'] = '1'\n"
                    "from unsloth import FastLanguageModel\n"
                    "model = FastLanguageModel.from_pretrained('unsloth/gpt-oss-20b')\n"
                    "```"
                )
            except Exception:
                # Try no time limit check
                stats_check()


def get_statistics(local_files_only = False):
    """
    Run the environment statistics checks against the Hugging Face Hub.

    Skipped entirely when UNSLOTH_DISABLE_STATISTICS is set, when
    UNSLOTH_USE_MODELSCOPE is "1", or when `local_files_only` is truthy.
    Hub progress bars are disabled for the duration of the checks and are
    re-enabled afterwards, even when a check raises.

    Args:
        local_files_only: When truthy, no check is performed.

    Raises:
        Whatever _get_statistics raises (e.g. TimeoutError).
    """
    # We log some basic stats about which environment is being used.
    # This is also to check if HuggingFace is down or not!
    # We simply download a README.md file from HF - all data is made public.
    # This is simply so we can check if some envs are broken or not.
    # You can disable this by setting UNSLOTH_DISABLE_STATISTICS
    import os

    if (
        "UNSLOTH_DISABLE_STATISTICS" in os.environ
        or os.environ.get("UNSLOTH_USE_MODELSCOPE", "0") == "1"
    ):
        return
    if local_files_only:
        return
    from huggingface_hub.utils import (
        disable_progress_bars,
        enable_progress_bars,
        are_progress_bars_disabled,
    )

    # Only re-enable the bars afterwards if this call disabled them
    disabled = False
    if not are_progress_bars_disabled():
        disable_progress_bars()
        disabled = True
    try:
        _get_statistics(None)
        _get_statistics("repeat", force_download = False)
        total_memory = (
            torch.xpu.get_device_properties(0).total_memory
            if DEVICE_TYPE == "xpu"
            else torch.cuda.get_device_properties(0).total_memory
        )
        vram = total_memory / 1024 / 1024 / 1024
        # Round up to the first bucket that fits; anything above 80 reports 96
        vram_buckets = (8, 16, 20, 24, 40, 48, 80)
        vram = next((bucket for bucket in vram_buckets if vram <= bucket), 96)
        _get_statistics(f"vram-{vram}")
        # Device counts above 8 are reported as 9
        _get_statistics(f"{DEVICE_COUNT if DEVICE_COUNT <= 8 else 9}")
    finally:
        # Restore the progress bars even when a check raised
        if disabled:
            enable_progress_bars()


# =============================================
# Fixes Bitsandbytes to remove missing warnings
from transformers.utils.quantization_config import (
    BitsAndBytesConfig,
    QuantizationMethod,
)

# Build a patched copy of BitsAndBytesConfig.__init__ from its source text.
BitsAndBytesConfig__init__ = inspect.getsource(BitsAndBytesConfig.__init__)
# Drop the `if kwargs:` line together with the line that follows it
BitsAndBytesConfig__init__ = re.sub(
    r"if[\s]{1,}kwargs\:[\s]{1,}.+?\n",
    "",
    BitsAndBytesConfig__init__,
    flags = re.MULTILINE,
)
# Dedent every line by the indentation of the first line
BitsAndBytesConfig__init__ = BitsAndBytesConfig__init__.split("\n")
length_spaces = len(re.match(r"[\s]{1,}", BitsAndBytesConfig__init__[0]).group(0))
BitsAndBytesConfig__init__ = "\n".join(
    x[length_spaces:] for x in BitsAndBytesConfig__init__
)
# Rename so that exec defines `_BitsAndBytesConfig__init__`
# (every occurrence of "__init__" in the source text is replaced)
BitsAndBytesConfig__init__ = BitsAndBytesConfig__init__.replace(
    "__init__",
    "_BitsAndBytesConfig__init__",
)
exec(BitsAndBytesConfig__init__, globals())

# Single device and WORLD_SIZE <= 1: make accelerate report no distributed setup
if DEVICE_COUNT == 1 and int(os.environ.get("WORLD_SIZE", "1")) <= 1:
    from accelerate.utils.dataclasses import DistributedType

    def _prepare_backend(self, *args, **kwargs):
        # Always returns (None, DistributedType.NO), ignoring all arguments
        return None, DistributedType.NO

    import accelerate.state

    accelerate.state.PartialState._prepare_backend = _prepare_backend
    # Class attribute replaced by a callable that always returns DistributedType.NO
    accelerate.accelerator.Accelerator.distributed_type = (
        lambda *args, **kwargs: DistributedType.NO
    )


# to move multiple tensors to the same device
def move_to_device(target_device, *tensors):
    """
    Move multiple tensors to target device if they're not already there.

    Args:
        target_device: The target device, given as an int index, a device
            string like "cuda:0", or a torch.device.
        *tensors: Variable number of tensors to potentially move

    Returns:
        The single tensor when exactly one tensor is passed, otherwise a tuple
        of tensors (empty when none are passed). Tensors already on the target
        device are returned as the same objects.

    Raises:
        ValueError: If target_device is not an int, str or torch.device.
    """
    if isinstance(target_device, (int, str)):
        # ints are device indices, strings are device names like "cuda:0"
        target_device = torch.device(target_device)
    elif not isinstance(target_device, torch.device):
        raise ValueError(f"Invalid target device: {target_device}")

    moved_tensors = tuple(
        tensor if tensor.device == target_device else tensor.to(target_device)
        for tensor in tensors
    )
    # A lone tensor is returned bare; zero tensors give an empty tuple
    # instead of an IndexError.
    if len(moved_tensors) == 1:
        return moved_tensors[0]
    return moved_tensors


import transformers.utils.quantization_config

# Install the patched constructor (defined earlier in this module via exec)
transformers.utils.quantization_config.BitsAndBytesConfig.__init__ = (
    _BitsAndBytesConfig__init__
)
# =============================================

# Offloading to disk for modules (lm_head, embed_tokens)
import pickle


def offload_to_disk(
    W, model, name, temporary_location: str = "_unsloth_temporary_saved_buffers"
):
    file_location = os.path.join(temporary_location, model.config._name_or_path)
    if not os.path.exists(file_location):
        os.makedirs(file_location)

    filename = os.path.join(file_location, f"{name}.pt")
    W = W.weight if hasattr(W, "weight") else W
    torch.save(
        W,
        filename,
        pickle_module = pickle,
        pickle_protocol = pickle.HIGHEST_PROTOCOL,
    )
    # We must use weights_only = False due to pickling
    offloaded_W = torch.load(
        filename, map_location = "cpu", mmap = True, weights_only = False
    )
    offloaded_W._offloaded_file_location = filename
    return offloaded_W


def offload_input_embeddings(
    model, temporary_location: str = "_unsloth_temporary_saved_buffers"
):
    """
    Replace the model's input embeddings with a disk-offloaded copy.

    The current input embedding weight is offloaded via offload_to_disk under
    the name "input_embeddings", wrapped in a new torch.nn.Embedding, and set
    on the model. The new module records the file path in
    `_offloaded_file_location`.
    """
    disk_weight = offload_to_disk(
        model.get_input_embeddings(),
        model,
        "input_embeddings",
        temporary_location,
    )
    embedding = torch.nn.Embedding.from_pretrained(disk_weight)
    # Keep track of where the backing file lives
    embedding._offloaded_file_location = disk_weight._offloaded_file_location
    model.set_input_embeddings(embedding)


def offload_output_embeddings(
    model, temporary_location: str = "_unsloth_temporary_saved_buffers"
):
    """
    Replace the model's output embeddings with a disk-offloaded copy.

    The current output embedding weight is offloaded via offload_to_disk under
    the name "output_embeddings" and attached to a bias-free torch.nn.Linear,
    which is then set on the model. The new module records the file path in
    `_offloaded_file_location`.
    """
    disk_weight = offload_to_disk(
        model.get_output_embeddings(),
        model,
        "output_embeddings",
        temporary_location,
    )

    # Start from a 1x1 placeholder layer and swap in the offloaded weight
    lm_head = torch.nn.Linear(1, 1, bias = None)
    del lm_head.weight
    lm_head.weight = disk_weight
    # Mirror the weight's shape: rows are outputs, columns are inputs
    lm_head.out_features = disk_weight.shape[0]
    lm_head.in_features = disk_weight.shape[1]

    lm_head._offloaded_file_location = disk_weight._offloaded_file_location
    model.set_output_embeddings(lm_head)


# Fixes a weird Torch 2.3 bug which says T4s have bfloat16
def is_bfloat16_supported():
    """Return the module-level SUPPORTS_BFLOAT16 flag computed at import time."""
    return SUPPORTS_BFLOAT16


def is_vLLM_available():
    """Return the result of transformers' _is_package_available check for "vllm"."""
    return _is_package_available("vllm")


# Patches models to add RoPE Scaling
def patch_linear_scaling(
    model_name = "gemma2",
    rope_module = None,
    scaled_rope_module = None,
    attention_module = None,
):
    """
    Generate source for an attention `__init__` that supports linear RoPE scaling.

    The source of `attention_module.__init__` is dedented, renamed to
    `{Model}Attention__init__`, and its `self.rotary_emb = ...` assignment is
    replaced by a block that picks `rope_module` or `scaled_rope_module`
    depending on `config.rope_scaling`. Nothing is executed here; the caller
    receives the generated source text.

    Args:
        model_name: transformers model name, used for the import path and the
            `{Model}` class-name prefix (`model_name.title()`).
        rope_module: Class used when `rope_scaling` is None.
        scaled_rope_module: Class used for the "linear" scaling type.
        attention_module: Attention class whose `__init__` is rewritten.

    Returns:
        tuple: `(init_name, source)` on success; `(None, source)` when no
        `self.rotary_emb = ...` assignment was found (source is renamed but
        otherwise unpatched); `(None, None)` when the source of `__init__`
        cannot be retrieved.
    """
    assert rope_module is not None and scaled_rope_module is not None
    assert attention_module is not None

    rope_name = rope_module.__name__
    scaled_rope_name = scaled_rope_module.__name__
    class_prefix = model_name.title()
    model_filepath = f"transformers.models.{model_name}.modeling_{model_name}"
    # Preamble prepended to the generated source
    exec_code = (
        f"import torch.nn as nn\n"
        f"from typing import Union, Optional, List, Any, Callable, Tuple\n"
        f"from {model_filepath} import logger, "
        f"{class_prefix}Attention, {class_prefix}Config"
    )

    try:
        function = inspect.getsource(attention_module.__init__)
    except Exception:
        # Most likely already patched!
        return None, None
    # Dedent by the column of the first `def`
    where = function.find("def")
    function = "\n".join(line[where:] for line in function.split("\n"))
    init_name = f"{class_prefix}Attention__init__"
    function = function.replace("def __init__", f"def {init_name}")
    # Zero-argument super() does not work in a function defined outside the class body
    function = function.replace(
        "super().__init__()",
        f"super({class_prefix}Attention, self).__init__()",
    )
    fix_rope_function = """
    if getattr(self.config, "rope_scaling", None) is None:
        self.rotary_emb = {rope_function}(
            dim = self.head_dim,
            max_position_embeddings=self.max_position_embeddings,
            base=self.rope_theta,
        )
    else:
        scaling_type = self.config.rope_scaling["type"]
        scaling_factor = self.config.rope_scaling["factor"]
        if scaling_type == "linear":
            self.rotary_emb = {scaled_rope_function}(
                dim = self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                scaling_factor=scaling_factor,
                base=self.rope_theta,
            )
        else:
            raise ValueError(f"Unknown RoPE scaling type {{scaling_type}}")
    pass
    """
    fix_rope_function = fix_rope_function.format(
        rope_function = rope_name,
        scaled_rope_function = scaled_rope_name,
    )
    # Matches from `self.rotary_emb = ` up to the first closing parenthesis
    rotary_emb = re.findall(
        r"self\.rotary\_emb \= .+?\)",
        function,
        flags = re.DOTALL | re.MULTILINE,
    )
    if not rotary_emb:
        return None, exec_code + "\n\n" + function

    # Only the first assignment is replaced
    function = function.replace(rotary_emb[0], fix_rope_function, 1)
    function = exec_code + "\n\n" + function
    return init_name, function


# Patches for Llama-3 LlamaExtendedRotaryEmbedding
def patch_llama_rope_scaling(
    model_name = "llama",
    rope_module = None,
    scaled_rope_module = None,
    extended_rope_module = None,
    attention_module = None,
    longrope_module = None,
):
    """
    Build replacement source code for `{Model}Attention.__init__` in which the first
    `self.rotary_emb = ...` assignment is swapped for a block that selects the RoPE
    class from `self.config.rope_scaling` ("linear", "llama3" or "longrope").

    Args:
        model_name: Lower-case model family; used for the transformers module path and,
            title-cased, for the attention / config class names.
        rope_module: Class used when `rope_scaling` is unset.
        scaled_rope_module: Class used for `"linear"` scaling.
        extended_rope_module: Class used for `"llama3"` scaling.
        attention_module: Attention class whose `__init__` source is rewritten.
        longrope_module: Class used for `"longrope"` scaling; `rope_module` is used
            when this is None.

    Returns:
        `(init_name, source)` on success, where `source` defines a function called
        `init_name`. `(None, source)` if no `self.rotary_emb = ...` assignment is
        found. `(None, None)` if the source of `__init__` cannot be retrieved.
    """
    assert (
        rope_module is not None
        and scaled_rope_module is not None
        and extended_rope_module is not None
    )
    assert attention_module is not None

    class_prefix = model_name.title()
    model_filepath = f"transformers.models.{model_name}.modeling_{model_name}"
    # Import header the rewritten `__init__` needs when it is exec'd on its own.
    exec_code = (
        "import torch.nn as nn\n"
        "from typing import Union, Optional, List, Any, Callable, Tuple\n"
        f"from {model_filepath} import logger, "
        f"{class_prefix}Attention, {class_prefix}Config"
    )

    try:
        function = inspect.getsource(attention_module.__init__)
    except Exception:
        # Most likely already patched!
        return None, None

    # Strip the method's leading indentation so it can be exec'd at module level.
    where = function.find("def")
    function = "\n".join(line[where:] for line in function.split("\n"))
    init_name = f"{class_prefix}Attention__init__"
    function = function.replace("def __init__", f"def {init_name}")
    # Zero-argument `super()` does not work outside a class body.
    function = function.replace(
        "super().__init__()",
        f"super({class_prefix}Attention, self).__init__()",
    )
    fix_rope_function = """
    if getattr(self.config, "rope_scaling", None) is None:
        self.rotary_emb = {rope_function}(
            dim = self.head_dim,
            max_position_embeddings=self.max_position_embeddings,
            base=self.rope_theta,
        )
    else:
        scaling_type1 = self.config.rope_scaling.get("type", None)
        scaling_type2 = self.config.rope_scaling.get("rope_type", None)
        scaling_type = scaling_type1 if scaling_type1 is not None else scaling_type2
        scaling_factor = self.config.rope_scaling.get("factor")

        if scaling_type == "linear":
            self.rotary_emb = {scaled_rope_function}(
                dim = self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                scaling_factor=scaling_factor,
                base=self.rope_theta,
            )
        elif scaling_type == "llama3":
            self.rotary_emb = {extended_rope_function}(
                dim = self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                base=self.rope_theta,
            )
        elif scaling_type == "longrope":
            self.rotary_emb = {longrope_rope_function}(
                dim = self.head_dim,
                max_position_embeddings = self.max_position_embeddings,
                original_max_position_embeddings = self.config.original_max_position_embeddings,
                base = self.rope_theta,
                short_factor = self.config.rope_scaling['short_factor'],
                long_factor  = self.config.rope_scaling['long_factor' ],
            )
        else:
            raise ValueError(f"Unknown RoPE scaling type {{scaling_type}}")
    pass
    """

    fix_rope_function = fix_rope_function.format(
        rope_function = rope_module.__name__,
        scaled_rope_function = scaled_rope_module.__name__,
        extended_rope_function = extended_rope_module.__name__,
        longrope_rope_function = (
            longrope_module if longrope_module is not None else rope_module
        ).__name__,
    )
    rotary_emb = re.findall(
        r"self\.rotary\_emb \= .+?\)",
        function,
        flags = re.DOTALL | re.MULTILINE,
    )
    if len(rotary_emb) == 0:
        # Nothing to swap: still hand back self-contained source (with its import
        # header), matching what the success path returns.
        return None, exec_code + "\n\n" + function
    # Only the first assignment is replaced.
    function = function.replace(rotary_emb[0], fix_rope_function, 1)
    function = exec_code + "\n\n" + function
    return init_name, function


def create_boolean_mask(n = 4096, sliding_window = 2048):
    """
    Build an `(n, n)` boolean attention mask, computed in place on one buffer.

    With `sliding_window == 0` the result is the strict upper triangle of an all-True
    matrix. Otherwise the buffer is upper-triangularised, its transpose is
    upper-triangularised again at diagonal `-sliding_window`, and the transposed view
    is negated and returned.
    """
    grid = torch.ones(n, n, dtype = torch.bool)
    if sliding_window == 0:
        return torch.triu(grid, diagonal = 1, out = grid)

    torch.triu(grid, diagonal = 0, out = grid)
    # All further work happens on the transposed view of the same storage.
    flipped = grid.T
    torch.triu(flipped, diagonal = -sliding_window, out = flipped)
    torch.logical_not(flipped, out = flipped)
    return flipped


def test_mask_creation():
    """
    Compare `create_boolean_mask` with transformers' `AttentionMaskConverter` for
    sizes 2..22, with sliding windows 1..22 and with no sliding window.
    """
    from transformers.modeling_attn_mask_utils import AttentionMaskConverter

    def reference_mask(size, window):
        # Boolean view of the float16 reference mask: True wherever the reference
        # holds its minimum value.
        converter = AttentionMaskConverter(
            is_causal = True,
            sliding_window = window,
        )
        additive = converter.to_causal_4d(
            1,
            size,
            size,
            dtype = torch.float16,
        )
        additive = additive.squeeze(0).squeeze(0)
        return additive == additive.min()

    for n in range(2, 23):
        for s in range(1, 23):
            expected = reference_mask(n, s)
            produced = create_boolean_mask(n = n, sliding_window = s)
            assert torch.all(expected == produced)

        # No sliding window on the reference side maps to `sliding_window = 0` here.
        expected = reference_mask(n, None)
        produced = create_boolean_mask(n = n, sliding_window = 0)
        assert torch.all(expected == produced)


def _unsloth_pre_compute_loss(self, model, inputs, *args, **kwargs):
    """
    Normalise the arguments of a `compute_loss` call, then delegate to
    `self._old_compute_loss`.

    - A `num_items_in_batch = None` keyword is dropped; a real value is copied into
      `inputs` unless `inputs` already has that key.
    - Warns once when no `num_items_in_batch` is available and gradient accumulation
      steps differ from 1.
    - Adds all-zero `token_type_ids` for Gemma3 models in training mode when `inputs`
      has `input_ids` but no `token_type_ids`.
    """
    batch_key = "num_items_in_batch"
    num_items_in_batch = kwargs.get(batch_key)

    if batch_key in kwargs:
        if num_items_in_batch is None:
            # Remove it since the model does not support it!
            kwargs.pop(batch_key)
        elif batch_key not in inputs:
            inputs[batch_key] = num_items_in_batch

    # Get gradient accumulation steps if possible
    if num_items_in_batch is None:
        trainer_args = getattr(self, "args", self)
        if getattr(trainer_args, "gradient_accumulation_steps", 1) != 1:
            # Unwrap `.base_model` then `.model` to report the underlying class name.
            wrapped = model
            for attribute in ("base_model", "model"):
                if hasattr(wrapped, attribute):
                    wrapped = getattr(wrapped, attribute)
            name = wrapped.__class__.__name__

            logger.warning_once(
                f"Unsloth: Not an error, but {name} does not accept `num_items_in_batch`.\n"
                "Using gradient accumulation will be very slightly less accurate.\n"
                "Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient"
            )

    # Gemma3 multimodal models in transformers 5.x require token_type_ids during training.
    # For text-only SFT, token_type_ids should be all zeros (no image tokens).
    needs_token_types = "token_type_ids" not in inputs and "input_ids" in inputs
    if needs_token_types:
        core = model
        for attribute in ("base_model", "model", "model"):
            core = getattr(core, attribute, core)
        model_type = getattr(getattr(core, "config", None), "model_type", "")
        if model_type in ("gemma3",):
            import sys as _sys

            defining_module = _sys.modules.get(type(core).__module__)
            # Only applies when the model's module exposes `create_causal_mask_mapping`.
            if defining_module is not None and hasattr(
                defining_module, "create_causal_mask_mapping"
            ):
                if core.training:
                    inputs["token_type_ids"] = torch.zeros_like(inputs["input_ids"])

    return self._old_compute_loss(model, inputs, *args, **kwargs)


def _unsloth_import_trainer_names(source):
    """
    Import into this module's globals every name from `dir(transformers.trainer)` that
    occurs as a substring of `source`, so that an exec'd copy of a Trainer method can
    resolve the names it references.
    """
    import transformers.trainer

    needed = [item for item in dir(transformers.trainer) if item in source]
    if not needed:
        # `from transformers.trainer import ()` would be a SyntaxError.
        return
    # NOTE(review): this is plain substring matching, so it can also pull in module
    # dunders (e.g. `__name__`) when they appear in `source` - confirm that is intended.
    exec(
        "from transformers.trainer import (" + ", ".join(needed) + ")",
        globals(),
    )


def patch_gradient_accumulation_fix(Trainer):
    """
    Patch a transformers `Trainer` class in place to fix gradient accumulation.

    Steps, each skipped when already applied:
      1. Replace `get_batch_samples` with `_unsloth_get_batch_samples` and wrap
         `compute_loss` with `_unsloth_pre_compute_loss` (rewriting in-place
         `loss *=` into `loss = loss *` first, if present).
      2. Re-exec `training_step` as `_unsloth_training_step` with adjusted loss scaling.
      3. Re-exec `__init__` as `_unsloth___init__` so the `accepts_loss_kwargs`
         branch is never taken.

    Returns early, doing nothing, if `get_batch_samples` is already patched.

    Raises:
        NotImplementedError: if `Trainer.get_batch_samples` does not end with
            `return batch_samples, num_items_in_batch`.
    """
    # Fixes gradient accumulation
    # Fixes Output 0 of UnslothFusedLossBackward is a view and is being modified inplace.
    import inspect

    if hasattr(Trainer, "get_batch_samples"):
        if Trainer.get_batch_samples.__name__ == "_unsloth_get_batch_samples":
            return
        if (
            not inspect.getsource(Trainer.get_batch_samples)
            .strip()
            .endswith("return batch_samples, num_items_in_batch")
        ):
            raise NotImplementedError(
                "Unsloth: Please make a Github issue immediately!!"
            )

        Trainer.get_batch_samples = _unsloth_get_batch_samples

        # Also fix passing in num_items_in_batch
        if not hasattr(Trainer, "_old_compute_loss"):
            # Fix transformers 4.57.0 causing `Output 0 of UnslothFusedLossBackward is a view and is being modified inplace.`
            function = inspect.getsource(Trainer.compute_loss)
            if "loss *=" in function or "loss*=" in function:
                # Strip the method's indentation so it can be exec'd at module level.
                where = function.find("def")
                function = "\n".join(x[where:] for x in function.split("\n"))

                # Import all variables that need importing
                _unsloth_import_trainer_names(function)

                # Replace loss*= with loss = loss *
                function = re.sub(
                    r"loss[\s]{0,}\*\=",
                    "loss = loss *",
                    function,
                )
                exec(function, globals())
                Trainer.compute_loss = compute_loss
            Trainer._old_compute_loss = Trainer.compute_loss
            Trainer.compute_loss = _unsloth_pre_compute_loss
    else:
        logger.warning_once(
            "Unsloth: We fixed a gradient accumulation bug, "
            "but it seems like you don't have the latest transformers version!\n"
            "Please update transformers, TRL and unsloth via:\n"
            "`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`"
        )

    # Also fix up loss scaling ie negate loss *= self.args.gradient_accumulation_steps
    if not (
        Trainer.training_step.__name__ == "_unsloth_training_step"
        or "num_items_in_batch"
        not in inspect.signature(Trainer.training_step).parameters
    ):
        function = inspect.getsource(Trainer.training_step)
        where = function.find("def")
        function = "\n".join(x[where:] for x in function.split("\n"))

        # Import all variables that need importing
        _unsloth_import_trainer_names(function)

        # Accelerate does / self.args.gradient_accumulation_steps internally, so if we already
        # summed it up and did the division before hand, we have to negate it.
        function = function.replace(
            "loss *= self.args.gradient_accumulation_steps",
            "if num_items_in_batch is not None: loss *= self.args.gradient_accumulation_steps",
        )
        function = function.replace(
            "def training_step", "def _unsloth_training_step", 1
        )

        # Fix 4.47.0 issue where num_items_in_batch was removed
        # See https://github.com/huggingface/transformers/pull/35121
        function = function.replace(
            "if self.model_accepts_loss_kwargs:",
            "if False:",
        )

        # Fix when num_items_in_batch is nothing
        # https://github.com/huggingface/transformers/pull/35207
        # The replacement must be a raw string: in a normal string "\1" / "\2" / "\3"
        # are octal escapes (control characters), not group backreferences.
        function = re.sub(
            r"else:\n"
            r"([\s]{4,})self\.accelerator\.backward\(loss, \*\*kwargs\)\n"
            r"(.+?)if num_items_in_batch is None\:\n"
            r"(.+?)return loss\.detach\(\) \/ self\.args\.gradient_accumulation_steps",
            r"else:\n"
            r"\2if num_items_in_batch is None:\n"
            r"\3loss = loss / self.args.gradient_accumulation_steps\n"
            r"\1self.accelerator.backward(loss, **kwargs)",
            function,
        )

        exec(function, globals())
        Trainer.training_step = _unsloth_training_step

    # Prevent double scaling gradient accumulation
    # https://github.com/huggingface/transformers/pull/37208
    # Patch model_accepts_loss_kwargs detection in Trainer.__init__
    if Trainer.__init__.__name__ != "_unsloth___init__":
        try:
            init_function = inspect.getsource(Trainer.__init__)
        except Exception:
            init_function = ""
        # Skip the patch entirely when the source could not be retrieved; there is
        # nothing to exec and no `_unsloth___init__` would be defined.
        if init_function:
            init_function = textwrap.dedent(init_function)

            # Import all variables that need importing
            _unsloth_import_trainer_names(init_function)

            init_function = init_function.replace(
                "def __init__", "def _unsloth___init__", 1
            )

            # Force else branch
            init_function = re.sub(
                r'if[\s]+hasattr\(\s*unwrapped_model\s*,\s*"accepts_loss_kwargs"\s*\)\s*:',
                'if hasattr(unwrapped_model, "accepts_loss_kwargs") and False:',
                init_function,
            )
            exec(init_function, globals())
            Trainer.__init__ = _unsloth___init__


def patch_tokenizer(model, tokenizer):
    """
    Run `_patch_tokenizer` and, when a model comes back, record the Unsloth version
    under `unsloth_version` in its config. Returns the `(model, tokenizer)` pair.
    """
    patched_model, patched_tokenizer = _patch_tokenizer(model, tokenizer)
    if patched_model is None:
        return patched_model, patched_tokenizer
    patched_model.config.update({"unsloth_version": __version__})
    return patched_model, patched_tokenizer


def patch_fast_lora():
    """Replace the `forward` of PEFT's bitsandbytes `Linear4bit` LoRA layer with `fast_lora_forward`."""
    from peft.tuners.lora import bnb as lora_bnb

    lora_bnb.Linear4bit.forward = fast_lora_forward


def unsloth_compile_transformers(
    dtype,
    model_name,
    model_types,
    token = None,
    revision = None,
    trust_remote_code = False,
    sdpa_dynamic_mask = True,
    sdpa_bool_masks = True,
    sdpa_gqa_replace = True,
    sdpa_dynamic_compile = True,
    compile_attention = True,
    disable_causal_masks = True,
    compile_torch_modules = True,
    compile_custom_modules = True,
    compile_function_calls = True,
    fuse_lm_head = True,
    gradient_checkpointing = True,
    manual_replacements = True,
    fast_lora_forwards = True,
    fast_residual_stream = True,
    accurate_accumulation = True,
    epilogue_fusion = True,
    max_autotune = False,
    shape_padding = True,
    cudagraphs = False,
    debug = False,
    fullgraph = True,
    import_from_cache = False,
    disable = False,
    return_logits = False,
    unsloth_force_compile = False,
):
    """
    Run `_unsloth_compile_transformers` for every distinct entry of `model_types`,
    bracketed by the "pre_compile" and "post_compile" temporary patches.

    `dtype`, `model_name`, `token` and `revision` are accepted but not used here. The
    remaining flags are forwarded unchanged to `_unsloth_compile_transformers`.

    Returns:
        `(model_types, supports_sdpa)`. `supports_sdpa` is False whenever compilation
        is skipped: Torch older than 2.4, `trust_remote_code` without
        `unsloth_force_compile`, or `disable = True`.
    """
    if Version(torch_version) < Version("2.4.0"):
        print(
            "="
            * 30
            + "Unsloth: Unfortunately Unsloth vision and other newer optimized models need Torch 2.4 or later.\n"
            f"You have Torch version {torch_version}. Please upgrade your Torch version by visiting https://pytorch.org/\n"
            "For now your models will not get optimized, but will still work for now!"
        )
        # Same tuple shape as every other exit so callers can always unpack it.
        return model_types, False
    if trust_remote_code and unsloth_force_compile == False:
        print(
            "Unsloth: We can't trace models if `trust_remote_code = True`, "
            "so turning off some optimizations!"
        )
        return model_types, False
    # De-duplicate while keeping first-seen order.
    model_types = list(dict.fromkeys(model_types))
    if disable:
        return model_types, False

    # One-element list, passed down as `supports_sdpa`; its first item is returned.
    supports_sdpa = [True]

    # Run patches BEFORE compiler so class replacements (e.g. GptOssTopKRouter,
    # GptOssExperts) are in place before the compiler caches references to them.
    _run_temporary_patches("pre_compile")

    for model_type in model_types:
        _unsloth_compile_transformers(
            model_type,
            sdpa_dynamic_mask = sdpa_dynamic_mask,
            sdpa_bool_masks = sdpa_bool_masks,
            sdpa_gqa_replace = sdpa_gqa_replace,
            sdpa_dynamic_compile = sdpa_dynamic_compile,
            compile_attention = compile_attention,
            disable_causal_masks = disable_causal_masks,
            compile_torch_modules = compile_torch_modules,
            compile_custom_modules = compile_custom_modules,
            compile_function_calls = compile_function_calls,
            fuse_lm_head = fuse_lm_head,
            gradient_checkpointing = gradient_checkpointing,
            manual_replacements = manual_replacements,
            fast_lora_forwards = fast_lora_forwards,
            fast_residual_stream = fast_residual_stream,
            accurate_accumulation = accurate_accumulation,
            epilogue_fusion = epilogue_fusion,
            max_autotune = max_autotune,
            shape_padding = shape_padding,
            cudagraphs = cudagraphs,
            debug = debug,
            fullgraph = fullgraph,
            import_from_cache = import_from_cache,
            disable = disable,
            return_logits = return_logits,
            supports_sdpa = supports_sdpa,
        )
    # Redo patches which override compiler
    _run_temporary_patches("post_compile")
    return model_types, supports_sdpa[0]


# Message shown when code touches the empty-logits placeholder. It tells users that
# logits are no longer returned unless they ask for them, ie by setting
# os.environ['UNSLOTH_RETURN_LOGITS'] = '1'
LOGITS_ERROR_STRING = (
    "Unsloth: Logits are empty from 2024.11 onwards. To get raw logits again, please "
    'set the environment variable `UNSLOTH_RETURN_LOGITS` to `"1" BEFORE starting to train ie before `trainer.train()`. For example:\n'
    "```\nimport os\n"
    "os.environ['UNSLOTH_RETURN_LOGITS'] = '1'\n"
    "trainer.train()\n```\n"
    "No need to restart your console - just add `os.environ['UNSLOTH_RETURN_LOGITS'] = '1'` before trainer.train() and re-run the cell!"
)


def raise_logits_error(*args, **kwargs):
    """Ignore all arguments and raise `NotImplementedError` carrying `LOGITS_ERROR_STRING`."""
    raise NotImplementedError(LOGITS_ERROR_STRING)


def return_none(*args, **kwargs):
    """Accept any arguments, do nothing, and return `None`."""
    return


class EmptyLogits:
    """
    Placeholder object standing in for logits. Indexing it raises the logits error;
    its `repr` and `str` are the same explanatory message.
    """

    def __init__(self):
        pass

    def raise_getattr_error(self, attr):
        # Fallback attribute lookup: `.to` resolves to a no-op returning None, every
        # other missing attribute resolves to a callable that raises the logits error.
        if attr == "to":
            return return_none
        return raise_logits_error

    __getitem__ = raise_logits_error
    __getattr__ = raise_getattr_error

    def __repr__(self):
        return LOGITS_ERROR_STRING

    def __str__(self):
        return LOGITS_ERROR_STRING


# Shared placeholder instance used in place of real logits.
EMPTY_LOGITS = EmptyLogits()
# For every dunder name found on `torch.Tensor`, define a module-level function
# `raise_<index>` and try to attach it to the `EMPTY_LOGITS` instance under that name.
functions = dir(torch.Tensor)
for j, function in enumerate(functions):
    if function.startswith("__") and function.endswith("__"):
        # NOTE(review): despite the `raise_` prefix, the generated function only
        # prints the dunder name - confirm whether raising was intended.
        exec(
            f"def raise_{j}(*args, **kwargs): print('{function}')", globals(), locals()
        )
        try:
            exec(f"EMPTY_LOGITS.{function} = raise_{j}", globals(), locals())
        except:
            # Any attribute that cannot be assigned on the instance is skipped.
            continue


def validate_loftq_config(loftq_config, lora_dropout, bias, init_lora_weights, model):
    """
    Check LoRA settings and return the LoftQ config to use.

    Warns once when `lora_dropout` is not 0 or `bias` is not "none". When
    `init_lora_weights == "loftq"`, an empty `loftq_config` is replaced by
    `LoftQConfig(loftq_bits = 4, loftq_iter = 1)`.

    Returns:
        `loftq_config`, with `None` turned into `{}` (or into the default
        `LoftQConfig` described above).

    Raises:
        ValueError: for an unsupported `init_lora_weights` value, or when LoftQ init
            is requested on a model whose config has a `quantization_config`.
        RuntimeError: when LoftQ init is requested but `LoraConfig` has no
            `loftq_config` parameter.
    """
    from peft import LoraConfig

    loftq_config = {} if loftq_config is None else loftq_config

    # LoftQ support is detected from the signature of `LoraConfig`.
    supports_loftq = "loftq_config" in str(inspect.signature(LoraConfig))

    if lora_dropout != 0:
        logger.warning_once(
            f"Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = {lora_dropout}.\n"
            f"Unsloth will patch all other layers, except LoRA matrices, causing a performance hit."
        )

    if bias != "none":
        logger.warning_once(
            f"Unsloth: bias = `none` is supported for fast patching. You are using bias = {bias}.\n"
            f"Unsloth will patch all other layers, except LoRA matrices, causing a performance hit."
        )

    init_is_valid = type(init_lora_weights) is bool or any(
        init_lora_weights == option for option in ("gaussian", "loftq", "corda")
    )
    if not init_is_valid:
        raise ValueError(
            'Unsloth: `init_lora_weights` must be either [True, False, "gaussian", "loftq", "corda"].'
        )

    wants_loftq = init_lora_weights == "loftq"
    if not wants_loftq:
        return loftq_config

    if not supports_loftq:
        import peft

        raise RuntimeError(
            f"Unsloth: Your PEFT version of {peft.__version__} does not support LoftQ init.\n"
            "Please install PEFT 0.7.2 or higher.\n"
            "You can also install from source: `pip install git+https://github.com/huggingface/peft.git"
        )

    if loftq_config == {}:
        from peft import LoftQConfig

        logger.warning_once(
            "Unsloth: init_lora_weights = `loftq` is set, but `loftq_config` is None.\n"
            "We shall use `loftq_config = LoftQConfig(loftq_bits = 4, loftq_iter = 1)`."
        )
        loftq_config = LoftQConfig(loftq_bits = 4, loftq_iter = 1)

    if hasattr(model.config, "quantization_config"):
        raise ValueError(
            "Unsloth: You are using `loftq` init, yet `load_in_4bit = True` was set.\n"
            "Reload your model without any quantization by setting `load_in_4bit = False`."
        )

    return loftq_config


def fast_inference_setup(model_name, model_config):
    """
    Decide whether fast (vLLM) inference can be used and which checkpoint name to load.

    Returns:
        `(fast_inference, model_name)`. `fast_inference` is False when vLLM is not
        available. A name ending in "unsloth-bnb-4bit" is rewritten to end in
        "bnb-4bit" when `vllm_dynamic_quant_supported` rejects it.
    """
    if is_vLLM_available():
        fast_inference = True
    else:
        logger.warning_once(
            "Unsloth: vLLM is not installed! Will use Unsloth inference!"
        )
        fast_inference = False

    from unsloth_zoo.vllm_utils import (
        patch_vllm,
        vllm_dynamic_quant_supported,
    )

    patch_vllm()

    dynamic_suffix = "unsloth-bnb-4bit"
    if model_name.endswith(dynamic_suffix) and not vllm_dynamic_quant_supported(
        model_name, model_config
    ):
        # Instead use -bnb-4bit variant
        logger.warning_once(
            f"Unsloth: Switching from Unsloth dynamic quant to normal quant since\n"
            f"we do not yet support fast inference for {model_name}"
        )
        model_name = model_name[: -len(dynamic_suffix)] + "bnb-4bit"
    return fast_inference, model_name


def patch_peft_fast_inference(model):
    """
    Expose the vLLM helpers of `model.model` on the wrapper `model` itself.

    Does nothing when `model.model` has no (non-None) `vllm_engine`. Otherwise copies
    `vllm_engine`, `fast_generate` and `fast_generate_batches` up one level and adds
    `save_lora` / `load_lora` bound to `model`.
    """
    if getattr(model.model, "vllm_engine", None) is None:
        return

    for attribute in ("vllm_engine", "fast_generate", "fast_generate_batches"):
        setattr(model, attribute, getattr(model.model, attribute))

    # Also saving and loading LoRA
    from unsloth_zoo.vllm_utils import save_lora, load_lora

    model.save_lora = functools.partial(save_lora, model)
    model.load_lora = functools.partial(load_lora, model)


def error_out_no_vllm(*args, **kwargs):
    """Ignore all arguments and raise `NotImplementedError`: no vLLM fast inference for this model."""
    message = "Unsloth: vLLM is not yet supported for fast inference for this model! Please use `.generate` instead"
    raise NotImplementedError(message)


# torchao is optional. If it (or just `Int4WeightOnlyConfig`) cannot be imported, the
# names are bound to None so this module still loads. `except Exception` is used
# rather than a bare `except` so KeyboardInterrupt / SystemExit are not swallowed.
try:
    from torchao.core.config import AOBaseConfig

    try:
        from torchao.quantization import Int4WeightOnlyConfig
    except Exception:
        print("Unsloth: TorchAO changed `torchao.quantization.Int4WeightOnlyConfig`")
        Int4WeightOnlyConfig = None
except Exception:
    AOBaseConfig = None
    Int4WeightOnlyConfig = None


def _torchao_default_base_config_and_filter_fns():
    """Default rule list: `Int4WeightOnlyConfig(group_size = 128)` on wide `nn.Linear` layers."""

    def is_wide_linear(m, _):
        return isinstance(m, torch.nn.Linear) and getattr(m, "in_features", 0) >= 128

    return [
        (
            Int4WeightOnlyConfig(group_size = 128),
            is_wide_linear,
        ),
    ]


@dataclass
class TorchAOConfig:
    """Settings describing how a model is prepared for / converted from QAT."""

    # Name of the QAT scheme this config was built for.
    qat_scheme: Optional[str] = "int4"

    # Quantization rules as (config, filter_fn) pairs. `filter_fn` receives
    # (module, fully qualified name); it may be None.
    base_config_and_filter_fns: List[
        Tuple["AOBaseConfig", Optional[Callable[[torch.nn.Module, str], bool]]]
    ] = field(default_factory = _torchao_default_base_config_and_filter_fns)

    # Optional hook called with the model before quantization is set up.
    prequantization_transform: Optional[Callable[[torch.nn.Module], None]] = None


def _untie_input_output_embeddings(model: torch.nn.Module) -> None:
    """
    Give a HuggingFace model's output projection its own weight storage.

    Needed when the input and output embeddings should be quantized differently.
    The model is changed in place: the config flag is cleared, a tied output weight
    is replaced by a clone, and `tie_weights` becomes a no-op on this instance.

    Raises:
        AttributeError: if no output projection can be located.
    """
    # Record the change in the config so it persists.
    config = model.config
    if hasattr(config, "tie_word_embeddings"):
        config.tie_word_embeddings = False

    # Locate both embedding modules, falling back to `lm_head` for the output side.
    in_emb = model.get_input_embeddings()
    out_proj = model.get_output_embeddings()
    if not out_proj:
        out_proj = getattr(model, "lm_head", None)
    if out_proj is None:
        raise AttributeError("Couldn't locate output projection (lm_head).")

    # Sanity check: both weights must have the same shape.
    assert (
        out_proj.weight.shape == in_emb.weight.shape
    ), f"Shape mismatch: out_proj {out_proj.weight.shape} vs in_emb {in_emb.weight.shape}"

    # Clone only when the two weights really share storage.
    shares_storage = out_proj.weight.data_ptr() == in_emb.weight.data_ptr()
    if shares_storage:
        with torch.no_grad():
            cloned_weight = in_emb.weight.detach().clone()
        out_proj.weight = torch.nn.Parameter(cloned_weight)

    # Shadow `tie_weights` on this instance so later calls cannot re-tie.
    def _no_tie(self):
        return

    model.tie_weights = _no_tie.__get__(model, model.__class__)

    # Final check that the storages are now distinct.
    assert (
        out_proj.weight.data_ptr() != in_emb.weight.data_ptr()
    ), "Embeddings still tied!"


def _filter_fn_to_fqns(
    model: torch.nn.Module,
    filter_fn: Callable[[torch.nn.Module, str], bool],
) -> Iterator[str]:
    """
    Lazily yield the fully qualified name (FQN) of every entry of
    `model.named_modules()` for which `filter_fn(module, fqn)` is truthy.
    """
    yield from (
        fqn for fqn, module in model.named_modules() if filter_fn(module, fqn)
    )


def _convert_torchao_model(model):
    """
    Run the QAT "convert" step for every rule in `model._torchao_config` and store the
    matching `TorchAoConfig` on `model.config.quantization_config`.

    Raises:
        ValueError: if two rules have no filter, or two rules match the same module.
    """
    from transformers import TorchAoConfig
    from torchao.quantization import quantize_, ModuleFqnToConfig
    from torchao.quantization.qat import QATConfig
    from torchao.utils import TorchAOBaseTensor

    fqn_to_config = {}
    for base_config, filter_fn in model._torchao_config.base_config_and_filter_fns:
        convert_step = QATConfig(base_config, step = "convert")
        quantize_(model, convert_step, filter_fn = filter_fn)

        if filter_fn is not None:
            # Record the config under every module name this rule selects.
            for fqn in _filter_fn_to_fqns(model, filter_fn):
                if fqn in fqn_to_config:
                    raise ValueError(f"Found multiple quantization configs for {fqn}")
                fqn_to_config[fqn] = base_config
            continue

        # Default filter function used for quantize_
        if "_default" in fqn_to_config:
            raise ValueError("Cannot use multiple default quantization configs")
        fqn_to_config["_default"] = base_config

    in_emb = model.get_input_embeddings()
    out_proj = model.get_output_embeddings() or getattr(model, "lm_head", None)
    embeddings_converted = isinstance(in_emb.weight, TorchAOBaseTensor) or (
        out_proj is not None and isinstance(out_proj.weight, TorchAOBaseTensor)
    )
    if embeddings_converted:
        extra_kwargs = {
            "include_input_output_embeddings": True,
            "modules_to_not_convert": [],
        }
    else:
        extra_kwargs = {}

    model.config.quantization_config = TorchAoConfig(
        quant_type = ModuleFqnToConfig(fqn_to_config), **extra_kwargs
    )


def _prepare_model_for_qat(
    model: torch.nn.Module, qat_scheme: Union[str, TorchAOConfig]
) -> torch.nn.Module:
    """
    Transform a model for Quantization-Aware Training (QAT) during fine-tuning.

    On a high level, this means fake quantizing the base (frozen) model during training.
    Fake quantization refers to simulating quantization numerics in high precision (e.g. bf16).
    This helps mitigate quantization degradations when the model is quantized after training.

    QAT can be optionally combined with LoRA fine-tuning for additional throughput improvement.
    For more details: https://dev-discuss.pytorch.org/t/speeding-up-qat-by-1-89x-with-lora/2700

    Args:
        model: Model to prepare; it is modified in place.
        qat_scheme: One of "fp8-int4", "fp8-fp8", "int8-int4", "int4", "int8", or a
            ready-made `TorchAOConfig`, which is used as is.

    Returns:
        The same `model` object, after the QAT "prepare" step has been applied.

    Raises:
        ImportError: if a required torchao name cannot be imported.
        ValueError: if `qat_scheme` is a string that names no known scheme.
    """
    try:
        from torchao.quantization import PerRow, quantize_
        from torchao.quantization.granularity import PerGroup, PerAxis
        from torchao.quantization.qat import QATConfig
    except ImportError:
        raise ImportError(TORCHAO_MSG)

    # Gemma3 models have issues with int8 embedding quantization due to their
    # large vocabulary size (262144). Auto-switch to int4 weight-only instead.
    if qat_scheme == "int8-int4":
        model_types = get_transformers_model_type(model.config)
        is_gemma3 = any("gemma3" in mt or "gemma_3" in mt for mt in model_types)
        if is_gemma3:
            print(
                "Unsloth: Gemma3 has a large vocabulary causing int8 embedding issues. "
                "Switching to int4 weight-only QAT for training stability."
            )
            qat_scheme = "int4"

    # Translate a scheme name into a TorchAOConfig; each branch imports only the
    # torchao config classes it needs.
    if not isinstance(qat_scheme, TorchAOConfig):
        torchao_config: Optional[TorchAOConfig] = None
        if qat_scheme == "fp8-int4":
            try:
                from torchao.quantization import Float8DynamicActivationInt4WeightConfig
            except ImportError:
                raise ImportError(TORCHAO_MSG)
            group_size = 128
            base_config = Float8DynamicActivationInt4WeightConfig()
            # Only Linear layers with at least `group_size` input features.
            filter_fn = (
                lambda m, _: isinstance(m, torch.nn.Linear)
                and m.in_features >= group_size
            )
            torchao_config = TorchAOConfig(
                qat_scheme = qat_scheme,
                base_config_and_filter_fns = [(base_config, filter_fn)],
            )
        elif qat_scheme == "fp8-fp8":
            try:
                from torchao.quantization import (
                    Float8DynamicActivationFloat8WeightConfig,
                )
            except ImportError:
                raise ImportError(TORCHAO_MSG)
            base_config = Float8DynamicActivationFloat8WeightConfig(
                granularity = PerRow()
            )
            # No filter function is supplied for this scheme.
            torchao_config = TorchAOConfig(
                qat_scheme = qat_scheme, base_config_and_filter_fns = [(base_config, None)]
            )
        elif qat_scheme == "int8-int4":
            try:
                from torchao.quantization import (
                    Int8DynamicActivationIntxWeightConfig,
                    IntxWeightOnlyConfig,
                )
            except ImportError:
                raise ImportError(TORCHAO_MSG)
            # Two rules: int8 weight-only for Embedding modules, then an int4-weight
            # config with no filter. Embeddings are untied before quantization.
            torchao_config = TorchAOConfig(
                qat_scheme = qat_scheme,
                base_config_and_filter_fns = [
                    (
                        IntxWeightOnlyConfig(
                            weight_dtype = torch.int8, granularity = PerAxis(0)
                        ),
                        lambda m, fqn: isinstance(m, torch.nn.Embedding),
                    ),
                    (
                        Int8DynamicActivationIntxWeightConfig(
                            weight_dtype = torch.int4, weight_granularity = PerGroup(32)
                        ),
                        None,
                    ),
                ],
                prequantization_transform = _untie_input_output_embeddings,
            )
        elif qat_scheme == "int4":
            try:
                from torchao.quantization import Int4WeightOnlyConfig
            except ImportError:
                raise ImportError(TORCHAO_MSG)
            group_size = 128
            base_config = Int4WeightOnlyConfig(group_size = group_size)
            # Only Linear layers with at least `group_size` input features.
            filter_fn = (
                lambda m, _: isinstance(m, torch.nn.Linear)
                and m.in_features >= group_size
            )
            torchao_config = TorchAOConfig(
                qat_scheme = qat_scheme,
                base_config_and_filter_fns = [(base_config, filter_fn)],
            )
        elif qat_scheme == "int8":
            try:
                from torchao.quantization import IntxWeightOnlyConfig
                from torchao.quantization.granularity import PerAxis
            except ImportError:
                raise ImportError(TORCHAO_MSG)

            base_config = IntxWeightOnlyConfig(
                weight_dtype = torch.int8,
                granularity = PerAxis(0),
            )
            filter_fn = lambda m, _: isinstance(m, torch.nn.Linear)
            torchao_config = TorchAOConfig(
                qat_scheme = qat_scheme,
                base_config_and_filter_fns = [(base_config, filter_fn)],
            )
        else:
            raise ValueError(f"Unexpected QAT scheme {qat_scheme}")
        assert torchao_config is not None, f"TorchAOConfig was not set for {qat_scheme}"
    else:
        torchao_config = qat_scheme

    # Save Torchao metadata everywhere
    # (on every object reached by following `.model`, including the innermost one).
    inner_model = model
    while hasattr(inner_model, "model"):
        inner_model._torchao_config = torchao_config
        inner_model = inner_model.model
    inner_model._torchao_config = torchao_config

    # Run the optional pre-transform, then the QAT "prepare" step once per rule.
    if torchao_config.prequantization_transform is not None:
        torchao_config.prequantization_transform(model)
    for base_config, filter_fn in torchao_config.base_config_and_filter_fns:
        quantize_(model, QATConfig(base_config, step = "prepare"), filter_fn = filter_fn)

    return model


def patch_hf_quantizer():
    """
    Force the FP8 / TorchAO Hugging Face quantizer classes to report
    themselves as trainable.

    `is_trainable` and `is_qat_trainable` are overridden on each quantizer
    class with a property that always returns True, which tells the HF
    trainer that the quantized model is trainable. Every class is patched
    independently: a failure is logged as a warning and the remaining
    classes are still attempted.
    """

    def _always_true(self):
        return True

    def _force_trainable(quantizer_cls):
        # A separate property object per attribute, installed on the class
        quantizer_cls.is_trainable = property(_always_true)
        quantizer_cls.is_qat_trainable = property(_always_true)

    # Fine-grained FP8
    try:
        from transformers.quantizers.quantizer_finegrained_fp8 import (
            FineGrainedFP8HfQuantizer,
        )

        _force_trainable(FineGrainedFP8HfQuantizer)
    except Exception as e:
        logger.warning(f"Failed to patch FineGrainedFP8HfQuantizer. Error {e}")

    # FBGEMM FP8
    try:
        from transformers.quantizers.quantizer_fbgemm_fp8 import FbgemmFp8HfQuantizer

        _force_trainable(FbgemmFp8HfQuantizer)
    except Exception as e:
        logger.warning(f"Failed to patch FbgemmFp8HfQuantizer. Error {e}")

    # TorchAO
    try:
        from transformers.quantizers.quantizer_torchao import TorchAoHfQuantizer

        _force_trainable(TorchAoHfQuantizer)
    except Exception as e:
        logger.warning(f"Failed to patch TorchAoHfQuantizer. Error {e}")


patch_hf_quantizer()


def verify_fp8_support_if_applicable(model_config):
    """
    Validate that the current device can run the FP8 scheme declared by
    `model_config`.

    Args:
        model_config: Model config passed to `get_quant_type` to obtain the
            quantization method.

    Raises:
        ValueError: If the method is "fbgemm_fp8" or "fp8" and the device is
            not CUDA, if "fbgemm_fp8" is used with a compute capability major
            version below 9, or if "fp8" is used with a compute capability
            below 8.9.
    """
    quant_method = get_quant_type(model_config)

    # [TODO] Need to add FP8 support for Intel XPUs
    if DEVICE_TYPE != "cuda":
        if quant_method in ("fbgemm_fp8", "fp8"):
            raise ValueError(
                f"Unsloth: FP8 quantization is only supported on CUDA GPUs. You are using {DEVICE_TYPE}."
            )
        return

    major_version, minor_version = torch.cuda.get_device_capability()

    # While L4 does support FP8 as data type, it doesn't have fbgemm (package) support yet. So we restrict it.
    if quant_method == "fbgemm_fp8" and major_version < 9:
        raise ValueError(
            f"Unsloth: FBGEMM FP8 quantization is only supported on H100 and higher GPUs. L4 is not supported. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details."
        )

    # In case of block quantized, we allow L4 because we fall back to torchao kernels.
    capability = major_version * 10 + minor_version
    if quant_method == "fp8" and capability < 89:
        raise ValueError(
            f"Unsloth: FP8 quantization is only supported on L4 and higher GPUs with compute capability 8.9 or higher. You are using {torch.cuda.get_device_name()}. Refer to https://developer.nvidia.com/cuda-gpus for more details."
        )


def _get_inference_mode_context_manager(model: torch.nn.Module):
    """
    If the state dict was quantized using torchao, we will run into
    the following error when calling ops like aten.t() in inference mode.
    This is a bug in PyTorch that affects all tensor subclasses.

        Cannot set version_counter for inference tensor

    For now, we work around this issue by using `torch.no_grad()` in this case.
    See https://github.com/pytorch/pytorch/issues/164872 for more details.
    Otherwise, just return `torch.inference_mode()`.
    """
    torchao_config = getattr(model, "torchao_config", None)
    if torchao_config is not None and torchao_config.qat_scheme is None:
        return torch.no_grad()
    else:
        return torch.inference_mode()


def hf_login(token: Optional[str] = None) -> Optional[str]:
    """
    Best-effort login to the Hugging Face Hub.

    Args:
        token: Access token to use. When None, the token is looked up with
            `huggingface_hub.get_token()`.

    Returns:
        The token that was used (also when the login call itself failed),
        or None when no token was given and none could be discovered.
    """
    if token is None:
        try:
            from huggingface_hub import get_token

            token = get_token()
        except Exception:
            # Lookup is best effort (missing package, unreadable cache, ...).
            # `Exception` rather than a bare `except` so that
            # KeyboardInterrupt / SystemExit are not silently swallowed.
            return None
        if token is None:
            return None

    try:
        from huggingface_hub import login

        login(token = token)
    except Exception as e:
        # A failed login is not fatal: the caller still receives the token
        logger.info(f"Failed to login to huggingface using token with error: {e}")
    return token


# =============================================
# MoE (Mixture of Experts) Detection and LoRA Utilities


def is_moe_model(model) -> bool:
    """
    Tell whether `model` is a Mixture of Experts (MoE) model.

    Args:
        model: An object with a `config` attribute, or the config itself.

    Returns:
        True when the first expert-count attribute that is set is greater
        than zero, False otherwise.
    """
    # Different MoE models use different config attribute names:
    # - Qwen3-MoE: num_experts
    # - GLM4-MoE: n_routed_experts, num_local_experts
    # - Mixtral: num_local_experts
    expert_attrs = ("num_experts", "n_routed_experts", "num_local_experts")

    def _first_expert_count(cfg):
        # First attribute (in priority order) whose value is not None
        values = (getattr(cfg, name, None) for name in expert_attrs)
        return next((value for value in values if value is not None), None)

    config = getattr(model, "config", model)
    count = _first_expert_count(config)

    # VL models keep the language model settings under text_config
    if count is None and hasattr(config, "text_config"):
        count = _first_expert_count(config.text_config)

    if count is None:
        return False
    return count > 0


def get_moe_target_parameters(model, target_modules = None) -> Optional[List[str]]:
    """
    Work out which MoE expert parameters should get LoRA via PEFT's
    `target_parameters`.

    The expert weights considered are "mlp.experts.gate_up_proj" and
    "mlp.experts.down_proj". They are selected from `target_modules`:
    - "gate_proj", "up_proj" or "gate_up_proj" -> "mlp.experts.gate_up_proj"
    - "down_proj" -> "mlp.experts.down_proj"

    Args:
        model: The model (or config) to inspect.
        target_modules: None (select everything), a single string (treated
            as one name, with a heuristic for regexes that mention MLP/FFN
            projections), or an iterable of module names.

    Returns:
        The list of selected parameter paths, or None when the model is not
        MoE or nothing was selected.
    """
    if not is_moe_model(model):
        return None

    expert_attrs = ("num_experts", "n_routed_experts", "num_local_experts")
    all_mlp_names = {"gate_proj", "up_proj", "down_proj", "gate_up_proj"}

    def _lookup_expert_count(cfg):
        for name in expert_attrs:
            value = getattr(cfg, name, None)
            if value is not None:
                return value
        return None

    # The expert count is only needed for the message printed below
    config = getattr(model, "config", model)
    num_experts = _lookup_expert_count(config)
    if num_experts is None and hasattr(config, "text_config"):
        num_experts = _lookup_expert_count(config.text_config)
    num_experts = 0 if num_experts is None else num_experts

    # Normalize target_modules into a set of requested names
    if target_modules is None:
        # Nothing specified: include all MoE params
        requested = set(all_mlp_names)
    elif isinstance(target_modules, str):
        requested = {target_modules}
        # Heuristic for regex matching MLPs
        mentions_mlp = "mlp" in target_modules or "ffn" in target_modules
        if "proj" in target_modules and mentions_mlp:
            requested |= all_mlp_names
    else:
        requested = set(target_modules) if target_modules else set()

    moe_params = []
    # gate_up_proj fuses gate_proj and up_proj in MoE; the fused name is
    # accepted directly as well
    if requested & {"gate_proj", "up_proj", "gate_up_proj"}:
        moe_params.append("mlp.experts.gate_up_proj")
    if "down_proj" in requested:
        moe_params.append("mlp.experts.down_proj")

    if not moe_params:
        return None

    print(
        f"Unsloth: Detected MoE model with {num_experts = } and {target_modules = }. Enabling LoRA on MoE parameters: {moe_params}"
    )
    return moe_params


def make_fast_generate_wrapper(original_generate):
    """
    Wrap `original_generate` with guards against vLLM-style usage.

    The returned callable raises ValueError when it receives a
    `sampling_params` or `lora_request` keyword, or when its first
    positional argument is a string or a non-empty list/tuple whose first
    element is a string. Any other call is forwarded unchanged to
    `original_generate`.
    """

    def _looks_like_text(value):
        # A plain string, or a non-empty list/tuple starting with a string
        if isinstance(value, str):
            return True
        if isinstance(value, (list, tuple)) and len(value) > 0:
            return isinstance(value[0], str)
        return False

    @functools.wraps(original_generate)
    def _fast_generate_wrapper(*args, **kwargs):
        # vLLM-only keyword arguments
        if "sampling_params" in kwargs:
            raise ValueError(
                "Unsloth: `sampling_params` is only supported when `fast_inference=True` (vLLM). "
                "Since `fast_inference=False`, use HuggingFace generate arguments instead:\n"
                "  model.fast_generate(**tokens.to('cuda'), max_new_tokens=64, temperature=1.0, top_p=0.95)"
            )
        if "lora_request" in kwargs:
            raise ValueError(
                "Unsloth: `lora_request` is only supported when `fast_inference=True` (vLLM). "
                "Since `fast_inference=False`, LoRA weights are already merged into the model."
            )

        # Raw text as the first positional argument needs vLLM as well
        if args and _looks_like_text(args[0]):
            raise ValueError(
                "Unsloth: Passing text strings to `fast_generate` is only supported "
                "when `fast_inference=True` (vLLM). Since `fast_inference=False`, you must "
                "tokenize the input first:\n\n"
                "  messages = tokenizer.apply_chat_template(\n"
                '      [{"role": "user", "content": "Your prompt here"}],\n'
                "      tokenize=True, add_generation_prompt=True,\n"
                '      return_tensors="pt", return_dict=True\n'
                "  )\n"
                "  output = model.fast_generate(\n"
                "      **messages.to('cuda'),\n"
                "      max_new_tokens=64,\n"
                "      temperature=1.0,\n"
                "  )"
            )

        return original_generate(*args, **kwargs)

    return _fast_generate_wrapper


# Fix llm_int8_skip_modules not being respected for VLMs with dynamic quantization.
# Dynamic quant checkpoints (eg gemma-3-4b-it-unsloth-bnb-4bit) encode skip paths as
# "language_model.model.layers.*", but the live module tree surfaces them as
# "model.language_model.layers.*". This prefix mismatch causes should_convert_module
# to miss the skip list, so modules meant to stay in 16-bit get wrapped in Linear4bit
# without a quant_state, producing "Skipping ... no quant_state found" warnings.
# We patch should_convert_module to expand both the module name and the skip patterns
# into all equivalent alias forms before delegating to the original matcher.
# Ref: https://github.com/unslothai/unsloth/issues/4208
import transformers.quantizers.quantizers_utils as _quantizers_utils

# Install the patch only when transformers exposes `should_convert_module` and
# the function currently installed is not already our replacement (checked by
# name), so running this code twice does not wrap the patch around itself.
if (
    hasattr(_quantizers_utils, "should_convert_module")
    and getattr(_quantizers_utils.should_convert_module, "__name__", "")
    != "patched_should_convert_module"
):
    _original_should_convert_module = _quantizers_utils.should_convert_module

    def _get_full_name_aliases(full_name):
        """
        Return the set holding `full_name` and its equivalent spellings.

        Non-string names are returned as a single-element set. For strings
        the aliases drop a leading "model." before "language_model." and/or
        collapse "language_model.model." into "language_model.".
        """
        aliases = {full_name}
        if not isinstance(full_name, str):
            return aliases

        # "model.language_model.X" -> "language_model.X"
        if full_name.startswith("model.language_model."):
            aliases.add(full_name[len("model.") :])
        # "...language_model.model.X" -> "...language_model.X"
        if "language_model.model." in full_name:
            aliases.add(full_name.replace("language_model.model.", "language_model."))
        # Both rewrites at once: "model.language_model.model.X" -> "language_model.X"
        if full_name.startswith("model.language_model.model."):
            aliases.add(
                full_name[len("model.") :].replace(
                    "language_model.model.", "language_model."
                )
            )
        return aliases

    def _get_pattern_aliases(pattern):
        """
        Return the set holding `pattern` and, for string patterns containing
        "language_model.model.", the variant with that collapsed into
        "language_model.".
        """
        aliases = {pattern}
        if not isinstance(pattern, str):
            return aliases

        if "language_model.model." in pattern:
            aliases.add(pattern.replace("language_model.model.", "language_model."))
        return aliases

    def _expand_patterns(patterns):
        """Return the union of the alias sets of every pattern in `patterns`."""
        expanded = set()
        for pattern in patterns:
            expanded.update(_get_pattern_aliases(pattern))
        return expanded

    def patched_should_convert_module(full_name, patterns = None):
        """
        Alias-aware replacement for transformers' `should_convert_module`.

        With `patterns` None the call is forwarded untouched. Otherwise the
        original matcher is evaluated for every alias of `full_name` against
        the expanded patterns, and the result is True only when all of those
        evaluations are truthy.
        """
        if patterns is None:
            return _original_should_convert_module(full_name, patterns)

        expanded_patterns = _expand_patterns(patterns)
        # Presumably the original returns False when a skip pattern matches, so
        # a match on any alias keeps the module unconverted - TODO confirm
        # against the transformers implementation.
        return all(
            _original_should_convert_module(candidate, expanded_patterns)
            for candidate in _get_full_name_aliases(full_name)
        )

    # Keep a handle on the unpatched function on the replacement itself
    patched_should_convert_module._original_should_convert_module = (
        _original_should_convert_module
    )
    _quantizers_utils.should_convert_module = patched_should_convert_module

    # Also rebind the name inside the bitsandbytes integration module.
    # Best effort: any failure while importing or patching it is ignored.
    try:
        import transformers.integrations.bitsandbytes

        transformers.integrations.bitsandbytes.should_convert_module = (
            patched_should_convert_module
        )
    except Exception:
        pass


================================================
FILE: unsloth/models/cohere.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
from ._utils import __version__
from unsloth_zoo.hf_utils import dtype_from_config
from unsloth_zoo.utils import _get_dtype, Version
from ..utils.packing import get_packed_info_from_kwargs
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    select_attention_backend,
)

# Import the Cohere modeling classes that are patched further down in this file.
try:
    from transformers.models.cohere.modeling_cohere import (
        CohereAttention,
        CohereDecoderLayer,
        CohereModel,
        CohereForCausalLM,
        CohereRotaryEmbedding,
        apply_rotary_pos_emb,
        repeat_kv,
    )
except:
    # The import failed: raise an upgrade hint when transformers is older than 4.42.
    # `transformers_version` is not defined in this file; presumably it comes
    # from the star import of `.llama` - TODO confirm.
    # NOTE(review): when the version is >= 4.42 the failure is swallowed here and
    # the names above stay undefined, so the error only surfaces later on.
    transformers_version = Version(transformers_version)
    if not transformers_version >= Version("4.42"):
        raise ImportError(
            f"Unsloth: Your transformers version of {transformers_version} does not support Cohere.\n"
            f"The minimum required version is 4.42.3.\n"
            f'Try `pip install --upgrade "transformers>=4.42.3"`\n'
            f"to obtain the latest transformers build, then restart this session."
        )

from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)

# For Pytorch 2.1.1
try:
    from transformers.models.cohere.modeling_cohere import (
        CohereSdpaAttention,
        CohereFlashAttention2,
    )
except:
    CohereSdpaAttention = CohereAttention
    CohereFlashAttention2 = CohereAttention


def fast_layernorm_inference(self, X, out_weight = None):
    """
    Bias-free LayerNorm over the last dimension, computed in float32.

    Args:
        self: The norm module; `self.weight` and `self.variance_epsilon`
            are read.
        X: Input tensor. It is not modified.
        out_weight: Optional preallocated buffer. When given, `self.weight`
            is copied into it (converting to the buffer's dtype) and the
            buffer is used as the scale. When None, a float32 copy of
            `self.weight` is used instead.

    Returns:
        The normalized and scaled tensor, cast back to `X.dtype`.
    """
    XX = X.to(torch.float32, copy = True)
    XX -= X.mean(-1, keepdim = True)
    variance = XX.square().mean(-1, keepdim = True)
    variance += self.variance_epsilon
    XX *= variance.rsqrt_()
    if out_weight is None:
        # The signature's default used to crash on `out_weight[:] = ...`
        XX *= self.weight.to(torch.float32)
    else:
        out_weight[:] = self.weight
        XX *= out_weight
    return XX.to(X.dtype)


# QK norm in Cohere
# QK norm in Cohere
def CohereAttention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """
    Attention forward for Cohere with optional QK layernorm.

    Args:
        hidden_states: 3-D input, unpacked as (bsz, q_len, hidden).
        causal_mask: Forwarded to the attention context.
        attention_mask: Forwarded to the attention context.
        position_ids: Positions handed to the RoPE kernel; when None,
            `kwargs["position_ids"]` is used if present.
        past_key_value: Optional (K, V) pair that is concatenated in front
            of the new keys/values along dim 2.
        output_attentions: Not used in this function.
        use_cache: When truthy the concatenated (K, V) pair is returned.
        padding_mask: Not used in this function.
        position_embeddings: Optional precomputed (cos, sin); otherwise they
            are fetched from `self.rotary_emb.get_cached`.
        **kwargs: Also scanned by `get_packed_info_from_kwargs` for packed
            sequence info.

    Returns:
        (attn_output, None, past_key_value) where past_key_value is (K, V)
        when `use_cache` is truthy and None otherwise. Attention weights are
        never returned.
    """
    # Clear inference
    # (these attributes are the buffers set up by CohereAttention_fast_forward_inference)
    if hasattr(self, "paged_attention"):
        del self.paged_attention_K
        del self.paged_attention_V
        del self.paged_attention
        del self.temp_QA
        del self.temp_KV
        del self.RH_Q
        del self.attention
        del self.q_norm_out_weight
        del self.k_norm_out_weight

    bsz, q_len, _ = hidden_states.size()

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    assert n_kv_heads * n_groups == n_heads

    # Project, then reshape to (bsz, heads, q_len, head_dim)
    Q, K, V = self.apply_qkv(self, hidden_states)
    Q = Q.view(bsz, q_len, n_heads, head_dim).transpose(1, 2)
    K = K.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    V = V.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, Q.device)
    # Cohere-specific: layernorm on Q and K before RoPE
    if self.use_qk_norm:
        Q = fast_layernorm_compiled(self.q_norm, Q)
        K = fast_layernorm_compiled(self.k_norm, K)

    # Total key length = new tokens + cached tokens
    kv_seq_len = K.shape[-2]
    if past_key_value is not None:
        kv_seq_len += past_key_value[0].shape[-2]

    # Extend RoPE dynamically to fit in VRAM
    if position_embeddings:
        cos, sin = position_embeddings
    else:
        cos, sin = self.rotary_emb.get_cached(kv_seq_len, Q.device.index)

    rope_position_ids = (
        position_ids if position_ids is not None else kwargs.get("position_ids")
    )
    # Useful for LongRoPE
    Q, K = fast_rope_embedding(Q, K, cos, sin, rope_position_ids)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)
    # NOTE: past_key_value is rebound here, and the varlen decision below reads
    # the rebound value (i.e. it depends on use_cache, not on the incoming cache)
    past_key_value = (K, V) if use_cache else None

    # Attention module
    use_varlen = seq_info is not None and past_key_value is None
    backend = select_attention_backend(use_varlen)
    attention_config = AttentionConfig(
        backend = backend,
        n_kv_heads = n_kv_heads,
        n_groups = n_groups,
        flash_dense_kwargs = {"causal": True},
        flash_varlen_kwargs = {
            "dropout_p": 0.0,
            "causal": True,
            "softmax_scale": getattr(self, "softmax_scale", None),
        },
    )
    context = AttentionContext(
        bsz = bsz,
        q_len = q_len,
        kv_seq_len = kv_seq_len,
        n_heads = n_heads,
        head_dim = head_dim,
        requires_grad = hidden_states.requires_grad,
        seq_info = seq_info,
        attention_mask = attention_mask,
        causal_mask = causal_mask,
    )

    A = run_attention(config = attention_config, context = context, Q = Q, K = K, V = V)

    # Merge heads back and apply the output projection
    attn_output = A.reshape(bsz, q_len, n_heads * head_dim)
    attn_output = self.apply_o(self, attn_output)
    attn_weights = None
    return attn_output, attn_weights, past_key_value


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L590
def CohereDecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
):
    """
    Cohere decoder layer forward.

    Attention and MLP both read the same layer-normed input and are added to
    the residual together. When `use_cache` is truthy and the layer carries
    `_flag_for_generation`, the inference kernels are used and the residual
    is accumulated in place on the incoming `hidden_states` tensor.
    Otherwise the regular (training-capable) path is taken.

    Note: `position_embeddings` is accepted but not forwarded to
    `self.self_attn` by this function.

    Returns:
        A tuple starting with the new hidden states, followed by the
        attention weights when `output_attentions` is truthy and by the
        present key/value when `use_cache` is truthy.
    """
    if use_cache and hasattr(
        self, "_flag_for_generation"
    ):  # past_key_value is not None:
        # Scratch buffer for the float32 norm weight. It is multiplied with the
        # activations, so it must live on their device rather than on a
        # hard-coded index 0 (layers may be placed on other devices).
        out_weight = torch.empty(
            self.input_layernorm.weight.shape,
            dtype = torch.float32,
            device = hidden_states.device,
        )

        # Self Attention
        residual = hidden_states
        hidden_states = fast_layernorm_inference(
            self.input_layernorm, hidden_states, out_weight
        )
        hidden_states_attention, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            **kwargs,
        )

        # Fully Connected
        hidden_states_mlp = fast_swiglu_inference(self.mlp, hidden_states)
        # In-place accumulation: `residual` is the tensor passed in by the caller
        residual += hidden_states_attention
        residual += hidden_states_mlp
        hidden_states = residual
    else:
        residual = hidden_states
        hidden_states = fast_layernorm_compiled(self.input_layernorm, hidden_states)
        hidden_states_attention, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            **kwargs,
        )

        # Fully Connected
        hidden_states_mlp = self.mlp(hidden_states)
        hidden_states = residual + hidden_states_attention + hidden_states_mlp

    outputs = (hidden_states,)
    if output_attentions:
        outputs += (self_attn_weights,)
    if use_cache:
        outputs += (present_key_value,)
    return outputs


from math import sqrt as math_sqrt

KV_CACHE_INCREMENT = 256  # KV Cache update size
torch_nn_functional_softmax = torch.nn.functional.softmax
torch_matmul = torch.matmul


def CohereAttention_fast_forward_inference(
    self,
    hidden_states: torch.Tensor,
    past_key_value: Optional[Tuple[torch.Tensor]],
    position_ids,
    do_prefill = False,
    attention_mask = None,
    **kwargs,
):
    """
    Decode-time attention for Cohere: processes one new token per sequence
    against a preallocated, growable KV cache stored on the module.

    Args:
        hidden_states: 3-D input; the new token is viewed as
            (bsz, 1, heads * head_dim) after projection.
        past_key_value: (K, V) pair of the tokens seen so far; its
            second-to-last dimension gives the cached length.
        position_ids: Indices used to select rows of the cached cos/sin.
        do_prefill: When True, (re)allocate all scratch buffers and copy
            `past_key_value` into the cache.
        attention_mask: Used only on the batched (bsz != 1) path.

    Returns:
        (A, (Kn, Vn)): the projected attention output and views of the
        cache covering all `seq_len + 1` positions.
    """
    Xn = hidden_states
    bsz, _, _ = hidden_states.size()
    K1, V1 = past_key_value
    dtype = Xn.dtype
    # Allocate every scratch buffer on the device of the activations. The
    # buffers are used as `out=` targets and operands together with
    # `hidden_states`, so a hard-coded device index 0 only works when this
    # layer happens to live on device 0.
    device = hidden_states.device

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # assert(n_kv_heads * n_groups == n_heads)

    hidden_size = self.config.hidden_size
    attention_size = n_heads * head_dim
    seq_len = K1.shape[-2]
    kv_seq_len = seq_len + 1

    # Prefill phase
    # if not hasattr(self, "paged_attention"):
    if do_prefill:
        # Cache layout: (position, K-or-V, bsz, n_kv_heads, head_dim)
        self.paged_attention = torch.empty(
            (KV_CACHE_INCREMENT + seq_len + 1, 2, bsz, n_kv_heads, head_dim),
            dtype = dtype,
            device = device,
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.paged_attention_K[:seq_len] = K1.permute(2, 0, 1, 3)
        self.paged_attention_V[:seq_len] = V1.permute(2, 0, 1, 3)
        self.temp_QA = torch.empty(
            (2, bsz, 1, attention_size), dtype = dtype, device = device
        )
        self.temp_KV = torch.empty(
            (2, bsz, 1, n_kv_heads * head_dim),
            dtype = dtype,
            device = device,
        )
        self.RH_Q = torch.empty(
            (bsz, n_heads, 1, head_dim), dtype = dtype, device = device
        )

        # Mistral Nemo 12b has weird dimensions
        if attention_size != hidden_size:
            self.temp_O = torch.empty(
                (bsz, 1, hidden_size), dtype = dtype, device = device
            )
        else:
            self.temp_O = self.temp_QA[1][:, :, :hidden_size]

        self.attention = torch.empty(
            (bsz, n_heads, 1, KV_CACHE_INCREMENT + seq_len),
            dtype = dtype,
            device = device,
        )
        self.scalar = 1.0 / math_sqrt(self.head_dim)
        self.half_head_dim = head_dim // 2
        # Cohere has QK layernorms
        if self.use_qk_norm:
            self.q_norm_out_weight = torch.empty(
                self.q_norm.weight.shape,
                dtype = torch.float32,
                device = device,
            )
            self.k_norm_out_weight = torch.empty(
                self.k_norm.weight.shape,
                dtype = torch.float32,
                device = device,
            )
        else:
            self.q_norm_out_weight = None
            self.k_norm_out_weight = None
    elif kv_seq_len >= self.paged_attention.shape[0]:
        # Cache is full: grow it (and the score buffer) by KV_CACHE_INCREMENT
        self.paged_attention.resize_(
            (
                self.paged_attention.shape[0] + KV_CACHE_INCREMENT,
                2,
                bsz,
                n_kv_heads,
                head_dim,
            )
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.attention.resize_(
            (bsz, n_heads, 1, self.attention.shape[-1] + KV_CACHE_INCREMENT)
        )

    # Project the new token into the preallocated buffers
    Qn = fast_linear_forward(self.q_proj, Xn, out = self.temp_QA[0])
    Kn = fast_linear_forward(self.k_proj, Xn, out = self.temp_KV[0])
    Vn = fast_linear_forward(self.v_proj, Xn, out = self.temp_KV[1])
    Qn = Qn.view(bsz, 1, n_heads, head_dim).transpose(1, 2)
    Kn = Kn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)
    Vn = Vn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)
    if self.use_qk_norm:
        Qn = fast_layernorm_inference(self.q_norm, Qn, self.q_norm_out_weight)
        Kn = fast_layernorm_inference(self.k_norm, Kn, self.k_norm_out_weight)

    # cos, sin = self.rotary_emb(Vn, seq_len = kv_seq_len)
    # Qn, Kn = inplace_rope_embedding(Qn, Kn, cos, sin, position_ids)
    cos, sin = self.rotary_emb.get_cached(kv_seq_len, Qn.device.index)
    cos = cos[position_ids].unsqueeze(1)
    sin = sin[position_ids].unsqueeze(1)
    h = self.half_head_dim

    # In-place RoPE: x * cos + rotate_half(x) * sin, with RH_Q as scratch
    RH_Q = self.RH_Q
    RH_Q[:, :, :, :h] = Qn[:, :, :, h:]
    RH_Q[:, :, :, h:] = Qn[:, :, :, :h]
    RH_Q[:, :, :, :h].neg_()
    Qn *= cos
    Qn.addcmul_(RH_Q, sin)

    # Reuse the first n_kv_heads rows of the Q scratch buffer for K
    RH_K = RH_Q[
        :, :n_kv_heads, :, :
    ]  # torch.empty((n_kv_heads, 1, head_dim), dtype = dtype, device = "cuda:0")
    RH_K[:, :, :, :h] = Kn[:, :, :, h:]
    RH_K[:, :, :, h:] = Kn[:, :, :, :h]
    RH_K[:, :, :, :h].neg_()
    Kn *= cos
    Kn.addcmul_(RH_K, sin)

    # New KV cache
    # Kn = torch.cat([K1, Kn], dim = 2)
    # Vn = torch.cat([V1, Vn], dim = 2)
    self.paged_attention_K[seq_len] = Kn.permute(2, 0, 1, 3)
    self.paged_attention_V[seq_len] = Vn.permute(2, 0, 1, 3)
    Kn = self.paged_attention_K[:kv_seq_len].permute(1, 2, 0, 3)
    Vn = self.paged_attention_V[:kv_seq_len].permute(1, 2, 0, 3)

    # Handle sliding windows
    sliding_window = getattr(self.config, "sliding_window", None)
    if sliding_window is not None and kv_seq_len > sliding_window:
        start = kv_seq_len - sliding_window
        Knn = Kn[:, :, start:, :]  # .contiguous()
        Vnn = Vn[:, :, start:, :]  # .contiguous()
        if attention_mask is not None:
            attention_mask = attention_mask[..., start:]
    else:
        Knn, Vnn = Kn, Vn

    # Grouped query attention
    _, _, cached_len, _ = Knn.shape
    if n_groups != 1:
        Knn = Knn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Vnn = Vnn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Knn = Knn.reshape(bsz, n_heads, cached_len, head_dim)
        Vnn = Vnn.reshape(bsz, n_heads, cached_len, head_dim)

    # Attention
    if bsz == 1:
        Qn *= self.scalar  # See https://github.com/ggerganov/llama.cpp/issues/7805#issuecomment-2153349963
        # It seems like doing (Q * scalar) @ K is better than (Q @ K) * scalar to stop overflows
        A = torch_matmul(
            Qn, Knn.transpose(2, 3), out = self.attention[:, :, :, :cached_len]
        )
        A[:] = torch_nn_functional_softmax(
            A, dim = -1, dtype = torch.float32
        )  # .to(A.dtype)
        # The weighted sum is written back into the Q buffer
        A = torch_matmul(A, Vnn, out = Qn)
    else:
        A = scaled_dot_product_attention(
            Qn, Knn, Vnn, attn_mask = attention_mask, is_causal = False
        )
    A = A.transpose(1, 2)
    A = A.reshape(bsz, 1, attention_size)
    A = fast_linear_forward(self.o_proj, A, out = self.temp_O)
    return A, (Kn, Vn)


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L825
# @torch.inference_mode
def CohereModel_fast_forward_inference(
    self,
    input_ids,
    past_key_values,
    position_ids,
    attention_mask = None,
):
    """
    Decode-time forward pass over all Cohere decoder layers.

    Each layer normalizes its input once, feeds the normalized activations
    to both the attention and the MLP, and adds both results to the residual
    in place. A final layernorm (`self.model.norm`) is applied at the end.

    Returns:
        BaseModelOutputWithPast with the final hidden states, the list of
        per-layer key/value pairs, and empty `hidden_states` / `attentions`.
    """
    # One float32 scratch buffer for the norm weight per device index
    out_weights = tuple(
        torch.empty_like(
            self.model.layers[0].input_layernorm.weight,
            dtype = torch.float32,
            device = torch.device(device_id),
        )
        for device_id in range(DEVICE_COUNT)
    )

    input_ids = input_ids[:, : self.max_seq_length]
    embedded = self.model.embed_tokens(input_ids)
    hidden_states = embedded.to(_get_dtype(dtype_from_config(self.config)))
    bsz, q_len, _ = hidden_states.shape
    seq_len = past_key_values[0][0].shape[-2]

    if bsz == 1:
        # The single-sequence path does not use a mask
        attention_mask = None
    else:
        attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask,
            (bsz, q_len),
            hidden_states,
            seq_len,
            sliding_window = getattr(self.config, "sliding_window", None),
        )
        # Pre-convert to bool once for all layers (avoids per-layer .eq(0))
        needs_bool = attention_mask is not None and attention_mask.dtype != torch.bool
        if needs_bool:
            attention_mask = attention_mask.eq(0)

    next_decoder_cache = []
    for idx, decoder_layer in enumerate(self.model.layers):
        device_index = getattr(decoder_layer, "_per_layer_device_index", 0)
        hidden_states, position_ids = move_to_device(
            device_index, hidden_states, position_ids
        )

        residual = hidden_states
        normed = fast_layernorm_inference(
            decoder_layer.input_layernorm, hidden_states, out_weights[device_index]
        )
        attention_layer = decoder_layer.self_attn
        attn_out, present_key_value = CohereAttention_fast_forward_inference(
            attention_layer,
            hidden_states = normed,
            past_key_value = past_key_values[idx],
            position_ids = position_ids,
            attention_mask = attention_mask,
            do_prefill = not hasattr(attention_layer, "paged_attention"),
        )
        mlp_out = fast_swiglu_inference(decoder_layer.mlp, normed)

        # Accumulate in place on the residual tensor
        residual += attn_out
        residual += mlp_out
        hidden_states = residual

        next_decoder_cache.append(present_key_value)

    # Final norm reuses the scratch buffer of the last layer's device
    hidden_states = fast_layernorm_inference(
        self.model.norm, hidden_states, out_weights[device_index]
    )

    return BaseModelOutputWithPast(
        last_hidden_state = hidden_states,
        past_key_values = next_decoder_cache,
        hidden_states = [],
        attentions = [],
    )


class FastCohereModel(FastLlamaModel):
    """Cohere variant of `FastLlamaModel` that installs Unsloth's fast forwards."""

    @staticmethod
    def pre_patch():
        """Monkey-patch the Cohere classes with the fast forward implementations.

        Mutates the transformers Cohere classes (and `PeftModelForCausalLM`) in
        place by rebinding their `forward` attributes. Returns `None`.
        """
        # `patch_linear_scaling` hands back a name plus source text; when a name is
        # returned, the source is executed into this module's globals and the
        # resulting object becomes the attention class's `__init__`.
        init_name, function = patch_linear_scaling(
            model_name = "cohere",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = CohereAttention,
        )
        if init_name is not None:
            exec(function, globals())
            CohereAttention.__init__ = eval(init_name)
        # All three attention classes share the same fast forward.
        CohereAttention.forward = CohereAttention_fast_forward
        CohereSdpaAttention.forward = CohereAttention_fast_forward
        CohereFlashAttention2.forward = CohereAttention_fast_forward
        CohereDecoderLayer.forward = CohereDecoderLayer_fast_forward
        CohereModel.forward = LlamaModel_fast_forward
        CohereForCausalLM.forward = CausalLM_fast_forward(
            CohereModel_fast_forward_inference
        )
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(CohereForCausalLM)

        import transformers.models.cohere.modeling_cohere

        # Rebind the module-level rotary embedding class to the Llama one.
        transformers.models.cohere.modeling_cohere.CohereRotaryEmbedding = (
            LlamaRotaryEmbedding
        )
        return


================================================
FILE: unsloth/models/dpo.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by `from <this module> import *`.
__all__ = [
    "PatchDPOTrainer",
    "PatchKTOTrainer",
]


def PatchDPOTrainer():
    """No-op hook: performs no patching and returns `None`."""
    return None


def PatchKTOTrainer():
    """No-op hook: performs no patching and returns `None`."""
    return None


================================================
FILE: unsloth/models/falcon_h1.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
import os
from ._utils import __version__
from unsloth_zoo.utils import Version, _get_dtype
from unsloth_zoo.hf_utils import dtype_from_config
from ..utils.packing import get_packed_info_from_kwargs
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    select_attention_backend,
    SDPA,
)
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
    _LlamaModel_fast_forward_inference,
)

try:
    from transformers.models.falcon_h1.modeling_falcon_h1 import (
        FalconH1Attention,
        FalconH1DecoderLayer,
        FalconH1Model,
        FalconH1ForCausalLM,
        FalconHybridMambaAttentionDynamicCache,
    )
except:
    from transformers import __version__ as transformers_version

    transformers_version = Version(transformers_version)
    if not transformers_version >= Version(
        "4.53.0"
    ):  # TODO: Update when transformers is updated
        raise ImportError(
            f"Unsloth: Your transformers version of {transformers_version} does not support FalconH1.\n"
            f"The minimum required version is 4.53.0.\n"
            f'Try `pip install --upgrade "transformers>=4.53.0"`\n'
            f"to obtain the latest transformers build, then restart this session."
        )
from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)
from transformers.utils import (
    is_torchdynamo_compiling,
)

# For Pytorch 2.1.1
try:
    from transformers.models.falcon_h1.modeling_falcon_h1 import (
        FalconH1Attention,
    )
except ModuleNotFoundError:
    # if we are on an old version of transformers technically it should fail in the try except above
    # but if somehow we make it here, we need to raise an error since FalconH1Attention is not available
    # or renamed
    raise ImportError(
        "Unsloth: Could not import FalconH1Attention from transformers.models.falcon_h1.modeling_falcon_h1."
    )


def FalconH1Attention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Attention forward for FalconH1 (non-paged path).

    Projects Q/K/V, scales K by `config.key_multiplier`, applies rotary
    embeddings, concatenates any past K/V, and dispatches to an attention
    backend via `run_attention`.

    Args:
        hidden_states: Input whose size unpacks as `(bsz, q_len, _)`.
        causal_mask: Forwarded to the `AttentionContext`.
        attention_mask: When given, forces the SDPA backend and is passed as
            `attn_mask`.
        position_ids: Positions for RoPE; falls back to `kwargs["position_ids"]`.
        past_key_value: Optional `(K, V)` pair concatenated along dim 2.
        output_attentions: Not read by this function.
        use_cache: When true, the concatenated `(K, V)` is returned.
        padding_mask: Not read by this function.
        position_embeddings: Optional `(cos, sin)`; used only if its first
            dimension covers `kv_seq_len`.
        **kwargs: Passed to `get_packed_info_from_kwargs` for packed-sequence
            metadata.

    Returns:
        `(attn_output, None, past_key_value)` where `past_key_value` is `(K, V)`
        if `use_cache` else `None`. Attention weights are never returned.
    """
    # Clear inference
    # Drop the buffers created by the single-token inference path so this
    # forward starts from a clean state.
    if hasattr(self, "paged_attention"):
        del self.paged_attention_K
        del self.paged_attention_V
        del self.paged_attention
        del self.temp_QA
        del self.temp_KV
        del self.RH_Q
        del self.attention

    bsz, q_len, _ = hidden_states.size()

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    assert n_kv_heads * n_groups == n_heads

    Q, K, V = self.apply_qkv(self, hidden_states)
    Q = Q.view(bsz, q_len, n_heads, head_dim)
    K = K.view(bsz, q_len, n_kv_heads, head_dim)
    V = V.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, hidden_states.device)

    # Falcon H1 multiplies key states by a multiplier
    K = K * self.config.key_multiplier

    # Move heads ahead of the sequence axis: (bsz, heads, q_len, head_dim).
    Q = Q.transpose(1, 2)
    K = K.transpose(1, 2)

    kv_seq_len = K.shape[-2]
    if past_key_value is not None:
        kv_seq_len += past_key_value[0].shape[-2]

    # Extend RoPE dynamically to fit in VRAM
    if position_embeddings and kv_seq_len <= position_embeddings[0].shape[0]:
        cos, sin = position_embeddings
    else:
        rotary_emb = self.rotary_emb
        rotary_emb.extend_rope_embedding(V, seq_len = kv_seq_len)
        cos, sin = rotary_emb.get_cached(kv_seq_len, Q.device.index)

    rope_position_ids = (
        position_ids if position_ids is not None else kwargs.get("position_ids")
    )
    # Useful for LongRoPE
    Q, K = fast_rope_embedding(Q, K, cos, sin, rope_position_ids)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)
    # From here on `past_key_value` is the value to return, not the input cache.
    past_key_value = (K, V) if use_cache else None

    # Attention module
    window = (-1, -1)
    # Note: `past_key_value` was just rebound above, so this condition can only
    # hold when `use_cache` is false.
    use_varlen = (
        attention_mask is None
        and seq_info is not None
        and past_key_value is None
        and window == (-1, -1)
    )

    # An explicit mask always goes through SDPA; otherwise let the dispatcher pick.
    backend = (
        SDPA if attention_mask is not None else select_attention_backend(use_varlen)
    )
    attention_config = AttentionConfig(
        backend = backend,
        n_kv_heads = n_kv_heads,
        n_groups = n_groups,
        flash_dense_kwargs = {
            "causal": True,
            "window_size": (kv_seq_len, kv_seq_len),
        },
        flash_varlen_kwargs = {
            "dropout_p": 0.0,
            "softmax_scale": None,
            "causal": True,
        },
        sdpa_kwargs = {} if attention_mask is None else {"attn_mask": attention_mask},
    )
    context = AttentionContext(
        bsz = bsz,
        q_len = q_len,
        kv_seq_len = kv_seq_len,
        n_heads = n_heads,
        head_dim = head_dim,
        requires_grad = hidden_states.requires_grad,
        seq_info = seq_info,
        attention_mask = attention_mask,
        causal_mask = causal_mask,
    )

    A = run_attention(config = attention_config, context = context, Q = Q, K = K, V = V)

    # Merge heads back into one feature axis before the output projection.
    attn_output = A.reshape(bsz, q_len, n_heads * head_dim)
    attn_output = self.apply_o(self, attn_output)
    attn_weights = None
    return attn_output, attn_weights, past_key_value


# Module-level alias for `torch.matmul`.
torch_matmul = torch.matmul


def FalconH1Attention_fast_forward_inference(
    self,
    hidden_states: torch.Tensor,
    past_key_value: Optional[Tuple[torch.Tensor]],
    position_ids,
    do_prefill = False,
    attention_mask = None,
    **kwargs,
):
    """
    https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L406
    Fast inference using KV cache.
    QK^T can be computed in 4 chunks

    [Q, q] @ [K, k].T where q, k are the new tokens.
    [QK^T, Qk^T]
    [qK^T, qk^T]

    Since the attention mask wipes Qk^T, we just get
    [QK^T,    0]
    [qK^T, qk^T]

    Since softmax is row-wise, we get
    softmax([QK^T,    0])
    softmax([qK^T, qk^T])

    We then multiply by   [V]
                          [v]
    softmax([QK^T,    0]) [softmax(QK^T)V] *
    softmax([qK^T, qk^T]) [softmax([qK^T, qk^T]) @ [V, v]]

    But notice * [softmax(QK^T)V] is just the last attention.
    We just need to compute the last final row.

    This means we can pass in a row of Q, but we need to
    remember K and V, which are called the KV cache.

    Args:
        hidden_states: Input for the new token; its size unpacks as
            `(bsz, _, hd)`.
        past_key_value: `(K1, V1)` pair; `K1.shape[-2]` is taken as the number
            of already-cached positions.
        position_ids: Used to index the cached cos/sin tables.
        do_prefill: When true, (re)allocates the per-module buffers
            (`paged_attention`, `temp_QA`, `temp_KV`, `RH_Q`, `temp_O`,
            `attention`) and copies `K1`/`V1` into the paged cache.
        attention_mask: Passed to `scaled_dot_product_attention` on the
            `bsz != 1` path; sliced when a sliding window trims the cache.
        **kwargs: Not read by this function.

    Returns:
        `(A, (Kn, Vn))`: the output-projected attention result and views of
        the paged K/V cache covering the first `seq_len + 1` positions.
    """
    Xn = hidden_states
    bsz, _, hd = hidden_states.size()
    K1, V1 = past_key_value
    dtype = Xn.dtype

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # assert(n_kv_heads * n_groups == n_heads)

    hidden_size = self.config.hidden_size
    attention_size = n_heads * head_dim
    seq_len = K1.shape[-2]
    kv_seq_len = seq_len + 1

    # Prefill phase
    # if not hasattr(self, "paged_attention"):
    device = hidden_states.device
    if do_prefill:
        # Cache layout: (position, K-or-V, bsz, n_kv_heads, head_dim).
        self.paged_attention = torch.empty(
            (KV_CACHE_INCREMENT + seq_len + 1, 2, bsz, n_kv_heads, head_dim),
            dtype = dtype,
            device = device,
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.paged_attention_K[:seq_len] = K1.permute(2, 0, 1, 3)
        self.paged_attention_V[:seq_len] = V1.permute(2, 0, 1, 3)
        self.temp_QA = torch.empty(
            (2, bsz, 1, attention_size), dtype = dtype, device = device
        )
        self.temp_KV = torch.empty(
            (2, bsz, 1, n_kv_heads * head_dim), dtype = dtype, device = device
        )
        self.RH_Q = torch.empty((bsz, n_heads, 1, head_dim), dtype = dtype, device = device)

        # Mistral Nemo 12b has weird dimensions
        if attention_size != hidden_size:
            self.temp_O = torch.empty((bsz, 1, hidden_size), dtype = dtype, device = device)
        else:
            self.temp_O = self.temp_QA[1][:, :, :hidden_size]

        self.attention = torch.empty(
            (bsz, n_heads, 1, KV_CACHE_INCREMENT + seq_len), dtype = dtype, device = device
        )
        self.scalar = 1.0 / math_sqrt(self.head_dim)
        self.half_head_dim = head_dim // 2
    elif kv_seq_len >= self.paged_attention.shape[0]:
        # Cache is full: grow it (and the score buffer) by KV_CACHE_INCREMENT
        # and refresh the K/V views.
        self.paged_attention.resize_(
            (
                self.paged_attention.shape[0] + KV_CACHE_INCREMENT,
                2,
                bsz,
                n_kv_heads,
                head_dim,
            )
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.attention.resize_(
            (bsz, n_heads, 1, self.attention.shape[-1] + KV_CACHE_INCREMENT)
        )

    # Projections write into the preallocated scratch buffers.
    Qn = fast_linear_forward(self.q_proj, Xn, out = self.temp_QA[0])
    Kn = fast_linear_forward(self.k_proj, Xn, out = self.temp_KV[0])
    # Falcon H1 scales the key states; done in place on the scratch buffer.
    Kn.mul_(self.config.key_multiplier)
    Vn = fast_linear_forward(self.v_proj, Xn, out = self.temp_KV[1])
    Qn = Qn.view(
        bsz, 1, n_heads, head_dim
    )  # .transpose(1, 2) # we will transpose after normalisation
    Kn = Kn.view(
        bsz, 1, n_kv_heads, head_dim
    )  # .transpose(1, 2) # we will transpose after normalisation
    Vn = Vn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)

    Qn = Qn.transpose(1, 2)
    Kn = Kn.transpose(1, 2)

    # cos, sin = self.rotary_emb(Vn, seq_len = kv_seq_len)
    # Qn, Kn = inplace_rope_embedding(Qn, Kn, cos, sin, position_ids)

    # Need to do it prior 2 steps before hitting full on short KV cache
    # or else error
    self.rotary_emb.extend_rope_embedding(Vn, seq_len + 2)
    cos, sin = self.rotary_emb.get_cached(kv_seq_len, Qn.device.index)
    cos = cos[position_ids].unsqueeze(1)
    sin = sin[position_ids].unsqueeze(1)
    h = self.half_head_dim

    # In-place RoPE on Q: RH_Q holds the half-swapped copy with the first half
    # negated, then Q = Q * cos + RH_Q * sin.
    RH_Q = self.RH_Q
    RH_Q[:, :, :, :h] = Qn[:, :, :, h:]
    RH_Q[:, :, :, h:] = Qn[:, :, :, :h]
    RH_Q[:, :, :, :h].neg_()  # torch.neg(RH_Q[:,:,:,:h], out = RH_Q[:,:,:,:h])
    Qn *= cos
    Qn.addcmul_(RH_Q, sin)

    # Same rotation for K, reusing the first n_kv_heads rows of RH_Q as scratch
    # (Q's rotation has already been consumed above).
    RH_K = RH_Q[
        :, :n_kv_heads, :, :
    ]  # torch.empty((n_kv_heads, 1, head_dim), dtype = dtype, device = "cuda:0")
    RH_K[:, :, :, :h] = Kn[:, :, :, h:]
    RH_K[:, :, :, h:] = Kn[:, :, :, :h]
    RH_K[:, :, :, :h].neg_()  # torch.neg(RH_K[:,:,:,:h], out = RH_K[:,:,:,:h])
    Kn *= cos
    Kn.addcmul_(RH_K, sin)

    # New KV cache
    # Kn = torch.cat([K1, Kn], dim = 2)
    # Vn = torch.cat([V1, Vn], dim = 2)
    # Write the new token at index `seq_len`, then view the cache as
    # (bsz, n_kv_heads, kv_seq_len, head_dim).
    self.paged_attention_K[seq_len] = Kn.permute(2, 0, 1, 3)
    self.paged_attention_V[seq_len] = Vn.permute(2, 0, 1, 3)
    Kn = self.paged_attention_K[:kv_seq_len].permute(1, 2, 0, 3)
    Vn = self.paged_attention_V[:kv_seq_len].permute(1, 2, 0, 3)

    # Handle sliding windows
    sliding_window = getattr(self.config, "sliding_window", None)
    if sliding_window is not None and kv_seq_len > sliding_window:
        start = kv_seq_len - sliding_window
        Knn = Kn[:, :, start:, :]  # .contiguous()
        Vnn = Vn[:, :, start:, :]  # .contiguous()
        if attention_mask is not None:
            attention_mask = attention_mask[..., start:]
    else:
        Knn, Vnn = Kn, Vn

    # Grouped query attention
    _, _, cached_len, _ = Knn.shape
    # `and` binds tighter than `or`: K/V are expanded whenever bsz == 1, or when
    # SDPA lacks GQA support and n_groups != 1.
    if bsz == 1 or not SDPA_HAS_GQA and n_groups != 1:
        Knn = Knn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Vnn = Vnn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Knn = Knn.reshape(bsz, n_heads, cached_len, head_dim)
        Vnn = Vnn.reshape(bsz, n_heads, cached_len, head_dim)

    # Attention
    if bsz == 1:
        # Manual matmul + softmax path; no mask is applied here.
        Qn *= self.scalar  # See https://github.com/ggerganov/llama.cpp/issues/7805#issuecomment-2153349963
        # It seems like doing (Q * scalar) @ K is better than (Q @ K) * scalar to stop overflows
        A = torch_matmul(
            Qn, Knn.transpose(2, 3), out = self.attention[:, :, :, :cached_len]
        )
        A[:] = torch_nn_functional_softmax(
            A, dim = -1, dtype = torch.float32
        )  # .to(A.dtype)
        # The weighted sum overwrites Qn's storage.
        A = torch_matmul(A, Vnn, out = Qn)
    else:
        if SDPA_HAS_GQA:
            A = scaled_dot_product_attention(
                Qn, Knn, Vnn, attn_mask = attention_mask, is_causal = False, enable_gqa = True
            )
        else:
            A = scaled_dot_product_attention(
                Qn, Knn, Vnn, attn_mask = attention_mask, is_causal = False
            )
    A = A.transpose(1, 2)
    A = A.reshape(bsz, 1, attention_size)
    A = fast_linear_forward(self.o_proj, A, out = self.temp_O)
    return A, (Kn, Vn)


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/falcon_h1/modeling_falcon_h1.py
def FalconH1DecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask = None,
    attention_mask: Optional[torch.Tensor] = None,
    mamba_attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    cache_position: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
    """
    Hybrid FalconH1 decoder layer: attention and Mamba run in parallel on the
    same normed input, their scaled outputs are summed and added to the
    residual, followed by the feed-forward block with its own residual.

    The attention branch receives the normed hidden states scaled by
    `self.attention_in_multiplier`; the Mamba branch receives them unscaled.
    Attention output is scaled by `self.attn_out_multiplier` and Mamba output
    by `self.ssm_out_multiplier`.

    Args:
        hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
        causal_mask: forwarded unchanged to the attention module.
        attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
            `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
        mamba_attention_mask (`torch.Tensor`, *optional*): mask forwarded to the Mamba mixer.
        position_ids (`torch.LongTensor`, *optional*): forwarded to the attention module.
        cache_position (`torch.LongTensor`, *optional*): forwarded to the Mamba mixer.
        past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states;
            also passed to the Mamba mixer as `cache_params`.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under
            returned tensors for more detail.
        use_cache (`bool`, *optional*):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
            (see `past_key_values`).
        padding_mask (`torch.LongTensor`, *optional*): forwarded to the attention module.
        position_embeddings (`Tuple[torch.Tensor, torch.Tensor]`, *optional*): forwarded to the attention module.

    Returns:
        Tuple `(hidden_states, [self_attn_weights], [present_key_value])`; the
        optional entries are appended when `output_attentions` / `use_cache`
        are truthy.
    """
    if use_cache and hasattr(self, "_flag_for_generation"):
        # Generation path: inference-only norm / MLP kernels and in-place adds.
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference(
            self.input_layernorm, hidden_states
        )
        # Scale only the attention input; Mamba below sees the unscaled tensor.
        # This matches the single-token decode path, which also applies
        # `attention_in_multiplier` before attention.
        attention_hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states * self.attention_in_multiplier,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            position_embeddings = position_embeddings,
            **kwargs,
        )
        attention_hidden_states = attention_hidden_states * self.attn_out_multiplier

        mamba_hidden_states = self.mamba(
            hidden_states = hidden_states,
            cache_params = past_key_value,
            cache_position = cache_position,
            attention_mask = mamba_attention_mask,
        )
        mamba_hidden_states = mamba_hidden_states * self.ssm_out_multiplier

        hidden_states = mamba_hidden_states + attention_hidden_states

        hidden_states += residual

        # Fully Connected
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference(
            self.pre_ff_layernorm, hidden_states
        )
        hidden_states = fast_swiglu_inference(self.feed_forward, hidden_states)
        hidden_states += residual
    else:
        # Training / regular forward path: out-of-place ops only.
        residual = hidden_states
        hidden_states = fast_rms_layernorm(self.input_layernorm, hidden_states)

        mamba_hidden_states = self.mamba(
            hidden_states = hidden_states,
            cache_params = past_key_value,
            cache_position = cache_position,
            attention_mask = mamba_attention_mask,
        )
        mamba_hidden_states = mamba_hidden_states * self.ssm_out_multiplier

        # Scale only the attention input (see note in the branch above).
        attention_hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states * self.attention_in_multiplier,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            position_embeddings = position_embeddings,
            **kwargs,
        )
        attention_hidden_states = attention_hidden_states * self.attn_out_multiplier

        hidden_states = mamba_hidden_states + attention_hidden_states

        # residual connection after attention + Mamba
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = fast_rms_layernorm(self.pre_ff_layernorm, hidden_states)
        hidden_states = self.feed_forward(hidden_states)
        hidden_states = residual + hidden_states

    outputs = (hidden_states,)
    if output_attentions:
        outputs += (self_attn_weights,)
    if use_cache:
        outputs += (present_key_value,)
    return outputs


def _FalconH1_fast_forward_inference(
    attention_fast_forward_inference = FalconH1Attention_fast_forward_inference,
    mlp_fast_forward_inference = fast_swiglu_inference,
):
    """Build the single-token decode forward for FalconH1.

    Args:
        attention_fast_forward_inference: Callable used for each layer's
            attention step; called with the layer's `self_attn` module plus
            `hidden_states`, `past_key_value`, `position_ids`,
            `attention_mask` and `do_prefill`.
        mlp_fast_forward_inference: Callable used for each layer's
            feed-forward step; called with the layer's `feed_forward` module,
            the activations, scratch buffers and the MLP multipliers.

    Returns:
        A function `(self, input_ids, past_key_values, position_ids,
        cache_position, attention_mask, mamba_attention_mask)` returning a
        `BaseModelOutputWithPast`.
    """
    # This makes the attention and MLP customisable.
    # Now for models like qwen3 or cohere which use custom attention operations, we can use this function
    def FalconH1Model_fast_forward_inference_custom(
        self,
        input_ids,
        past_key_values,
        position_ids,
        cache_position = None,
        attention_mask = None,
        mamba_attention_mask = None,
    ):
        """Run one decode step (`q_len == 1` is asserted) through every layer."""
        input_ids = input_ids[:, : self.max_seq_length]
        bsz, q_len = input_ids.shape
        hd = self.config.hidden_size
        mlp_size = self.config.intermediate_size
        gate_multiplier, down_multiplier = self.config.mlp_multipliers

        X = self.model.embed_tokens(input_ids)
        X = X * self.config.embedding_multiplier

        X = X.to(_get_dtype(dtype_from_config(self.config)))
        # Shapes are re-read from the embedded tensor; this overwrites the
        # values taken from `input_ids` and the config above.
        bsz, q_len, hd = X.shape
        assert q_len == 1
        # Get saved buffers to reduce memory movement
        # NOTE(review): every scratch buffer is placed on device index 0 —
        # confirm this is intended when layers live on other devices.
        residual = torch.empty(
            (bsz, q_len, hd), dtype = torch.float32, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        _XX = torch.empty(
            (2, bsz, q_len, hd), dtype = torch.float32, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        XX, XX2 = _XX[0], _XX[1]
        variance = torch.empty(
            (bsz, q_len, 1), dtype = torch.float32, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        temp_mlp = torch.empty(
            (2, bsz, 1, mlp_size), dtype = X.dtype, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        temp_gate, temp_up = temp_mlp[0], temp_mlp[1]
        seq_len = past_key_values[0][0].shape[-2]
        # A 4D mask is only built for batched decoding; a single sequence
        # runs without any mask.
        if bsz != 1:
            attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
                attention_mask,
                (bsz, q_len),
                X,
                seq_len,
                sliding_window = getattr(self.config, "sliding_window", None),
            )
        else:
            attention_mask = None

        next_decoder_cache = []

        for idx, decoder_layer in enumerate(self.model.layers):
            residual.copy_(X)  # residual = X
            X = fast_rms_layernorm_inference(
                decoder_layer.input_layernorm,
                X,
                XX = XX,
                XX2 = XX2,
                variance = variance,
            )
            # Attention sees the normed input scaled by attention_in_multiplier;
            # the Mamba mixer below sees it unscaled.
            attention_hidden_states, present_key_value = (
                attention_fast_forward_inference(
                    decoder_layer.self_attn,
                    hidden_states = X * decoder_layer.attention_in_multiplier,
                    past_key_value = past_key_values[idx],
                    position_ids = position_ids,
                    attention_mask = attention_mask,
                    do_prefill = not hasattr(decoder_layer.self_attn, "paged_attention"),
                )
            )
            attention_hidden_states = (
                attention_hidden_states * decoder_layer.attn_out_multiplier
            )
            # NOTE(review): `cache_params` receives the attention step's
            # returned key/value pair here — confirm the mixer expects that.
            mamba_hidden_states = decoder_layer.mamba(
                hidden_states = X,
                cache_params = present_key_value,
                cache_position = cache_position,
                attention_mask = mamba_attention_mask,
            )
            mamba_hidden_states = mamba_hidden_states * decoder_layer.ssm_out_multiplier
            X = mamba_hidden_states + attention_hidden_states

            X += residual

            residual.copy_(X)  # residual = X
            X = fast_rms_layernorm_inference(
                decoder_layer.pre_ff_layernorm,
                X,
                XX = XX,
                XX2 = XX2,
                variance = variance,
            )
            X = mlp_fast_forward_inference(
                decoder_layer.feed_forward,
                X,
                temp_gate = temp_gate,
                temp_up = temp_up,
                gate_multiplier = gate_multiplier,
                down_multiplier = down_multiplier,
            )
            X += residual

            next_decoder_cache.append(present_key_value)
        X = fast_rms_layernorm_inference(
            self.model.final_layernorm,
            X,
            XX = XX,
            XX2 = XX2,
            variance = variance,
        )

        # Hidden states and attentions are never collected on this path.
        return BaseModelOutputWithPast(
            last_hidden_state = X,
            past_key_values = next_decoder_cache,
            hidden_states = [],
            attentions = [],
        )

    return FalconH1Model_fast_forward_inference_custom


# Separate prepare_inputs_for_generation for Hybrid FalconH1
def _fast_prepare_inputs_for_generation(
    self,
    input_ids,
    past_key_values = None,
    attention_mask = None,
    inputs_embeds = None,
    cache_position = None,
    position_ids = None,
    use_cache = True,
    **kwargs,
):
    """Assemble the keyword arguments for one generation step.

    With a cache present, `input_ids` is trimmed to the tokens that have not
    been processed yet (selected through `cache_position`). Missing
    `position_ids` are derived from `attention_mask`. `inputs_embeds` is only
    forwarded on the very first step (no cache yet); otherwise `input_ids` is.

    Returns:
        Dict with either `inputs_embeds` or `input_ids`, followed by
        `position_ids`, `past_key_values`, `use_cache`, `attention_mask`,
        `logits_to_keep` and `cache_position`.
    """
    # Overwritten -- has a unique cache type, `FalconHybridMambaAttentionDynamicCache`
    has_cache = past_key_values is not None

    if has_cache:
        # Keep the trailing `len(cache_position)` tokens when embeddings were
        # supplied, when compiling, or when cache_position runs past the input
        # (synced-GPU dummy step). The checks run in exactly this order.
        keep_tail = inputs_embeds is not None
        if not keep_tail:
            keep_tail = (
                is_torchdynamo_compiling() or cache_position[-1] >= input_ids.shape[1]
            )
        if keep_tail:
            input_ids = input_ids[:, -cache_position.shape[0] :]
        elif input_ids.shape[1] != cache_position.shape[0]:
            # Default case; equal lengths mean the caller already sliced.
            input_ids = input_ids[:, cache_position]
    # TODO: Wire up Cache to work for inference.
    # (A FalconHybridMambaAttentionDynamicCache would be created here when no
    # cache was passed in.)

    if position_ids is None and attention_mask is not None:
        # Positions for batched generation: running count of attended tokens,
        # with masked slots set to 1.
        position_ids = attention_mask.long().cumsum(-1) - 1
        position_ids.masked_fill_(attention_mask == 0, 1)
        if has_cache:
            position_ids = position_ids[:, -input_ids.shape[1] :]

    # Embeddings are only honoured on the first step.
    first_step_with_embeds = inputs_embeds is not None and not has_cache
    if first_step_with_embeds:
        model_inputs = {"inputs_embeds": inputs_embeds}
    else:
        # `contiguous()` needed for compilation use cases
        model_inputs = {"input_ids": input_ids.contiguous()}

    model_inputs["position_ids"] = position_ids
    model_inputs["past_key_values"] = past_key_values
    model_inputs["use_cache"] = use_cache
    model_inputs["attention_mask"] = attention_mask
    model_inputs["logits_to_keep"] = self.config.num_logits_to_keep
    model_inputs["cache_position"] = cache_position
    return model_inputs


def fix_prepare_inputs_for_generation(module):
    """Swap in the FalconH1-specific `prepare_inputs_for_generation`.

    Objects that do not already expose that attribute are left untouched.
    """
    if not hasattr(module, "prepare_inputs_for_generation"):
        return
    module.prepare_inputs_for_generation = _fast_prepare_inputs_for_generation


class FastFalconH1Model(FastLlamaModel):
    """FalconH1 variant of `FastLlamaModel` that installs Unsloth's fast forwards."""

    @staticmethod
    def pre_patch():
        """Monkey-patch the FalconH1 classes with the fast forward implementations.

        Mutates the transformers FalconH1 classes (and `PeftModelForCausalLM`)
        in place by rebinding their `forward` attributes. Returns `None`.
        """
        # `patch_linear_scaling` hands back a name plus source text; when a name is
        # returned, the source is executed into this module's globals and the
        # resulting object becomes the attention class's `__init__`.
        init_name, function = patch_linear_scaling(
            model_name = "FalconH1",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = FalconH1Attention,
        )
        if init_name is not None:
            exec(function, globals())
            FalconH1Attention.__init__ = eval(init_name)
        FalconH1Attention.forward = FalconH1Attention_fast_forward
        FalconH1DecoderLayer.forward = FalconH1DecoderLayer_fast_forward
        FalconH1Model.forward = LlamaModel_fast_forward
        FalconH1ForCausalLM.forward = CausalLM_fast_forward(
            _FalconH1_fast_forward_inference(FalconH1Attention_fast_forward_inference)
        )
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(FalconH1ForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.falcon_h1.modeling_falcon_h1

        transformers.models.falcon_h1.modeling_falcon_h1.FalconH1RotaryEmbedding = (
            LlamaRotaryEmbedding
        )
        return

    # NOTE(review): the default `model_name` below looks like a placeholder
    # (see the TODO) — confirm the intended default repository id.
    @staticmethod
    def from_pretrained(  # TODO: Change after release
        model_name = "Qwen/FalconH1-7B",
        max_seq_length = 4096,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        **kwargs,
    ):
        """Load a model by delegating to `FastLlamaModel.from_pretrained`.

        Every argument is forwarded unchanged except `model_patcher`, which is
        ignored: `FastFalconH1Model` is always passed as the patcher.
        """
        return FastLlamaModel.from_pretrained(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = FastFalconH1Model,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
            **kwargs,
        )


================================================
FILE: unsloth/models/gemma.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
from .llama import _get_rope_theta
from ._utils import __version__
from unsloth_zoo.utils import _get_dtype, Version
from unsloth_zoo.hf_utils import dtype_from_config
from ..utils.packing import (
    build_sdpa_packed_attention_mask,
    build_xformers_block_causal_mask,
    get_packed_info_from_kwargs,
)
import math

# Import the Gemma modelling classes from transformers. If the import raises,
# the handler below converts the failure into an ImportError only when the
# installed transformers is older than 4.38. On newer versions the failure is
# swallowed, so any name that was not bound before the error stays undefined.
try:
    from transformers.models.gemma.modeling_gemma import (
        GemmaAttention,
        GemmaDecoderLayer,
        GemmaModel,
        GemmaForCausalLM,
        GemmaRotaryEmbedding,
        apply_rotary_pos_emb,
        repeat_kv,
    )
except:
    # NOTE(review): `transformers_version` is not defined in this file -
    # presumably it arrives through the star-import of `.llama`; confirm.
    # It is rebound here to a `Version` object.
    transformers_version = Version(transformers_version)
    if not transformers_version >= Version("4.38"):
        raise ImportError(
            f"Unsloth: Your transformers version of {transformers_version} does not support Gemma.\n"
            f"The minimum required version is 4.38.\n"
            f'Try `pip install --upgrade "transformers>=4.38"`\n'
            f"to obtain the latest transformers build, then restart this session."
        )

from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)

# For Pytorch 2.1.1
# Optional attention variants. When they cannot be imported, both names are
# aliased to the plain GemmaAttention class so that code assigning to
# `GemmaSdpaAttention.forward` / `GemmaFlashAttention2.forward` still works.
try:
    from transformers.models.gemma.modeling_gemma import (
        GemmaSdpaAttention,
        GemmaFlashAttention2,
    )
except:
    GemmaSdpaAttention = GemmaAttention
    GemmaFlashAttention2 = GemmaAttention


# Module-level alias of torch's functional GELU, used by fast_geglu_inference.
torch_nn_functional_gelu = torch.nn.functional.gelu


def fast_geglu_inference(self, X):
    """Compute down_proj(gelu_tanh(gate_proj(X)) * up_proj(X)).

    `self` is the MLP module providing `gate_proj`, `up_proj` and `down_proj`;
    `X` must be 3-dimensional. The three projections go through
    `fast_linear_forward`, and the down projection is given a slice of the
    up-projection result as its `out` argument.
    """
    _, _, hidden_dim = X.shape

    gate_out = fast_linear_forward(self.gate_proj, X)
    up_out = fast_linear_forward(self.up_proj, X)

    # Tanh-approximated GELU on the gate, then multiply by `up` in place.
    activated = torch_nn_functional_gelu(gate_out, approximate = "tanh")
    activated *= up_out

    # The first `hidden_dim` columns of the up buffer are passed as `out`.
    return fast_linear_forward(
        self.down_proj, activated, out = up_out[:, :, :hidden_dim]
    )


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L590
def GemmaDecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    *args,
    **kwargs,
):
    """Run one Gemma decoder layer: attention block followed by MLP block.

    Two code paths exist:

    * Generation path, taken when `use_cache` is truthy and the layer has a
      `_flag_for_generation` attribute. It uses the inference RMS-norm helper,
      `fast_geglu_inference` and in-place residual additions.
    * Default path, which uses `fast_rms_layernorm(..., gemma = True)`, the
      layer's own `self.mlp` and out-of-place residual additions.

    Extra positional `*args` are accepted and ignored; `**kwargs` are
    forwarded to `self.self_attn`.

    Returns:
        A tuple starting with the output hidden states, followed by the
        attention weights when `output_attentions` is truthy and by the
        present key/value when `use_cache` is truthy.
    """
    if use_cache and hasattr(
        self, "_flag_for_generation"
    ):  # past_key_value is not None:
        # float32 scratch tensor handed to the inference RMS-norm helper.
        # It is allocated on the norm weight's own device so that a layer
        # placed on a device other than index 0 gets a buffer on its device.
        norm_weight = self.input_layernorm.weight
        out_weight = torch.empty(
            norm_weight.shape,
            dtype = torch.float32,
            device = norm_weight.device,
        )

        # Self Attention
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            self.input_layernorm, hidden_states, out_weight
        )
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            **kwargs,
        )
        hidden_states += residual

        # Fully Connected
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            self.post_attention_layernorm, hidden_states, out_weight
        )
        hidden_states = fast_geglu_inference(self.mlp, hidden_states)
        hidden_states += residual
    else:
        residual = hidden_states
        hidden_states = fast_rms_layernorm(
            self.input_layernorm, hidden_states, gemma = True
        )
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            **kwargs,
        )
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = fast_rms_layernorm(
            self.post_attention_layernorm, hidden_states, gemma = True
        )
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

    outputs = (hidden_states,)
    if output_attentions:
        outputs += (self_attn_weights,)
    if use_cache:
        outputs += (present_key_value,)
    return outputs


from math import sqrt as math_sqrt


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L825
# @torch.inference_mode
def GemmaModel_fast_forward_inference(
    self,
    input_ids,
    past_key_values,
    position_ids,
    attention_mask = None,
    **kwargs,
):
    """Run all Gemma decoder layers for a decode step using the inference helpers.

    The cached length is read from `past_key_values[0][0].shape[-2]` and the
    step is treated as adding one position to it. For batch sizes other than 1
    a 4D causal mask is prepared once and converted to bool for all layers.
    `**kwargs` are accepted and not used.

    Returns:
        A `BaseModelOutputWithPast` whose `last_hidden_state` is the output of
        the final norm, whose `past_key_values` is the list of per-layer
        present key/values, and whose `hidden_states` / `attentions` are
        empty lists.
    """
    # One float32 scratch tensor per device index, shaped like a norm weight.
    norm_scratch = tuple(
        torch.empty_like(
            self.model.layers[0].input_layernorm.weight,
            dtype = torch.float32,
            device = torch.device(device_id),
        )
        for device_id in range(DEVICE_COUNT)
    )

    input_ids = input_ids[:, : self.max_seq_length]
    embedded = self.model.embed_tokens(input_ids)
    hidden_states = embedded.to(_get_dtype(dtype_from_config(self.config)))
    # 3072**0.5 = 55.5000 in bfloat16, whilst 55.4256 in float32
    # 2048**0.5 = 45.2500 in bfloat16, whilst 45.2548 in float32
    embedding_scale = torch.tensor(
        math_sqrt(self.config.hidden_size), dtype = hidden_states.dtype
    )
    hidden_states *= embedding_scale

    bsz, q_len, _ = hidden_states.shape
    past_len = past_key_values[0][0].shape[-2]
    kv_seq_len = past_len + 1
    if bsz != 1:
        attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask,
            (bsz, q_len),
            hidden_states,
            past_len,
        )
        # Convert to bool a single time here instead of once per layer.
        needs_bool = not (
            attention_mask is None or attention_mask.dtype == torch.bool
        )
        if needs_bool:
            attention_mask = attention_mask.eq(0)

    # Computed once up front so the .item() sync is not repeated per layer.
    largest_position = int(position_ids.max().item())
    rotary_seq_len = max(kv_seq_len, largest_position + 1)

    updated_cache = []
    for layer_idx, layer in enumerate(self.model.layers):
        device_index = getattr(layer, "_per_layer_device_index", 0)
        hidden_states, position_ids = move_to_device(
            device_index, hidden_states, position_ids
        )
        scratch = norm_scratch[device_index]

        # Attention block with in-place residual add.
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            layer.input_layernorm, hidden_states, scratch
        )
        hidden_states, present_key_value = LlamaAttention_fast_forward_inference(
            layer.self_attn,
            hidden_states = hidden_states,
            past_key_value = past_key_values[layer_idx],
            position_ids = position_ids,
            attention_mask = attention_mask,
            do_prefill = not hasattr(layer.self_attn, "paged_attention"),
            rotary_seq_len = rotary_seq_len,
        )
        hidden_states += residual

        # MLP block with in-place residual add.
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            layer.post_attention_layernorm, hidden_states, scratch
        )
        hidden_states = fast_geglu_inference(layer.mlp, hidden_states)
        hidden_states += residual

        updated_cache.append(present_key_value)

    # The final norm reuses the scratch of the last layer's device index.
    hidden_states = fast_rms_layernorm_inference_gemma(
        self.model.norm, hidden_states, norm_scratch[device_index]
    )

    return BaseModelOutputWithPast(
        last_hidden_state = hidden_states,
        past_key_values = updated_cache,
        hidden_states = [],
        attentions = [],
    )


# Follows line by line https://github.com/google-deepmind/gemma/blob/main/gemma/positional_embeddings.py#L45
# Formulates cos and sin differently from Llama!
class GemmaFixedRotaryEmbedding(torch.nn.Module):
    """Gemma rotary embedding keeping one cos/sin cache per device index.

    The angle tables are computed on CPU in float32 and then copied to each
    device index in `range(DEVICE_COUNT)`. They live in the plain Python lists
    `multi_gpu_cos_cached` / `multi_gpu_sin_cached`. `current_rope_size`
    records the number of positions currently cached and is shared by all
    devices.
    """

    # Fixes https://github.com/huggingface/transformers/pull/28837
    # https://github.com/microsoft/DeepSpeed/issues/4932
    # The precision of RoPE buffers is not correct, so we cast to int64.
    def __init__(
        self,
        dim = None,
        max_position_embeddings = 2048,
        base = 10000,
        device = None,
        config = None,  # [TODO] Hack to pass in config - need to remove later
    ):
        """Build the initial caches.

        Args:
            dim: Rotary dimension, or a config object when called as
                `RotaryEmbedding(config)` (detected through the presence of a
                `max_position_embeddings` attribute).
            max_position_embeddings: Upper bound used for the initial cache
                size; overridden by `config` when one is given.
            base: RoPE base; overridden through `_get_rope_theta` when a
                config is given.
            device: Accepted for signature compatibility and not used; caches
                are always built for every index in `range(DEVICE_COUNT)`.
            config: Optional model config supplying `head_dim` (or
                `hidden_size // num_attention_heads`) and
                `max_position_embeddings`.
        """
        super().__init__()
        # In transformers 5.0+, RotaryEmbedding(config) passes config as first positional arg (dim)
        if (
            config is None
            and dim is not None
            and hasattr(dim, "max_position_embeddings")
        ):
            config = dim
            dim = None
        if config is not None:
            # [TODO] Hack to pass in config - need to remove later
            base = _get_rope_theta(config, default = base)
            # NOTE: config.partial_rotary_factor is not applied here; the
            # cache is always built for the full `dim`.
            dim = getattr(config, "head_dim", None)
            if dim is None:
                dim = int((config.hidden_size // config.num_attention_heads))
            max_position_embeddings = config.max_position_embeddings
        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base
        # Dynamic RoPE we first set it to a max of 4 * 8192 tokens then we iteratively grow this
        self.current_rope_size = min(4 * 8192, self.max_position_embeddings)
        self.multi_gpu_cos_cached = [None] * DEVICE_COUNT
        self.multi_gpu_sin_cached = [None] * DEVICE_COUNT

        # Build here to make `torch.jit.trace` work.
        self._build_caches_on_all_devices(
            self.current_rope_size, torch.get_default_dtype()
        )

        # dummy so that patch_utils doesn't fail for now
        # NOTE(review): these use torch.cuda explicitly - confirm this is
        # intended on non-CUDA device types.
        self.cos_cached = torch.empty(
            1, device = torch.cuda.current_device(), dtype = torch.get_default_dtype()
        )
        self.sin_cached = torch.empty(
            1, device = torch.cuda.current_device(), dtype = torch.get_default_dtype()
        )

    def _build_caches_on_all_devices(self, seq_len, dtype):
        """Rebuild the cos/sin cache of length `seq_len` on every device index."""
        for device_index in range(DEVICE_COUNT):
            self._set_cos_sin_cache(
                seq_len = seq_len,
                device = torch.device(device_index),
                dtype = dtype,
            )

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """Compute cos/sin tables for `seq_len` positions and store them for `device`.

        Sets `current_rope_size` to `seq_len`. `dtype` is accepted but not
        applied: the tables stay in float32. Returns `(cos, sin)`.
        """
        # Note: on the original Llama codebase, these tensors are created on the target device (and not on CPU) and
        # in FP32. They are applied (multiplied) in FP32 as well.
        self.current_rope_size = seq_len

        # The difference is we do division explicitly instead of t * (1/x) ie we do t/x.
        freq_exponents = (2.0 / self.dim) * (
            torch.arange(self.dim // 2, dtype = torch.int64, device = "cpu").float()
        )
        timescale = self.base**freq_exponents
        positions = torch.arange(
            self.current_rope_size, device = "cpu", dtype = torch.int64
        ).float()
        radians_new = positions[..., None] / timescale[None, None, :]
        radians_new = radians_new.squeeze(0)

        emb = torch.cat((radians_new, radians_new), dim = -1)
        # We must do RoPE in float32!
        cos = emb.cos().to(device = device, non_blocking = True)  # , dtype = dtype)
        sin = emb.sin().to(device = device, non_blocking = True)  # , dtype = dtype)
        self.multi_gpu_cos_cached[device.index] = cos
        self.multi_gpu_sin_cached[device.index] = sin
        return cos, sin

    def forward(self, x, position_ids = None, seq_len = None):
        """Return the cos/sin tables for `x.device`, sliced to `seq_len` rows.

        When `seq_len` exceeds the cached size the caches are rebuilt first.
        `position_ids` is accepted and not used.
        """
        # x: [bs, num_attention_heads, seq_len, head_size]
        if seq_len is not None and seq_len > self.current_rope_size:
            # Rebuild on every device, not only x.device: current_rope_size is
            # shared, so rebuilding a single device would leave the other
            # caches shorter than the recorded size.
            self._build_caches_on_all_devices(seq_len, x.dtype)

        device_index = x.device.index

        return (
            self.multi_gpu_cos_cached[device_index][:seq_len],
            self.multi_gpu_sin_cached[device_index][:seq_len],
        )

    def get_cached(self, seq_len = None, device_index = None):
        """Return the full `(cos, sin)` cache for `device_index`.

        `seq_len` is accepted and not used. When `device_index` is None the
        current CUDA device is used.
        """
        if device_index is None:
            device_index = torch.cuda.current_device()
        return self.multi_gpu_cos_cached[device_index], self.multi_gpu_sin_cached[
            device_index
        ]

    def extend_rope_embedding(self, x, seq_len):
        """Grow every device's cache so it covers at least `seq_len` positions.

        Does nothing when the cache is already large enough. Only the dtype
        of `x` is read.
        """
        if seq_len <= self.current_rope_size:
            return
        # Iteratively grow by increments of 8192
        self.current_rope_size = math.ceil(seq_len / 8192) * 8192
        self._build_caches_on_all_devices(self.current_rope_size, x.dtype)


class GemmaFixedLinearScalingRotaryEmbedding(GemmaFixedRotaryEmbedding):
    """GemmaFixedRotaryEmbedding with linear scaling: positions are divided by `scaling_factor`. Credits to the Reddit user /u/kaiokendev"""

    # Fixes https://github.com/huggingface/transformers/pull/28837
    # https://github.com/microsoft/DeepSpeed/issues/4932
    # The precision of RoPE buffers is not correct, so we cast to int64.
    def __init__(
        self,
        dim = None,
        max_position_embeddings = 2048,
        base = 10000,
        device = None,
        scaling_factor = 1.0,
        config = None,  # [TODO] Hack to pass in config - need to remove later
    ):
        """Store `scaling_factor`, then run the parent constructor.

        The factor has to be set first because the parent constructor builds
        the caches through `_set_cos_sin_cache`, which reads it.
        """
        self.scaling_factor = scaling_factor
        parent_arguments = dict(
            dim = dim,
            max_position_embeddings = max_position_embeddings,
            base = base,
            device = device,
            config = config,
        )
        super().__init__(**parent_arguments)

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """Build cos/sin tables for `seq_len` scaled positions and store them for `device`.

        Sets `current_rope_size` to `seq_len`; `dtype` is accepted but the
        tables stay in float32. Returns `(cos, sin)`.
        """
        # Tables are computed on CPU in FP32 and only then copied to `device`.
        self.current_rope_size = seq_len

        # Explicit division t / x is used rather than t * (1 / x).
        half_dim_index = torch.arange(
            self.dim // 2, dtype = torch.int64, device = "cpu"
        ).float()
        freq_exponents = (2.0 / self.dim) * half_dim_index
        timescale = self.base**freq_exponents

        scaled_positions = torch.arange(
            self.current_rope_size, device = "cpu", dtype = torch.int64
        ).float()
        scaled_positions = scaled_positions / self.scaling_factor

        angles = scaled_positions[..., None] / timescale[None, None, :]
        angles = angles.squeeze(0)
        table = torch.cat((angles, angles), dim = -1)

        # RoPE must stay in float32, so no dtype cast is applied here.
        cos = table.cos().to(device = device, non_blocking = True)
        sin = table.sin().to(device = device, non_blocking = True)

        slot = device.index
        self.multi_gpu_cos_cached[slot] = cos
        self.multi_gpu_sin_cached[slot] = sin
        return cos, sin


class FastGemmaModel(FastLlamaModel):
    """Gemma variant of FastLlamaModel: installs Gemma-specific patches."""

    @staticmethod
    def pre_patch():
        """Replace the transformers Gemma forwards and rotary embedding class.

        Optionally swaps `GemmaAttention.__init__` for the function generated
        by `patch_linear_scaling`, points every Gemma attention class at
        `LlamaAttention_fast_forward`, patches the decoder layer, model and
        causal-LM forwards, and rebinds `GemmaRotaryEmbedding` in the
        transformers module to `GemmaFixedRotaryEmbedding`.
        """
        init_name, function = patch_linear_scaling(
            model_name = "gemma",
            rope_module = GemmaFixedRotaryEmbedding,
            scaled_rope_module = GemmaFixedLinearScalingRotaryEmbedding,
            attention_module = GemmaAttention,
        )
        if init_name is not None:
            # Define the generated source in module globals, then look it up.
            exec(function, globals())
            GemmaAttention.__init__ = eval(init_name)

        for attention_class in (
            GemmaAttention,
            GemmaSdpaAttention,
            GemmaFlashAttention2,
        ):
            attention_class.forward = LlamaAttention_fast_forward

        GemmaDecoderLayer.forward = GemmaDecoderLayer_fast_forward
        GemmaModel.forward = LlamaModel_fast_forward
        GemmaForCausalLM.forward = CausalLM_fast_forward(
            GemmaModel_fast_forward_inference
        )
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(GemmaForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.gemma.modeling_gemma as gemma_modeling

        gemma_modeling.GemmaRotaryEmbedding = GemmaFixedRotaryEmbedding
        return

    @staticmethod
    def post_patch(model, tokenizer, correct_dtype = None):
        """Finalize a loaded Gemma model and return `(model, tokenizer)`.

        Runs `patch_model_and_tokenizer` without RoPE downcasting, leaves
        only LoRA A/B parameters trainable, gives every `GemmaRMSNorm` a
        `variance_epsilon` attribute when it lacks one, and then collects
        garbage / empties the CUDA cache three times.
        """
        # Gemma does not downcast RoPE
        model, tokenizer = patch_model_and_tokenizer(
            model, tokenizer, downcast_rope = False, correct_dtype = correct_dtype
        )

        # The norm computes output * (1 + self.weight):
        # https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma/modeling_gemma.py#L89
        from transformers.models.gemma.modeling_gemma import GemmaRMSNorm

        # Freeze everything except LoRA weights. This happens first because
        # an in-place += 1 does not get along with requires_grad = True.
        for param_name, param in model.named_parameters():
            is_lora_weight = ".lora_A." in param_name or ".lora_B." in param_name
            param.requires_grad_(is_lora_weight)

        # Patch RMS Layernorm
        for _module_name, module in model.named_modules():
            if not isinstance(module, GemmaRMSNorm):
                continue
            # Must be in float32
            # https://github.com/keras-team/keras-nlp/blob/v0.8.2/keras_nlp/models/gemma/rms_normalization.py#L36
            # The + 1 on the weight is left to the Triton kernel itself.
            if hasattr(module, "variance_epsilon"):
                continue
            # Gemma doesn't use variance_epsilon, so mirror `eps` under that name.
            module.variance_epsilon = module.eps

        # Clear deleted GPU items
        import gc

        for _ in range(3):
            gc.collect()
            torch.cuda.empty_cache()
        return model, tokenizer


================================================
FILE: unsloth/models/gemma2.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
from ._utils import __version__
from unsloth_zoo.utils import _get_dtype, Version
from unsloth_zoo.hf_utils import dtype_from_config
from ..utils.packing import get_packed_info_from_kwargs
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    select_attention_backend,
    SDPA,
)
from .gemma import (
    GemmaFixedRotaryEmbedding,
    GemmaFixedLinearScalingRotaryEmbedding,
    fast_geglu_inference,
)

# Import the Gemma2 modelling classes from transformers. If the import raises,
# the handler below converts the failure into an ImportError only when the
# installed transformers is older than the checked version; otherwise the
# failure is swallowed and any name not yet bound stays undefined.
try:
    from transformers.models.gemma2.modeling_gemma2 import (
        Gemma2Attention,
        Gemma2DecoderLayer,
        Gemma2Model,
        Gemma2ForCausalLM,
        Gemma2RotaryEmbedding,
        apply_rotary_pos_emb,
        repeat_kv,
    )
except:
    # NOTE(review): `transformers_version` is not defined in this file -
    # presumably it arrives through the star-import of `.llama`; confirm.
    transformers_version = Version(transformers_version)
    # NOTE(review): the check compares against 4.42 while the message below
    # asks for 4.42.3 - confirm which minimum is intended.
    if not transformers_version >= Version("4.42"):
        raise ImportError(
            f"Unsloth: Your transformers version of {transformers_version} does not support Gemma2.\n"
            f"The minimum required version is 4.42.3.\n"
            f'Try `pip install --upgrade "transformers>=4.42.3"`\n'
            f"to obtain the latest transformers build, then restart this session."
        )

from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)

# For Pytorch 2.1.1
# Optional attention variants. When they cannot be imported, both names are
# aliased to the plain Gemma2Attention class.
try:
    from transformers.models.gemma2.modeling_gemma2 import (
        Gemma2SdpaAttention,
        Gemma2FlashAttention2,
    )
except:
    Gemma2SdpaAttention = Gemma2Attention
    Gemma2FlashAttention2 = Gemma2Attention

# flash_attn is imported only when HAS_FLASH_ATTENTION_SOFTCAPPING is truthy.
if HAS_FLASH_ATTENTION_SOFTCAPPING:
    from flash_attn import flash_attn_func

# Logit softcapping
def Gemma2Attention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Gemma2 self-attention forward with attention-logit softcapping.

    Steps: drop any buffers left on the module by the single-token inference
    path, project and reshape Q/K/V, apply rotary embeddings, append to
    `past_key_value`, then run attention. The flash-attention dispatch path
    is used when `HAS_FLASH_ATTENTION_SOFTCAPPING` is truthy and no
    `attention_mask` was supplied; otherwise one of the slow softcapping
    implementations is used (the inference variant when
    `_flag_for_generation` is present in `kwargs`).

    Sliding-window attention is enabled by `kwargs["use_sliding_window"]`
    when given, else when `causal_mask` is exactly `True`.

    Returns:
        `(attention_output, None, past_key_value)`, where the last item is
        the `(K, V)` pair when `use_cache` is truthy and None otherwise.
    """
    # Clear inference
    if hasattr(self, "paged_attention"):
        del self.paged_attention_K
        del self.paged_attention_V
        del self.paged_attention
        del self.temp_QA
        del self.temp_KV
        del self.RH_Q
        del self.attention
        # temp_O is the extra buffer the Gemma2 inference prefill allocates;
        # release it together with the others so it does not outlive them.
        if hasattr(self, "temp_O"):
            del self.temp_O

    bsz, q_len, _ = hidden_states.size()

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    assert n_kv_heads * n_groups == n_heads

    Q, K, V = self.apply_qkv(self, hidden_states)
    Q = Q.view(bsz, q_len, n_heads, head_dim).transpose(1, 2)
    K = K.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    V = V.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, Q.device)

    # Total key length includes any previously cached positions.
    kv_seq_len = K.shape[-2]
    if past_key_value is not None:
        kv_seq_len += past_key_value[0].shape[-2]

    device_index = Q.device.index
    cos = self.rotary_emb.multi_gpu_cos_cached[device_index]
    sin = self.rotary_emb.multi_gpu_sin_cached[device_index]

    # Explicit position ids win; fall back to ones passed through kwargs.
    rope_position_ids = (
        position_ids if position_ids is not None else kwargs.get("position_ids")
    )
    if rope_position_ids is not None:
        # Useful for LongRoPE
        cos_var, sin_var = self.rotary_emb.get_cached(kv_seq_len, device_index)
        Q, K = fast_rope_embedding(Q, K, cos_var, sin_var, rope_position_ids)
    else:
        Q, K = fast_rope_embedding(Q, K, cos, sin)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)
    # From here on `past_key_value` is the value to return, not the input.
    past_key_value = (K, V) if use_cache else None

    # Only enable if the attention_mask is True
    use_sliding_window = kwargs.get("use_sliding_window")
    has_sliding_window = (
        use_sliding_window
        if use_sliding_window is not None
        else isinstance(causal_mask, bool) and causal_mask is True
    )

    use_flash = HAS_FLASH_ATTENTION_SOFTCAPPING and attention_mask is None

    if use_flash:
        window = (-1, -1)
        sliding_window = getattr(self.config, "sliding_window", None)
        if has_sliding_window:
            sliding_window = (
                sliding_window if sliding_window is not None else kv_seq_len
            )
            # No window restriction is needed while everything fits in it.
            window = (
                (-1, -1)
                if kv_seq_len <= sliding_window
                else (sliding_window, sliding_window)
            )

        # Cache the softmax scale on the module after first use.
        if not hasattr(self, "_flash_attention_softmax_scale"):
            self._flash_attention_softmax_scale = 1.0 / (
                self.config.query_pre_attn_scalar**0.5
            )

        # `past_key_value` was reassigned above, so this is only true when
        # packed-sequence info exists and `use_cache` is falsy.
        use_varlen = seq_info is not None and past_key_value is None

        attention_config = AttentionConfig(
            backend = select_attention_backend(use_varlen),
            n_kv_heads = n_kv_heads,
            n_groups = n_groups,
            flash_dense_kwargs = {
                "causal": True,
                "softcap": self.config.attn_logit_softcapping,
                "softmax_scale": self._flash_attention_softmax_scale,
                "window_size": window,
            },
            flash_varlen_kwargs = {
                "dropout_p": 0.0,
                "softmax_scale": self._flash_attention_softmax_scale,
                "causal": True,
                "softcap": self.config.attn_logit_softcapping,
                "window_size": window,
            },
        )

        context = AttentionContext(
            bsz = bsz,
            q_len = q_len,
            kv_seq_len = kv_seq_len,
            n_heads = n_heads,
            head_dim = head_dim,
            requires_grad = hidden_states.requires_grad,
            seq_info = seq_info,
            attention_mask = attention_mask,
            causal_mask = causal_mask,
            sliding_window = sliding_window,
        )

        A = run_attention(config = attention_config, context = context, Q = Q, K = K, V = V)
        A = A.reshape(bsz, q_len, n_heads * head_dim)
    else:
        fx = (
            slow_inference_attention_softcapping
            if "_flag_for_generation" in kwargs
            else slow_attention_softcapping
        )
        A = fx(Q, K, V, causal_mask, self, bsz, kv_seq_len)
    A = self.apply_o(self, A)
    return A, None, past_key_value


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L590
def Gemma2DecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    *args,
    **kwargs,
):
    """Run one Gemma2 decoder layer: attention block followed by MLP block.

    Each block is normalized both before and after: `input_layernorm` /
    `post_attention_layernorm` around attention, and
    `pre_feedforward_layernorm` / `post_feedforward_layernorm` around the
    MLP. The residual is added after the post-norm.

    Two code paths exist:

    * Generation path, taken when `use_cache` is truthy and the layer has a
      `_flag_for_generation` attribute. It uses the inference RMS-norm
      helper, `fast_geglu_inference` and in-place residual additions, and
      passes `_flag_for_generation` on to `self.self_attn`.
    * Default path, which uses `fast_rms_layernorm(..., gemma = True)`, the
      layer's own `self.mlp` and out-of-place residual additions.

    Extra positional `*args` are accepted and ignored; `**kwargs` are
    forwarded to `self.self_attn`.

    Returns:
        A tuple starting with the output hidden states, followed by the
        attention weights when `output_attentions` is truthy and by the
        present key/value when `use_cache` is truthy.
    """
    if use_cache and hasattr(
        self, "_flag_for_generation"
    ):  # past_key_value is not None:
        # float32 scratch tensor handed to the inference RMS-norm helper.
        # It is allocated on the norm weight's own device so that a layer
        # placed on a device other than index 0 gets a buffer on its device.
        norm_weight = self.input_layernorm.weight
        out_weight = torch.empty(
            norm_weight.shape,
            dtype = torch.float32,
            device = norm_weight.device,
        )

        # Self Attention
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            self.input_layernorm, hidden_states, out_weight
        )
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            _flag_for_generation = self._flag_for_generation,
            **kwargs,
        )
        hidden_states = fast_rms_layernorm_inference_gemma(
            self.post_attention_layernorm, hidden_states, out_weight
        )
        hidden_states += residual

        # Fully Connected
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            self.pre_feedforward_layernorm, hidden_states, out_weight
        )
        hidden_states = fast_geglu_inference(self.mlp, hidden_states)
        hidden_states = fast_rms_layernorm_inference_gemma(
            self.post_feedforward_layernorm, hidden_states, out_weight
        )
        hidden_states += residual
    else:
        residual = hidden_states
        hidden_states = fast_rms_layernorm(
            self.input_layernorm, hidden_states, gemma = True
        )
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_value = past_key_value,
            output_attentions = output_attentions,
            use_cache = use_cache,
            padding_mask = padding_mask,
            **kwargs,
        )
        hidden_states = fast_rms_layernorm(
            self.post_attention_layernorm, hidden_states, gemma = True
        )
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = fast_rms_layernorm(
            self.pre_feedforward_layernorm, hidden_states, gemma = True
        )
        hidden_states = self.mlp(hidden_states)
        hidden_states = fast_rms_layernorm(
            self.post_feedforward_layernorm, hidden_states, gemma = True
        )
        hidden_states = residual + hidden_states

    outputs = (hidden_states,)
    if output_attentions:
        outputs += (self_attn_weights,)
    if use_cache:
        outputs += (present_key_value,)
    return outputs


from math import sqrt as math_sqrt

# Number of extra positions added on top of the current sequence length when
# the inference KV buffers are allocated.
KV_CACHE_INCREMENT = 256  # KV Cache update size
# Module-level aliases of torch functions.
torch_nn_functional_softmax = torch.nn.functional.softmax
torch_matmul = torch.matmul
torch_tanh = torch.tanh


def Gemma2Attention_fast_forward_inference(
    self,
    hidden_states: torch.Tensor,
    past_key_value: Optional[Tuple[torch.Tensor]],
    position_ids,
    do_prefill = False,
    attention_mask = None,
    use_sliding_window = False,
    **kwargs,
):
    """
    Single-token Gemma2 attention step using preallocated buffers stored on `self`.

    The new token's Q/K/V are projected, rotary embeddings are applied in place,
    the new K/V are appended to a growable cache (`self.paged_attention`), and
    attention is computed with explicit matmuls so that logit softcapping
    (tanh) can be applied before the mask and the softmax.

    Args:
        hidden_states: Input of shape (bsz, q_len, hidden); the reshapes below
            hard-code q_len == 1.
        past_key_value: Pair (K, V). K.shape[-2] is taken as the number of
            already cached positions. During prefill K and V are copied into
            the cache via permute(2, 0, 1, 3), i.e. they are expected to be
            laid out as (bsz, n_kv_heads, seq_len, head_dim).
        position_ids: Indices used to gather the cos/sin rotary tables.
        do_prefill: When True, (re)allocate all scratch buffers and cache the
            scalar constants on `self`. Must be True on the first call because
            later calls read those attributes.
        attention_mask: Optional additive mask; only applied when it is a
            torch.Tensor (other values such as booleans are ignored).
        use_sliding_window: When True, restrict attention to the last
            `self.config.sliding_window` cached positions.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple `(A, (Kn, Vn))` where `A` is the `o_proj` output
        (requested into `self.temp_O`) and `Kn`, `Vn` are views of the cache
        covering all `seq_len + 1` positions in
        (bsz, n_kv_heads, kv_seq_len, head_dim) layout.
    """
    Xn = hidden_states
    bsz, _, hd = hidden_states.size()
    K1, V1 = past_key_value
    dtype = Xn.dtype

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # assert(n_kv_heads * n_groups == n_heads)

    hidden_size = self.config.hidden_size
    attention_size = n_heads * head_dim
    # Number of positions already cached; the new token lands at index seq_len.
    seq_len = K1.shape[-2]
    kv_seq_len = seq_len + 1
    device = hidden_states.device

    # Prefill phase
    # if not hasattr(self, "paged_attention"):
    if do_prefill:
        # Single buffer holding K (index 0) and V (index 1), sequence-major,
        # with KV_CACHE_INCREMENT spare slots beyond the current length.
        self.paged_attention = torch.empty(
            (KV_CACHE_INCREMENT + seq_len + 1, 2, bsz, n_kv_heads, head_dim),
            dtype = dtype,
            device = device,
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        # Copy the existing cache into the sequence-major layout.
        self.paged_attention_K[:seq_len] = K1.permute(2, 0, 1, 3)
        self.paged_attention_V[:seq_len] = V1.permute(2, 0, 1, 3)
        # Scratch outputs for the projections (passed as `out=` below).
        self.temp_QA = torch.empty(
            (2, bsz, 1, attention_size), dtype = dtype, device = device
        )
        self.temp_KV = torch.empty(
            (2, bsz, 1, n_kv_heads * head_dim), dtype = dtype, device = device
        )
        # Scratch for the "rotate half" term of the rotary embedding.
        self.RH_Q = torch.empty((bsz, n_heads, 1, head_dim), dtype = dtype, device = device)
        # Only for Gemma2
        self.temp_O = torch.empty((bsz, 1, hidden_size), dtype = dtype, device = device)
        # Scratch for attention scores, sized like the cache capacity.
        self.attention = torch.empty(
            (bsz, n_heads, 1, KV_CACHE_INCREMENT + seq_len), dtype = dtype, device = device
        )

        # See https://github.com/google/gemma_pytorch/commit/03e657582d17cb5a8617ebf333c1c16f3694670e
        # Gemma 9b should use 256 and not 224 (hs / nah). 27b uses the below
        # We default to using the config file itself
        # s = self.config.hidden_size // self.config.num_attention_heads
        self.scalar = 1.0 / math_sqrt(self.config.query_pre_attn_scalar)
        # self.scalar = 1.0 / math_sqrt(self.config.hidden_size // self.config.num_attention_heads)
        self.half_head_dim = head_dim // 2
        # Softcapping constant and its reciprocal: scores become t * tanh(scores / t).
        self.t = self.config.attn_logit_softcapping
        self.reciprocal_t = 1.0 / self.config.attn_logit_softcapping
    elif kv_seq_len >= self.paged_attention.shape[0]:
        # Cache is full: grow it (and the score scratch) by KV_CACHE_INCREMENT.
        # NOTE(review): relies on resize_ keeping the existing leading rows
        # when only the first dimension grows - confirm against torch docs.
        self.paged_attention.resize_(
            (
                self.paged_attention.shape[0] + KV_CACHE_INCREMENT,
                2,
                bsz,
                n_kv_heads,
                head_dim,
            )
        )
        # Re-derive the K/V views from the resized buffer.
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.attention.resize_(
            (bsz, n_heads, 1, self.attention.shape[-1] + KV_CACHE_INCREMENT)
        )

    # Project the new token; results are requested into the scratch buffers.
    Qn = fast_linear_forward(self.q_proj, Xn, out = self.temp_QA[0])
    Kn = fast_linear_forward(self.k_proj, Xn, out = self.temp_KV[0])
    Vn = fast_linear_forward(self.v_proj, Xn, out = self.temp_KV[1])
    # Split heads: (bsz, 1, heads, head_dim) -> (bsz, heads, 1, head_dim).
    Qn = Qn.view(bsz, 1, n_heads, head_dim).transpose(1, 2)
    Kn = Kn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)
    Vn = Vn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)

    # cos, sin = self.rotary_emb(Vn, seq_len = kv_seq_len)
    # Qn, Kn = inplace_rope_embedding(Qn, Kn, cos, sin, position_ids)
    cos, sin = self.rotary_emb.get_cached(kv_seq_len, Qn.device.index)
    cos = cos[position_ids].unsqueeze(1)
    sin = sin[position_ids].unsqueeze(1)
    h = self.half_head_dim

    # In-place RoPE on Q: RH_Q = [-Q[h:], Q[:h]], then Q = Q * cos + RH_Q * sin.
    RH_Q = self.RH_Q
    RH_Q[:, :, :, :h] = Qn[:, :, :, h:]
    RH_Q[:, :, :, h:] = Qn[:, :, :, :h]
    RH_Q[:, :, :, :h].neg_()
    Qn *= cos
    Qn.addcmul_(RH_Q, sin)

    # Same rotation for K, reusing the first n_kv_heads slots of the RH_Q
    # scratch (safe because Q's rotation has already been consumed above).
    RH_K = RH_Q[
        :, :n_kv_heads, :, :
    ]  # torch.empty((n_kv_heads, 1, head_dim), dtype = dtype, device = "cuda:0")
    RH_K[:, :, :, :h] = Kn[:, :, :, h:]
    RH_K[:, :, :, h:] = Kn[:, :, :, :h]
    RH_K[:, :, :, :h].neg_()
    Kn *= cos
    Kn.addcmul_(RH_K, sin)

    # New KV cache
    # Kn = torch.cat([K1, Kn], dim = 2)
    # Vn = torch.cat([V1, Vn], dim = 2)
    # Write the new token at slot seq_len, then view all kv_seq_len positions
    # back in (bsz, n_kv_heads, kv_seq_len, head_dim) layout.
    self.paged_attention_K[seq_len] = Kn.permute(2, 0, 1, 3)
    self.paged_attention_V[seq_len] = Vn.permute(2, 0, 1, 3)
    Kn = self.paged_attention_K[:kv_seq_len].permute(1, 2, 0, 3)
    Vn = self.paged_attention_V[:kv_seq_len].permute(1, 2, 0, 3)

    # Handle sliding windows
    # Only the attention computation is windowed; the returned Kn/Vn stay full.
    sliding_window = self.config.sliding_window
    if use_sliding_window and kv_seq_len > sliding_window:
        start = kv_seq_len - sliding_window
        Knn = Kn[:, :, start:, :]  # .contiguous()
        Vnn = Vn[:, :, start:, :]  # .contiguous()
    else:
        Knn, Vnn = Kn, Vn

    # Grouped query attention
    # Repeat each KV head n_groups times so K/V line up with the n_heads queries.
    _, _, cached_len, _ = Knn.shape
    if n_groups != 1:
        Knn = Knn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Vnn = Vnn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Knn = Knn.reshape(bsz, n_heads, cached_len, head_dim)
        Vnn = Vnn.reshape(bsz, n_heads, cached_len, head_dim)

    # Attention
    # [TODO] Gemma2 uses manual matmul for all batch sizes because SDPA does
    # not support softcapping (tanh logit scaling). If a future PyTorch adds
    # a softcap param to scaled_dot_product_attention, consider using SDPA
    # for bsz > 1 to match the llama/qwen3 pattern.
    Qn *= (
        self.scalar
    )  # See https://github.com/ggerganov/llama.cpp/issues/7805#issuecomment-2153349963
    # It seems like doing (Q * scalar) @ K is better than (Q @ K) * scalar to stop overflows
    # Scores are written into the leading cached_len columns of the scratch buffer.
    A = torch_matmul(Qn, Knn.transpose(2, 3), out = self.attention[:, :, :, :cached_len])

    # Softcapping must happen BEFORE the mask is applied.
    # Reference: google-deepmind/gemma _modules.py and transformers gemma2 eager_attention_forward
    A *= self.reciprocal_t
    A.tanh_()
    A *= self.t  # Logit softcapping

    if attention_mask is not None and isinstance(attention_mask, torch.Tensor):
        # Slice mask to match K/V when sliding window is active
        if attention_mask.shape[-1] != A.shape[-1]:
            attention_mask = attention_mask[:, :, :, -A.shape[-1] :]
        A += attention_mask

    # Softmax is computed in float32 and copied back into A's own storage.
    A[:] = torch_nn_functional_softmax(A, dim = -1, dtype = torch.float32)  # .to(A.dtype)
    # Weighted sum over V, reusing Qn's storage for the result.
    A = torch_matmul(A, Vnn, out = Qn)
    # Merge heads back: (bsz, n_heads, 1, head_dim) -> (bsz, 1, n_heads * head_dim).
    A = A.transpose(1, 2)
    A = A.reshape(bsz, 1, attention_size)
    A = fast_linear_forward(self.o_proj, A, out = self.temp_O)
    return A, (Kn, Vn)


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L825
# @torch.inference_mode
def Gemma2Model_fast_forward_inference(
    self,
    input_ids,
    past_key_values,
    position_ids,
    attention_mask = None,
    **kwargs,
):
    """
    Run one decoding step through every Gemma2 decoder layer.

    The embeddings are scaled by sqrt(hidden_size), then each layer applies
    pre/post RMSNorm around attention and around the GeGLU MLP. Even-indexed
    layers are run with the sliding-window setting, odd-indexed ones without.

    Args:
        input_ids: Token ids; truncated to `self.max_seq_length` columns.
        past_key_values: Per-layer (K, V) pairs; `past_key_values[0][0].shape[-2]`
            is used as the number of cached positions.
        position_ids: Position indices forwarded to each attention layer.
        attention_mask: Optional mask. For batch size 1 it is forwarded as is;
            otherwise 4D masks are built (or boolean markers are used when
            flash attention softcapping is available).
        **kwargs: Accepted and ignored.

    Returns:
        BaseModelOutputWithPast whose `past_key_values` is the list of per-layer
        caches returned by the attention step; `hidden_states` and `attentions`
        are empty lists.
    """
    # One float32 scratch tensor per device, shaped like an RMSNorm weight.
    norm_weight = self.model.layers[0].input_layernorm.weight
    out_weights = tuple(
        torch.empty_like(
            norm_weight,
            dtype = torch.float32,
            device = torch.device(device_id),
        )
        for device_id in range(DEVICE_COUNT)
    )

    embeddings = self.model.embed_tokens(input_ids[:, : self.max_seq_length])
    hidden_states = embeddings.to(_get_dtype(dtype_from_config(self.config)))
    # 3072**0.5 = 55.5000 in bfloat16, whilst 55.4256 in float32
    # 2048**0.5 = 45.2500 in bfloat16, whilst 45.2548 in float32
    normalizer = torch.tensor(
        math_sqrt(self.config.hidden_size), dtype = hidden_states.dtype
    )
    hidden_states *= normalizer

    bsz, q_len, _ = hidden_states.shape
    seq_len = past_key_values[0][0].shape[-2]

    # Choose the mask handed to sliding-window layers and to global layers.
    if bsz == 1:
        sliding_mask = attention_mask
        global_mask = attention_mask
    elif HAS_FLASH_ATTENTION_SOFTCAPPING:
        sliding_mask, global_mask = True, False
    else:
        sliding_mask = _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask,
            (bsz, q_len),
            hidden_states,
            seq_len,
            sliding_window = self.config.sliding_window,
        )
        global_mask = _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask,
            (bsz, q_len),
            hidden_states,
            seq_len,
        )

    next_decoder_cache = []
    for idx, decoder_layer in enumerate(self.model.layers):
        # For pipeline parallelism, we need to move all tensors to the same device
        # note that this movement is once per GPU in PP
        device_index = getattr(decoder_layer, "_per_layer_device_index", 0)
        hidden_states, position_ids = move_to_device(
            device_index, hidden_states, position_ids
        )
        scratch = out_weights[device_index]
        attention = decoder_layer.self_attn
        is_sliding_layer = idx % 2 == 0

        # Attention sub-block: norm -> attention -> norm -> residual add
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            decoder_layer.input_layernorm, hidden_states, scratch
        )
        hidden_states, present_key_value = Gemma2Attention_fast_forward_inference(
            attention,
            hidden_states = hidden_states,
            past_key_value = past_key_values[idx],
            position_ids = position_ids,
            attention_mask = sliding_mask if is_sliding_layer else global_mask,
            do_prefill = not hasattr(attention, "paged_attention"),
            use_sliding_window = is_sliding_layer,
        )
        hidden_states = fast_rms_layernorm_inference_gemma(
            decoder_layer.post_attention_layernorm, hidden_states, scratch
        )
        hidden_states += residual

        # Feed-forward sub-block: norm -> GeGLU MLP -> norm -> residual add
        residual = hidden_states
        hidden_states = fast_rms_layernorm_inference_gemma(
            decoder_layer.pre_feedforward_layernorm, hidden_states, scratch
        )
        hidden_states = fast_geglu_inference(decoder_layer.mlp, hidden_states)
        hidden_states = fast_rms_layernorm_inference_gemma(
            decoder_layer.post_feedforward_layernorm, hidden_states, scratch
        )
        hidden_states += residual

        next_decoder_cache.append(present_key_value)

    # The final norm reuses the scratch of the last layer's device.
    hidden_states = fast_rms_layernorm_inference_gemma(
        self.model.norm, hidden_states, out_weights[device_index]
    )

    return BaseModelOutputWithPast(
        last_hidden_state = hidden_states,
        past_key_values = next_decoder_cache,
        hidden_states = [],
        attentions = [],
    )


class FastGemma2Model(FastLlamaModel):
    """Patcher that installs the fast Gemma2 forward functions into transformers."""

    @staticmethod
    def pre_patch():
        """Monkey-patch the transformers Gemma2 classes before a model is loaded."""
        init_name, function = patch_linear_scaling(
            model_name = "gemma2",
            rope_module = GemmaFixedRotaryEmbedding,
            scaled_rope_module = GemmaFixedLinearScalingRotaryEmbedding,
            attention_module = Gemma2Attention,
        )
        if init_name is not None:
            # `function` is source code for a replacement __init__; define it
            # in this module's globals and look it up by name.
            exec(function, globals())
            Gemma2Attention.__init__ = eval(init_name)

        # Every attention variant shares the same fast forward.
        for attention_cls in (
            Gemma2Attention,
            Gemma2SdpaAttention,
            Gemma2FlashAttention2,
        ):
            attention_cls.forward = Gemma2Attention_fast_forward

        Gemma2DecoderLayer.forward = Gemma2DecoderLayer_fast_forward
        Gemma2Model.forward = LlamaModel_fast_forward
        Gemma2ForCausalLM.forward = CausalLM_fast_forward(
            Gemma2Model_fast_forward_inference
        )
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(Gemma2ForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        from transformers.models.gemma2 import modeling_gemma2

        modeling_gemma2.Gemma2RotaryEmbedding = GemmaFixedRotaryEmbedding
        return

    @staticmethod
    def post_patch(model, tokenizer, correct_dtype = None):
        """
        Finish patching a loaded model.

        Leaves only LoRA parameters trainable and makes sure every Gemma2
        RMSNorm module exposes `variance_epsilon`. Returns `(model, tokenizer)`.
        """
        # Gemma does not downcast RoPE
        model, tokenizer = patch_model_and_tokenizer(
            model, tokenizer, downcast_rope = False, correct_dtype = correct_dtype
        )

        # Gemma's RMSNorm computes output * (1 + self.weight); the "+ 1" is
        # left to the Triton kernel itself, so weights are not modified here.
        # https://github.com/huggingface/transformers/blob/main/src/transformers/models/gemma/modeling_gemma.py#L89
        from transformers.models.gemma2.modeling_gemma2 import Gemma2RMSNorm

        # Freeze all parameters except LoRA
        for param_name, param in model.named_parameters():
            is_lora = ".lora_A." in param_name or ".lora_B." in param_name
            param.requires_grad_(is_lora)

        # Patch RMS Layernorm: Gemma stores the epsilon as `eps`, so add the
        # `variance_epsilon` alias when it is missing.
        for _module_name, module in model.named_modules():
            if not isinstance(module, Gemma2RMSNorm):
                continue
            if hasattr(module, "variance_epsilon"):
                continue
            module.variance_epsilon = module.eps

        # Clear deleted GPU items
        import gc

        for _ in range(3):
            gc.collect()
            torch.cuda.empty_cache()
        return model, tokenizer


================================================
FILE: unsloth/models/glm4_moe.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
GLM-4.7 Flash (GLM4 MoE Lite) optimized implementation using grouped GEMM.

Key architecture differences from Qwen3 MoE:
- Router uses sigmoid activation (not softmax)
- Has routed_scaling_factor of 1.8
- Has 1 shared expert that processes all tokens
- Uses group-based selection before topk
- Uses MLA (Multi-head Latent Attention)
"""

from .llama import *
import os
from ._utils import __version__
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
    fix_prepare_inputs_for_generation,
    fast_rms_layernorm_inference,
    fast_swiglu_inference,
    LlamaModel_fast_forward,
    LlamaModel_fast_forward_inference,
    CausalLM_fast_forward,
    PeftModel_fast_forward,
)
import torch
import torch.nn.functional as F
from typing import Optional, Tuple
from ..kernels import fast_rms_layernorm

# Import the grouped gemm utilities from unsloth kernels
# The grouped_gemm module expects its parent directory to be in sys.path
HAS_GROUPED_GEMM = False
try:
    import sys
    import os

    # Add the moe directory (parent of grouped_gemm) to sys.path
    _moe_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "kernels", "moe"
    )
    if _moe_path not in sys.path:
        sys.path.insert(0, _moe_path)

    # Import grouped_gemm package first to apply TMA compatibility shim
    # This patches triton.language to support both old and new TMA API names
    import grouped_gemm  # noqa: F401 - triggers TMA compatibility shim

    from grouped_gemm.interface import grouped_gemm
    from grouped_gemm.reference.moe_ops import (
        get_routing_indices,
        permute,
        unpermute,
    )

    HAS_GROUPED_GEMM = True
except ImportError as e:
    import warnings

    warnings.warn(
        f"Grouped GEMM not available: {e}. MoE will use fallback implementation."
    )


# Import transformers GLM4 MoE Lite classes.
# HAS_GLM4_MOE records whether the installed transformers provides them.
try:
    from transformers.models.glm4_moe_lite.modeling_glm4_moe_lite import (
        Glm4MoeLiteAttention,
        Glm4MoeLiteMoE,
        Glm4MoeLiteMLP,
        Glm4MoeLiteNaiveMoe,
        Glm4MoeLiteTopkRouter,
        Glm4MoeLiteDecoderLayer,
        Glm4MoeLiteModel,
        Glm4MoeLiteForCausalLM,
        Glm4MoeLiteRMSNorm,
    )

    HAS_GLM4_MOE = True
except ImportError:
    HAS_GLM4_MOE = False

    # Create dummy classes so every name imported above exists either way
    # (for type checking and for `from ... import *` consumers).
    class Glm4MoeLiteAttention:
        pass

    class Glm4MoeLiteMoE:
        pass

    class Glm4MoeLiteMLP:
        pass

    class Glm4MoeLiteNaiveMoe:
        pass

    class Glm4MoeLiteTopkRouter:
        pass

    class Glm4MoeLiteDecoderLayer:
        pass

    class Glm4MoeLiteModel:
        pass

    class Glm4MoeLiteForCausalLM:
        pass

    # Previously missing: the try branch imports this name, so the fallback
    # must define it too.
    class Glm4MoeLiteRMSNorm:
        pass


# Module-level alias for the SiLU activation applied between the two grouped GEMMs below.
torch_nn_functional_silu = torch.nn.functional.silu


def Glm4MoeLiteMoE_fast_forward(self, hidden_states):
    """
    MoE block forward: routed experts (grouped GEMM when available) plus shared experts.

    Routing itself is delegated to `self.gate` and
    `self.route_tokens_to_experts`; this function only consumes the selected
    expert indices and weights. When grouped GEMM could not be imported
    (`HAS_GROUPED_GEMM` is False) the module's own `self.experts` is called
    instead, and none of the grouped-GEMM helpers are touched.

    Args:
        hidden_states: Tensor of shape (batch, seq_len, hidden_dim).

    Returns:
        Tensor with the same shape as the input: weighted routed-expert output
        plus `self.shared_experts` applied to the flattened input.
    """
    residuals = hidden_states
    orig_shape = hidden_states.shape
    batch_size, seq_len, hidden_dim = orig_shape
    num_tokens = batch_size * seq_len

    # Flatten hidden states for routing
    hidden_states = hidden_states.view(-1, hidden_dim)

    # Router computation
    router_logits = self.gate(hidden_states)  # [num_tokens, n_routed_experts]
    topk_indices, topk_weights = self.route_tokens_to_experts(router_logits)
    # Cast routing weights to match hidden_states dtype (Qwen3 pattern);
    # the router may return a wider dtype than the activations.
    topk_weights = topk_weights.to(hidden_states.dtype)

    if HAS_GROUPED_GEMM:
        # Routing indices are only needed (and `get_routing_indices` is only
        # defined) on the grouped-GEMM path, so compute them here rather than
        # before the branch.
        with torch.no_grad():
            token_counts_by_expert, gather_indices = get_routing_indices(
                topk_indices, self.n_routed_experts
            )
        m_sizes = token_counts_by_expert.int()

        # Cast hidden_states to match expert weights dtype
        # Under autocast, hidden_states may be fp32 while weights are bf16
        hidden_states = hidden_states.to(self.experts.gate_up_proj.dtype)

        # First grouped GEMM: gate_up_proj with permute_x
        # Input: [num_tokens, hidden_dim] -> Output: [total_tokens, 2*intermediate_dim]
        intermediate = grouped_gemm(
            X = hidden_states,
            W = self.experts.gate_up_proj,
            m_sizes = m_sizes,
            topk = self.top_k,
            gather_indices = gather_indices,
            permute_x = True,
            permute_y = False,
            autotune = True,
            is_first_gemm = True,
        )

        # Activation: SiLU(gate) * up
        gate, up = intermediate.chunk(2, dim = -1)
        intermediate = torch_nn_functional_silu(gate) * up

        # Second grouped GEMM: down_proj with permute_y
        # Input: [total_tokens, intermediate_dim] -> Output: [total_tokens, hidden_dim]
        expert_output = grouped_gemm(
            X = intermediate,
            W = self.experts.down_proj,
            m_sizes = m_sizes,
            topk = self.top_k,
            gather_indices = gather_indices,
            permute_x = False,
            permute_y = True,
            autotune = True,
            is_first_gemm = False,
        )

        # Merge topk weights: [num_tokens, top_k, hidden_dim] -> [num_tokens, hidden_dim]
        hidden_states = (
            expert_output.view(num_tokens, self.top_k, hidden_dim)
            * topk_weights.unsqueeze(-1)
        ).sum(dim = 1)
    else:
        # Fallback to the module's own expert implementation
        hidden_states = self.experts(hidden_states, topk_indices, topk_weights)

    # Add shared expert output
    hidden_states = hidden_states + self.shared_experts(residuals.view(-1, hidden_dim))

    return hidden_states.view(*orig_shape)


def Glm4MoeLiteNaiveMoe_fast_forward(
    self,
    hidden_states: torch.Tensor,
    top_k_index: torch.Tensor,
    top_k_weights: torch.Tensor,
) -> torch.Tensor:
    """
    Apply the routed experts to a flat batch of tokens.

    Uses two grouped GEMMs when `HAS_GROUPED_GEMM` is set; otherwise loops
    over the experts that received at least one token.

    Args:
        hidden_states: [num_tokens, hidden_dim]
        top_k_index: [num_tokens, top_k] indices of selected experts
        top_k_weights: [num_tokens, top_k] weights for selected experts

    Returns:
        [num_tokens, hidden_dim] weighted sum of the selected experts' outputs
    """
    num_tokens, hidden_dim = hidden_states.shape
    top_k = top_k_index.shape[1]
    # Routing weights follow the activation dtype (Qwen3 pattern)
    top_k_weights = top_k_weights.to(hidden_states.dtype)

    if HAS_GROUPED_GEMM:
        # Per-expert token counts and the gather order for the grouped GEMMs
        with torch.no_grad():
            token_counts_by_expert, gather_indices = get_routing_indices(
                top_k_index, self.num_experts
            )

        # Under autocast, hidden_states may be fp32 while weights are bf16
        tokens = hidden_states.to(self.gate_up_proj.dtype)

        # gate_up_proj: tokens are permuted into expert order on the way in
        gate_up = grouped_gemm(
            X = tokens,
            W = self.gate_up_proj,
            m_sizes = token_counts_by_expert.int(),
            topk = top_k,
            gather_indices = gather_indices,
            permute_x = True,
            permute_y = False,
            autotune = True,
            is_first_gemm = True,
        )

        gate, up = gate_up.chunk(2, dim = -1)
        activated = self.act_fn(gate) * up

        # down_proj: results are permuted back to token order on the way out
        expert_output = grouped_gemm(
            X = activated,
            W = self.down_proj,
            m_sizes = token_counts_by_expert.int(),
            topk = top_k,
            gather_indices = gather_indices,
            permute_x = False,
            permute_y = True,
            autotune = True,
            is_first_gemm = False,
        )

        # Weight each of the top_k expert outputs and sum them per token
        per_slot = expert_output.view(num_tokens, top_k, hidden_dim)
        return (per_slot * top_k_weights.unsqueeze(-1)).sum(dim = 1)

    # Naive path: visit each expert that was selected by at least one token
    linear = torch.nn.functional.linear
    accumulated = torch.zeros_like(hidden_states)
    with torch.no_grad():
        one_hot = torch.nn.functional.one_hot(
            top_k_index, num_classes = self.num_experts
        )
        # [num_experts, top_k, num_tokens]
        expert_mask = one_hot.permute(2, 1, 0)
        tokens_per_expert = expert_mask.sum(dim = (-1, -2))
        expert_hit = torch.greater(tokens_per_expert, 0).nonzero()

    for hit in expert_hit:
        expert_idx = hit[0]
        if expert_idx == self.num_experts:
            continue
        top_k_pos, token_idx = torch.where(expert_mask[expert_idx])
        projected = linear(hidden_states[token_idx], self.gate_up_proj[expert_idx])
        gate, up = projected.chunk(2, dim = -1)
        expert_out = linear(self.act_fn(gate) * up, self.down_proj[expert_idx])
        expert_out = expert_out * top_k_weights[token_idx, top_k_pos, None]
        accumulated.index_add_(0, token_idx, expert_out.to(accumulated.dtype))

    return accumulated


def Glm4MoeLiteDecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_values = None,
    use_cache: bool = False,
    cache_position: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    **kwargs,
) -> torch.Tensor:
    """
    Decoder layer forward: pre-norm attention and pre-norm MLP/MoE, each with a residual add.

    The RMSNorm helper is `fast_rms_layernorm_inference` when `use_cache` is
    set and the layer carries a `_flag_for_generation` attribute, and
    `fast_rms_layernorm` otherwise. Everything else is the same on both paths.
    All attention arguments and extra `kwargs` are forwarded to `self.self_attn`,
    whose second return value is discarded.

    Returns:
        The layer output hidden states.
    """
    # Generation (flagged layer with caching on) uses the inference norm.
    generating = use_cache and hasattr(self, "_flag_for_generation")
    rms_norm = fast_rms_layernorm_inference if generating else fast_rms_layernorm

    # Self-attention sub-block
    attn_input = rms_norm(self.input_layernorm, hidden_states)
    attn_output, _ = self.self_attn(
        hidden_states = attn_input,
        attention_mask = attention_mask,
        position_ids = position_ids,
        past_key_values = past_key_values,
        use_cache = use_cache,
        cache_position = cache_position,
        position_embeddings = position_embeddings,
        **kwargs,
    )
    hidden_states = hidden_states + attn_output

    # MLP/MoE sub-block
    mlp_input = rms_norm(self.post_attention_layernorm, hidden_states)
    mlp_output = self.mlp(mlp_input)
    return hidden_states + mlp_output


def Glm4MoeLiteMLP_fast_forward(self, x):
    """Run this MLP module on `x` through the `fast_swiglu_inference` helper."""
    swiglu_output = fast_swiglu_inference(self, x)
    return swiglu_output


class FastGLM47Model(FastLlamaModel):
    """
    Patcher/loader for GLM-4.7 Flash (GLM4 MoE Lite).

    `pre_patch` swaps the MoE forwards for the grouped-GEMM versions when
    grouped GEMM is available; `from_pretrained` delegates loading to
    `FastLlamaModel.from_pretrained` with this class as the model patcher.
    """

    @staticmethod
    def pre_patch():
        """Install the grouped-GEMM MoE forwards; raise if transformers lacks GLM4 MoE Lite."""
        if not HAS_GLM4_MOE:
            raise ImportError(
                "Unsloth: GLM4 MoE Lite support requires transformers >= 5.0.0. "
                "Please upgrade with: pip install --upgrade transformers"
            )

        # Without grouped GEMM the stock transformers forwards stay in place.
        if not HAS_GROUPED_GEMM:
            return

        # TMA compatibility is handled by grouped_gemm/__init__.py which patches
        # triton.language to support both old (_experimental_make_tensor_descriptor)
        # and new (make_tensor_descriptor) API names
        Glm4MoeLiteNaiveMoe.forward = Glm4MoeLiteNaiveMoe_fast_forward
        Glm4MoeLiteMoE.forward = Glm4MoeLiteMoE_fast_forward

        # Note: attention, rotary embedding, decoder layer and model forwards
        # are deliberately left unpatched for GLM4 MoE because:
        # - GLM4 uses MLA (Multi-head Latent Attention) which has different projection names
        # - Glm4MoeLiteRotaryEmbedding doesn't have extend_rope_embedding method
        # - The decoder layer and model forward functions assume Llama-compatible infrastructure
        return

    @staticmethod
    def from_pretrained(
        model_name = "unsloth/GLM-4.7-Flash",
        max_seq_length = 4096,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        **kwargs,
    ):
        """
        Load a model through `FastLlamaModel.from_pretrained`.

        The `model_patcher` argument is accepted but not forwarded:
        `FastGLM47Model` is always used as the patcher. The loader-only
        `unsloth_force_compile` entry is dropped from `kwargs` before delegating.
        """
        # Pop kwargs that are used by loader but not passed to model
        kwargs.pop("unsloth_force_compile", None)

        loader_arguments = dict(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = FastGLM47Model,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
        )
        return FastLlamaModel.from_pretrained(**loader_arguments, **kwargs)


================================================
FILE: unsloth/models/granite.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
import os
from ._utils import __version__
from unsloth_zoo.utils import _get_dtype, Version
from unsloth_zoo.hf_utils import dtype_from_config
from ..utils.packing import get_packed_info_from_kwargs
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    select_attention_backend,
    SDPA,
)
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
)
from .mistral import *
from bitsandbytes.nn import Linear4bit as Bnb_Linear4bit
from peft.tuners.lora import Linear4bit as Peft_Linear4bit

try:
    from transformers.models.granite.modeling_granite import (
        GraniteAttention,
        GraniteDecoderLayer,
        GraniteModel,
        GraniteForCausalLM,
    )
except:
    transformers_version = Version(transformers_version)
    if not transformers_version >= Version("4.45.0"):
        raise ImportError(
            f"Unsloth: Your transformers version of {transformers_version} does not support Granite.\n"
            f"The minimum required version is 4.45.0.\n"
            f'Try `pip install --upgrade "transformers>=4.45.0"`\n'
            f"to obtain the latest transformers build, then restart this session."
        )

from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)

# For Pytorch 2.1.1
try:
    from transformers.models.granite.modeling_granite import (
        GraniteSdpaAttention,
        GraniteFlashAttention2,
    )
except:
    GraniteSdpaAttention = GraniteAttention
    GraniteFlashAttention2 = GraniteAttention


def GraniteAttention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """
    Attention forward used for training / full-sequence passes on Granite.

    Projects Q/K/V, applies rotary embeddings, appends any existing KV cache and
    dispatches to an attention backend through `run_attention`.

    Args:
        hidden_states: Input activations, unpacked as (bsz, q_len, _).
        causal_mask: Forwarded untouched into the `AttentionContext`.
        attention_mask: When given, forces the SDPA backend and is passed to it
            as `attn_mask`.
        position_ids: Optional positions for RoPE; falls back to
            `kwargs["position_ids"]` when None.
        past_key_value: Optional `(K, V)` pair that is concatenated in front of
            the freshly computed keys / values along dim 2.
        output_attentions: Not read by this function.
        use_cache: When truthy, the concatenated `(K, V)` is returned.
        padding_mask: Not read by this function.
        position_embeddings: Required `(cos, sin)` pair for RoPE.
        **kwargs: Also inspected by `get_packed_info_from_kwargs` for packed
            sequence information.

    Returns:
        `(attn_output, None, past_key_value)` - attention weights are never
        materialised, so the second element is always None.

    Raises:
        AssertionError: if `position_embeddings` is None or the head counts are
            inconsistent (`n_kv_heads * n_groups != n_heads`).
    """
    # Clear inference
    # These attributes are the scratch buffers allocated by
    # GraniteAttention_fast_forward_inference; drop them so they do not linger
    # once the module is used outside of token-by-token decoding.
    # NOTE(review): `temp_O` is allocated by the inference path as well but is
    # not deleted here - confirm whether that is intentional.
    if hasattr(self, "paged_attention"):
        del self.paged_attention_K
        del self.paged_attention_V
        del self.paged_attention
        del self.temp_QA
        del self.temp_KV
        del self.RH_Q
        del self.attention

    bsz, q_len, _ = hidden_states.size()

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # Dropout is only active while training.
    dropout_p = self.config.attention_dropout if self.training else 0
    assert n_kv_heads * n_groups == n_heads

    # Reshape projections to (bsz, heads, q_len, head_dim).
    Q, K, V = self.apply_qkv(self, hidden_states)
    Q = Q.view(bsz, q_len, n_heads, head_dim).transpose(1, 2)
    K = K.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    V = V.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, Q.device)

    # Total key length = new tokens + whatever is already cached.
    kv_seq_len = K.shape[-2]
    if past_key_value is not None:
        kv_seq_len += past_key_value[0].shape[-2]

    assert position_embeddings is not None
    cos, sin = position_embeddings
    rope_position_ids = (
        position_ids if position_ids is not None else kwargs.get("position_ids")
    )
    if rope_position_ids is not None:
        # Useful for LongRoPE
        Q, K = fast_rope_embedding(Q, K, cos, sin, rope_position_ids)
    else:
        Q, K = fast_rope_embedding(Q, K, cos, sin)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)
    # From here on `past_key_value` is the value to return, not the input cache.
    past_key_value = (K, V) if use_cache else None

    # Attention module
    # Because `past_key_value` was just rebound, the last condition is true
    # exactly when `use_cache` is falsy.
    use_varlen = (
        attention_mask is None and seq_info is not None and past_key_value is None
    )

    # An explicit mask can only be honoured by SDPA here; otherwise let the
    # dispatcher pick a backend.
    backend = (
        SDPA if attention_mask is not None else select_attention_backend(use_varlen)
    )

    window = (kv_seq_len, kv_seq_len)
    # `scaling` is optional on the module; None is passed through when absent.
    softmax_scale = getattr(self, "scaling", None)
    attention_config = AttentionConfig(
        backend = backend,
        n_kv_heads = n_kv_heads,
        n_groups = n_groups,
        flash_dense_kwargs = {
            "causal": True,
            "softmax_scale": softmax_scale,
            "dropout_p": dropout_p,
            "window_size": window,
        },
        # Note: the varlen path always uses dropout 0.0, unlike the other backends.
        flash_varlen_kwargs = {
            "dropout_p": 0.0,
            "softmax_scale": softmax_scale,
            "causal": True,
        },
        # Entries whose value is None are dropped before being passed on.
        sdpa_kwargs = {
            k: v
            for k, v in {
                "attn_mask": attention_mask,
                "scale": softmax_scale,
                "dropout_p": dropout_p,
            }.items()
            if v is not None
        },
        xformers_kwargs = {
            "scale": softmax_scale,
            "p": dropout_p,
        },
    )

    context = AttentionContext(
        bsz = bsz,
        q_len = q_len,
        kv_seq_len = kv_seq_len,
        n_heads = n_heads,
        head_dim = head_dim,
        requires_grad = hidden_states.requires_grad,
        seq_info = seq_info,
        attention_mask = attention_mask,
        causal_mask = causal_mask,
    )

    A = run_attention(config = attention_config, context = context, Q = Q, K = K, V = V)

    # Merge heads back into a single feature dimension before the output projection.
    attn_output = A.reshape(bsz, q_len, n_heads * head_dim)
    attn_output = self.apply_o(self, attn_output)
    attn_weights = None
    return attn_output, attn_weights, past_key_value


def GraniteDecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
):
    """
    Granite decoder layer: pre-norm attention and pre-norm MLP, each added back
    onto the residual stream scaled by the layer's residual multiplier.

    When `use_cache` is truthy and the layer carries `_flag_for_generation`, the
    inference variants of the layernorm / SwiGLU kernels are used and the flag
    is forwarded to the attention module; otherwise the regular kernels and
    `self.mlp` are used.

    Returns:
        A tuple starting with the new hidden states, followed by the attention
        weights if `output_attentions` and the present key/value if `use_cache`.
    """
    # Prefer a multiplier stored on the layer, fall back to the config value.
    if hasattr(self, "residual_multiplier"):
        alpha = self.residual_multiplier
    else:
        alpha = self.config.residual_multiplier

    # Pick the kernel set once; both paths share the same data flow below.
    if use_cache and hasattr(self, "_flag_for_generation"):  # past_key_value is not None:
        normalize = fast_rms_layernorm_inference

        def feed_forward(activations):
            return fast_swiglu_inference(self.mlp, activations)

        generation_kwargs = {"_flag_for_generation": self._flag_for_generation}
    else:
        normalize = fast_rms_layernorm
        feed_forward = self.mlp
        generation_kwargs = {}

    # Self attention
    attn_out, self_attn_weights, present_key_value = self.self_attn(
        hidden_states = normalize(self.input_layernorm, hidden_states),
        causal_mask = causal_mask,
        attention_mask = attention_mask,
        position_ids = position_ids,
        past_key_value = past_key_value,
        output_attentions = output_attentions,
        use_cache = use_cache,
        padding_mask = padding_mask,
        position_embeddings = position_embeddings,
        **generation_kwargs,
        **kwargs,
    )
    hidden_states = torch.add(hidden_states, attn_out, alpha = alpha)

    # Fully Connected
    mlp_out = feed_forward(normalize(self.post_attention_layernorm, hidden_states))
    hidden_states = torch.add(hidden_states, mlp_out, alpha = alpha)

    collected = [hidden_states]
    if output_attentions:
        collected.append(self_attn_weights)
    if use_cache:
        collected.append(present_key_value)
    return tuple(collected)


from math import sqrt as math_sqrt

# Number of extra sequence slots GraniteAttention_fast_forward_inference
# allocates at prefill and adds on every later resize of its KV buffers.
# NOTE(review): llama.py defines its own KV_CACHE_INCREMENT; presumably this
# assignment overrides the star-imported value for this module - confirm.
KV_CACHE_INCREMENT = 256  # KV Cache update size
# Module-level aliases for torch functions used by the inference code below.
torch_nn_functional_softmax = torch.nn.functional.softmax
torch_matmul = torch.matmul
torch_tanh = torch.tanh


def GraniteAttention_fast_forward_inference(
    self,
    hidden_states: torch.Tensor,
    past_key_value: Optional[Tuple[torch.Tensor]],
    position_ids,
    do_prefill = False,
    attention_mask = None,
    use_sliding_window = False,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
):
    """
    Single-token attention step for Granite decoding with a preallocated KV cache.

    The new token's Q/K/V are projected into reusable scratch buffers stored on
    the module, rotary embeddings are applied in place, the new K/V are written
    into the cache at index `seq_len`, and attention is computed against the
    first `seq_len + 1` cache entries.

    Args:
        hidden_states: Activations for the new token, unpacked as (bsz, _, hd).
        past_key_value: `(K1, V1)`; `K1.shape[-2]` is taken as the number of
            tokens already cached.
        position_ids: Index used to select rows of the cached cos / sin tables.
        do_prefill: When True, (re)allocates the cache and scratch buffers on
            `self` and copies `K1` / `V1` into the cache.
        attention_mask: Only used when `bsz != 1`; a non-boolean 4D mask is
            converted with `.eq(0)` before being passed to SDPA.
        use_sliding_window: Not read by this function.
        position_embeddings: Required `(cos, sin)` tables.

    Returns:
        `(A, (Kn, Vn))` where `A` is the output-projected attention result and
        `Kn` / `Vn` are the cache contents for the first `seq_len + 1`
        positions (expanded across KV groups when that branch ran).

    Raises:
        AssertionError: if `position_embeddings` is None.
    """
    assert (
        position_embeddings is not None
    ), f"Granite model requires position embeddings to be specified"

    Xn = hidden_states
    bsz, _, hd = hidden_states.size()
    K1, V1 = past_key_value
    dtype = Xn.dtype

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # assert(n_kv_heads * n_groups == n_heads)

    hidden_size = self.config.hidden_size
    attention_size = n_heads * head_dim
    # `seq_len` = tokens already cached; the new token lands at index `seq_len`.
    seq_len = K1.shape[-2]
    kv_seq_len = seq_len + 1
    device = hidden_states.device

    # Prefill phase
    # if not hasattr(self, "paged_attention"):
    if do_prefill:
        # One buffer holds both K (index 0) and V (index 1); the sequence axis
        # comes first and has KV_CACHE_INCREMENT spare slots.
        self.paged_attention = torch.empty(
            (KV_CACHE_INCREMENT + seq_len + 1, 2, bsz, n_kv_heads, head_dim),
            dtype = dtype,
            device = device,
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        # Reorder the incoming cache so its sequence axis (dim 2) comes first.
        self.paged_attention_K[:seq_len] = K1.permute(2, 0, 1, 3)
        self.paged_attention_V[:seq_len] = V1.permute(2, 0, 1, 3)
        # Scratch outputs for the q_proj / k_proj / v_proj / o_proj calls below.
        self.temp_QA = torch.empty(
            (2, bsz, 1, attention_size), dtype = dtype, device = device
        )
        self.temp_KV = torch.empty(
            (2, bsz, 1, n_kv_heads * head_dim), dtype = dtype, device = device
        )
        # Scratch buffer for the rotated halves used by RoPE.
        self.RH_Q = torch.empty((bsz, n_heads, 1, head_dim), dtype = dtype, device = device)
        self.temp_O = torch.empty((bsz, 1, hidden_size), dtype = dtype, device = device)
        # Scratch buffer for the attention scores of the bsz == 1 path.
        self.attention = torch.empty(
            (bsz, n_heads, 1, KV_CACHE_INCREMENT + seq_len), dtype = dtype, device = device
        )

        self.half_head_dim = head_dim // 2
    elif kv_seq_len >= self.paged_attention.shape[0]:
        # Cache is full: grow the sequence axis by KV_CACHE_INCREMENT in place
        # and re-derive the K / V views from the resized buffer.
        self.paged_attention.resize_(
            (
                self.paged_attention.shape[0] + KV_CACHE_INCREMENT,
                2,
                bsz,
                n_kv_heads,
                head_dim,
            )
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.attention.resize_(
            (bsz, n_heads, 1, self.attention.shape[-1] + KV_CACHE_INCREMENT)
        )

    # Project the new token into the scratch buffers, then split into heads.
    Qn = fast_linear_forward(self.q_proj, Xn, out = self.temp_QA[0])
    Kn = fast_linear_forward(self.k_proj, Xn, out = self.temp_KV[0])
    Vn = fast_linear_forward(self.v_proj, Xn, out = self.temp_KV[1])
    Qn = Qn.view(bsz, 1, n_heads, head_dim).transpose(1, 2)
    Kn = Kn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)
    Vn = Vn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)

    # cos, sin = self.rotary_emb(Vn, seq_len = kv_seq_len)
    # Qn, Kn = inplace_rope_embedding(Qn, Kn, cos, sin, position_ids)
    cos, sin = position_embeddings
    cos, sin = cos[position_ids], sin[position_ids]
    h = self.half_head_dim

    # In-place RoPE on Q: RH_Q = [-Q[h:], Q[:h]], then Q = Q * cos + RH_Q * sin.
    RH_Q = self.RH_Q
    RH_Q[:, :, :, :h] = Qn[:, :, :, h:]
    RH_Q[:, :, :, h:] = Qn[:, :, :, :h]
    RH_Q[:, :, :, :h].neg_()
    Qn *= cos
    Qn.addcmul_(RH_Q, sin)

    # Same rotation for K, reusing the first n_kv_heads heads of the RH_Q
    # buffer (Q has already consumed its rotated values above).
    RH_K = RH_Q[
        :, :n_kv_heads, :, :
    ]  # torch.empty((n_kv_heads, 1, head_dim), dtype = dtype, device = "cuda:0")
    RH_K[:, :, :, :h] = Kn[:, :, :, h:]
    RH_K[:, :, :, h:] = Kn[:, :, :, :h]
    RH_K[:, :, :, :h].neg_()
    Kn *= cos
    Kn.addcmul_(RH_K, sin)

    # New KV cache
    # Kn = torch.cat([K1, Kn], dim = 2)
    # Vn = torch.cat([V1, Vn], dim = 2)
    # Write the new token at index `seq_len`, then view the populated prefix
    # back with the sequence axis in position 2.
    self.paged_attention_K[seq_len] = Kn.permute(2, 0, 1, 3)
    self.paged_attention_V[seq_len] = Vn.permute(2, 0, 1, 3)
    Kn = self.paged_attention_K[:kv_seq_len].permute(1, 2, 0, 3)
    Vn = self.paged_attention_V[:kv_seq_len].permute(1, 2, 0, 3)

    # Grouped query attention
    # Repeat each KV head `n_groups` times when the manual bsz == 1 path is
    # used, or when SDPA cannot do grouped-query attention itself.
    _, _, cached_len, _ = Kn.shape
    if bsz == 1 or ((not SDPA_HAS_GQA) and n_groups != 1):
        Kn = Kn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Vn = Vn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Kn = Kn.reshape(bsz, n_heads, cached_len, head_dim)
        Vn = Vn.reshape(bsz, n_heads, cached_len, head_dim)

    # Attention
    if bsz == 1:
        # Manual attention: scale Q in place, write the scores into the
        # preallocated buffer, softmax in float32, then write the weighted sum
        # of V back into Qn's storage.
        Qn *= self.scaling
        A = torch_matmul(
            Qn, Kn.transpose(2, 3), out = self.attention[:, :, :, :cached_len]
        )
        A[:] = torch_nn_functional_softmax(A, dim = -1, dtype = torch.float32)
        A = torch_matmul(A, Vn, out = Qn)
    else:
        # A non-boolean 4D mask is turned into a boolean one where entries
        # equal to 0 become True.
        if (
            attention_mask is not None
            and attention_mask.dim() == 4
            and attention_mask.dtype != torch.bool
        ):
            attention_mask = attention_mask.eq(0)
        if SDPA_HAS_GQA:
            A = scaled_dot_product_attention(
                Qn,
                Kn,
                Vn,
                attn_mask = attention_mask,
                scale = self.scaling,
                enable_gqa = True,
            )
        else:
            A = scaled_dot_product_attention(
                Qn,
                Kn,
                Vn,
                attn_mask = attention_mask,
                scale = self.scaling,
            )
    # Merge heads and apply the output projection into its scratch buffer.
    A = A.transpose(1, 2)
    A = A.reshape(bsz, 1, attention_size)
    A = fast_linear_forward(self.o_proj, A, out = self.temp_O)
    return A, (Kn, Vn)


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L825
# @torch.inference_mode
def GraniteModel_fast_forward_inference(
    self,
    input_ids,
    past_key_values,
    position_ids,
    attention_mask = None,
):
    """
    Run one cached decoding step through every Granite decoder layer.

    Embeds `input_ids` (truncated to `self.max_seq_length`), scales them by the
    model's embedding multiplier, then applies attention and MLP per layer with
    the inference kernels, adding each result back onto the residual stream
    scaled by the residual multiplier.

    Returns:
        `BaseModelOutputWithPast` holding the final normalised hidden states and
        the per-layer key/value pairs; `hidden_states` / `attentions` are empty.
    """
    token_ids = input_ids[:, : self.max_seq_length]
    hidden_states = self.model.embed_tokens(token_ids).to(
        _get_dtype(dtype_from_config(self.config))
    )
    hidden_states *= self.model.embedding_multiplier

    # Prefer a multiplier stored on the model, fall back to the config value.
    if hasattr(self, "residual_multiplier"):
        alpha = self.residual_multiplier
    else:
        alpha = self.config.residual_multiplier

    bsz, q_len, _ = hidden_states.shape
    cached_len = past_key_values[0][0].shape[-2]
    if bsz == 1:
        # A single sequence needs no mask.
        attention_mask = None
    else:
        attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask,
            (bsz, q_len),
            hidden_states,
            cached_len,
        )
        # Pre-convert to bool once for all layers (avoids per-layer .eq(0))
        needs_bool = (
            attention_mask is not None and attention_mask.dtype != torch.bool
        )
        if needs_bool:
            attention_mask = attention_mask.eq(0)

    position_embeddings = self.model.rotary_emb.get_cached(
        self.max_seq_length, hidden_states.device.index
    )

    layer_caches = []
    for layer_idx, layer in enumerate(self.model.layers):
        hidden_states, position_ids = move_to_device(
            getattr(layer, "_per_layer_device_index", 0),
            hidden_states,
            position_ids,
        )

        # Attention sub-block
        attn_input = fast_rms_layernorm_inference(layer.input_layernorm, hidden_states)
        attn_out, layer_kv = GraniteAttention_fast_forward_inference(
            layer.self_attn,
            hidden_states = attn_input,
            past_key_value = past_key_values[layer_idx],
            position_ids = position_ids,
            attention_mask = attention_mask,
            do_prefill = not hasattr(layer.self_attn, "paged_attention"),
            position_embeddings = position_embeddings,
        )
        hidden_states = torch.add(hidden_states, attn_out, alpha = alpha)

        # MLP sub-block
        mlp_input = fast_rms_layernorm_inference(
            layer.post_attention_layernorm, hidden_states
        )
        mlp_out = fast_swiglu_inference(layer.mlp, mlp_input)
        hidden_states = torch.add(hidden_states, mlp_out, alpha = alpha)

        layer_caches.append(layer_kv)

    return BaseModelOutputWithPast(
        last_hidden_state = fast_rms_layernorm_inference(self.model.norm, hidden_states),
        past_key_values = layer_caches,
        hidden_states = [],
        attentions = [],
    )


class GraniteRotaryEmbedding(LlamaRotaryEmbedding):
    """Granite rotary embedding; construction is delegated to the Llama one."""

    def __init__(self, config):
        # Pass the config by keyword, nothing Granite-specific is added here.
        super(GraniteRotaryEmbedding, self).__init__(config = config)


def patched_init(original_init):
    """
    Wrap a model ``__init__`` so that ``self.config`` is set before it runs.

    The config is looked up as the ``config`` keyword argument, falling back to
    the first positional argument. When one is found (not None) it is stored on
    the instance, then the original initialiser is called with the unchanged
    arguments.

    Args:
        original_init: The ``__init__`` function being wrapped.

    Returns:
        A replacement ``__init__`` that keeps the original's name, docstring and
        signature metadata (via ``functools.wraps``), so introspection tools
        still see the real initialiser rather than ``(self, *args, **kwargs)``.
    """
    import functools  # local import keeps this helper self-contained

    @functools.wraps(original_init)
    def new_init(self, *args, **kwargs):
        # we can use self.residual_multiplier arg in GraniteDecoderLayer_fast_forward as mentioned here
        # https://github.com/huggingface/transformers/blob/e5fd865ebae062b7cf03a81b8c6affeb39f30bec/src/transformers/models/granite/modeling_granite.py#L243
        # The problem is, we don't have access to either the value or config in GraniteModel_fast_forward_inference
        # So we need a way to pass this value around. It is probably better to pass on entire config just in case we need it later
        config = kwargs.get("config", args[0] if args else None)
        if config is not None:
            self.config = config
        original_init(self, *args, **kwargs)

    return new_init


class FastGraniteModel(FastLlamaModel):
    """
    Granite-specific patching hooks layered on top of `FastLlamaModel`.

    `pre_patch` monkey-patches the transformers Granite classes with the fast
    forward functions defined in this module; `post_patch` fixes up a loaded
    model (embeddings, lm_head, quantisation and RoPE dtypes).
    """

    @staticmethod
    def pre_patch():
        """
        Install the fast forward implementations on the transformers Granite
        classes. All changes are class-level (or module-level) assignments, so
        they affect every Granite model in the process.
        """
        # `patch_linear_scaling` returns the name of a generated __init__ and its
        # source text; when a name is returned, the source is executed in this
        # module's globals and the resulting function installed on GraniteAttention.
        init_name, function = patch_linear_scaling(
            model_name = "granite",
            rope_module = GraniteRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = GraniteAttention,
        )
        if init_name is not None:
            exec(function, globals())
            GraniteAttention.__init__ = eval(init_name)
        # The Sdpa / Flash names may simply be aliases of GraniteAttention (see
        # the import fallback at the top of this module).
        GraniteAttention.forward = GraniteAttention_fast_forward
        GraniteSdpaAttention.forward = GraniteAttention_fast_forward
        GraniteFlashAttention2.forward = GraniteAttention_fast_forward
        GraniteDecoderLayer.forward = GraniteDecoderLayer_fast_forward
        GraniteModel.forward = LlamaModel_fast_forward
        GraniteForCausalLM.forward = CausalLM_fast_forward(
            GraniteModel_fast_forward_inference
        )
        # Makes `self.config` available on the causal LM before its own __init__ runs.
        GraniteForCausalLM.__init__ = patched_init(GraniteForCausalLM.__init__)
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(GraniteForCausalLM)

        import transformers.models.granite.modeling_granite

        # Replace the rotary embedding class exported by the transformers module
        # with the one defined in this file.
        transformers.models.granite.modeling_granite.GraniteRotaryEmbedding = (
            GraniteRotaryEmbedding
        )

        return

    @staticmethod
    def post_patch(model, tokenizer, correct_dtype = None):
        """
        Fix up a freshly loaded Granite model in place.

        Args:
            model: The loaded causal LM; `model.model.embed_tokens`,
                `model.lm_head` and `model.config` are modified.
            tokenizer: Returned unchanged.
            correct_dtype: NOTE(review): the passed value is never read - it is
                overwritten below with the lm_head weight dtype. Confirm
                whether callers expect it to be honoured.

        Returns:
            `(model, tokenizer)`.
        """
        # Torch.compile fails on embedding matrix??
        # Workaround randomly fixes it for torch versions < 2.2
        model.model.embed_tokens = torch.nn.Embedding.from_pretrained(
            model.model.embed_tokens.weight
        )
        model.config.update({"unsloth_version": __version__})

        # We also do this for the lm_head
        # Build a bias-free Linear shell and attach the existing weight to it,
        # then correct the recorded feature sizes to match that weight.
        lm_head = torch.nn.Linear(1, 1, bias = None)
        del lm_head.weight
        lm_head.weight = model.lm_head.weight
        lm_head.in_features = lm_head.weight.shape[1]
        lm_head.out_features = lm_head.weight.shape[0]
        model.lm_head = lm_head

        # Granite has tied weights! This means lm_head == embed_tokens
        # If the two weights do not share storage, rebuild lm_head on top of
        # the embedding weight so they do.
        if (
            model.model.embed_tokens.weight.data_ptr()
            != model.lm_head.weight.data_ptr()
        ):
            lm_head = torch.nn.Linear(1, 1, bias = None)
            del lm_head.weight
            lm_head.weight = model.model.embed_tokens.weight
            lm_head.in_features = lm_head.weight.shape[1]
            lm_head.out_features = lm_head.weight.shape[0]
            model.lm_head = lm_head

        # Also patch all dtypes - BnB seems to not allocate the correct type?
        # BnB default dtype seems to be float16!
        correct_dtype = lm_head.weight.dtype

        for name, module in model.named_modules():
            if isinstance(module, (Bnb_Linear4bit, Peft_Linear4bit)):
                weight = module.weight
                quant_state = weight.quant_state

                # Two quant_state layouts are handled: a list (dtype stored at
                # index 2) and an object with a `dtype` attribute.
                if type(quant_state) is list:
                    # BnB seems to have float16 as default!
                    module.weight.quant_state[2] = (
                        correct_dtype  # Cast to correct dtype
                    )
                else:
                    # https://github.com/TimDettmers/bitsandbytes/pull/763/files
                    quant_state.dtype = correct_dtype
            # Downcast RoPE embedding to correct data type
            if name.endswith("rotary_emb") or hasattr(module, "cos_cached"):
                if hasattr(module, "cos_cached") and (
                    module.cos_cached.dtype != correct_dtype
                ):
                    module.cos_cached = module.cos_cached.to(correct_dtype)
                    module.sin_cached = module.sin_cached.to(correct_dtype)

                elif hasattr(module, "short_cos_cached") and (
                    module.short_cos_cached.dtype != correct_dtype
                ):
                    module.short_cos_cached = module.short_cos_cached.to(correct_dtype)
                    module.short_sin_cached = module.short_sin_cached.to(correct_dtype)

        # Clear deleted GPU items
        import gc

        for _ in range(3):
            gc.collect()
            torch.cuda.empty_cache()
        return model, tokenizer


================================================
FILE: unsloth/models/llama.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import gc
import math
import functools
from typing import Optional, Tuple, List, Union

from ._utils import *
from ._utils import apply_unsloth_gradient_checkpointing
from ._utils import __version__, importlib_version
from ._utils import move_to_device
from ._utils import (
    _get_inference_mode_context_manager,
    _prepare_model_for_qat,
    is_bfloat16_supported,
    get_quant_type,
)
from .loader_utils import _get_fp8_mode_and_check_settings
from ..utils.packing import (
    get_packed_info_from_kwargs,
    mask_packed_sequence_boundaries,
)
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    SDPA,
    select_attention_backend,
)
from torch.nn.functional import scaled_dot_product_attention
from transformers import __version__ as transformers_version
from unsloth_zoo.utils import Version, _get_dtype
from unsloth_zoo.hf_utils import (
    dtype_from_config,
    add_dtype_kwargs,
    fix_lora_auto_mapping,
)
from unsloth_zoo.peft_utils import SKIP_QUANTIZATION_MODULES
from ..device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
)

transformers_version = Version(transformers_version)
# Transformers moved rotary embeddings out of all attention layers
IS_ATTENTION_REFACTOR = transformers_version > Version("4.47.1")
try:
    from transformers.modeling_layers import GradientCheckpointingLayer
except:
    GradientCheckpointingLayer = type(None)

from transformers.models.llama.modeling_llama import (
    logger,
    BaseModelOutputWithPast,
    CausalLMOutputWithPast,
)
from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)
from ..kernels import *
from ..tokenizer_utils import *
from .vision import FastBaseModel

# Final patching code
from transformers.models.llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaModel,
    LlamaForCausalLM,
)

# For Pytorch 2.1.1
try:
    from transformers.models.llama.modeling_llama import (
        LlamaSdpaAttention,
        LlamaFlashAttention2,
    )
except:
    LlamaSdpaAttention = LlamaAttention
    LlamaFlashAttention2 = LlamaAttention

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    BitsAndBytesConfig,
    AutoConfig,
)
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING
from transformers import set_seed as transformers_set_seed
from peft import LoraConfig, TaskType, get_peft_model as _get_peft_model
from peft import PeftModelForCausalLM, PeftModelForSequenceClassification
from ..save import patch_saving_functions
import re, os, inspect, math, sys
import types

try:
    from huggingface_hub.utils import get_token
except:
    # Old HF Hub versions <= 0.0.25
    from huggingface_hub.utils._token import get_token
from triton import __version__ as triton_version

# xformers is optional: `xformers` is None when it is unavailable.
# NOTE(review): the name is presumably supplied by one of the star imports
# above - confirm which module sets it.
HAS_XFORMERS = xformers is not None
# Mask type used in the attention signatures; None when xformers is missing.
BlockDiagonalCausalMask = (
    xformers.attn_bias.BlockDiagonalCausalMask if HAS_XFORMERS else None
)

# Device-specific helpers: Intel XPU uses the torch.xpu API, every other
# device type falls back to the torch.cuda API.
if DEVICE_TYPE == "xpu":
    clean_gpu_cache = torch.xpu.empty_cache
    get_current_device = torch.xpu.current_device
else:
    clean_gpu_cache = torch.cuda.empty_cache
    get_current_device = torch.cuda.current_device


def original_apply_qkv(self, X):
    """Return the query, key and value projections of `X`, in that order."""
    return self.q_proj(X), self.k_proj(X), self.v_proj(X)


def original_apply_o(self, X):
    """Return the output projection of `X`."""
    return self.o_proj(X)


from math import sqrt as math_sqrt

# Extra sequence slots the inference attention adds to its preallocated KV buffer.
KV_CACHE_INCREMENT = 512  # KV Cache update size
# Module-level alias for the softmax used by the inference attention code.
torch_nn_functional_softmax = torch.nn.functional.softmax
# SDPA has GQA internally
# Feature detection: the installed torch is treated as supporting the
# `enable_gqa` argument when its SDPA docstring mentions it.
SDPA_HAS_GQA = "enable_gqa" in scaled_dot_product_attention.__doc__

from peft.utils.other import ModulesToSaveWrapper


def _offload_frozen_module_for_training(
    module: ModulesToSaveWrapper,
    device_type: str,
    offload_device: Optional[str] = "cpu",
) -> None:
    """
    Set up a `ModulesToSaveWrapper` for training and park its frozen original.

    The wrapper's trainable copy (`modules_to_save.default`) is moved to
    `device_type` and marked as requiring gradients. The wrapped
    `original_module` is frozen and, unless `offload_device` is None, moved to
    that device to free accelerator memory.

    Args:
        module: Wrapper to configure. Objects without a `modules_to_save`
            attribute are left untouched.
        device_type: Device the trainable copy is moved to (e.g. "cuda:0",
            "xpu:0").
        offload_device: Where the frozen original goes (default "cpu"). Pass
            None to leave it on its current device. Only "cpu" is supported
            today; disk offloading is planned.

    Returns:
        None. The module is modified in place.

    Note:
        A float16 trainable copy is promoted to float32, because Tesla T4 must
        train in float32 rather than float16.

    See Also:
        - https://github.com/unslothai/unsloth/pull/1200 (Tesla T4 float32 requirement)
    """
    # Guard: nothing to configure when there is no trainable copy.
    if not hasattr(module, "modules_to_save"):
        return None

    trainable_copy = module.modules_to_save.default
    weight_dtype = trainable_copy.weight.dtype
    # See https://github.com/unslothai/unsloth/pull/1200
    # Tesla T4 must use float32 and not float16
    target_dtype = torch.float32 if weight_dtype == torch.float16 else weight_dtype

    trainable_copy.to(device = device_type, dtype = target_dtype, non_blocking = True)
    trainable_copy.requires_grad_(True)

    # [TODO] Move old module to CPU - should be disk!
    frozen_original = module.original_module
    if offload_device is not None:
        frozen_original.to(device = offload_device, non_blocking = True)
    frozen_original.requires_grad_(False)


# Fix new HF's inference code
def _fast_prepare_inputs_for_generation(
    self,
    input_ids,
    attention_mask = None,
    inputs_embeds = None,
    **kwargs,
):
    """
    Build the keyword arguments for one generation step.

    Without a usable cache, the inputs are passed through as given. With a
    populated cache, only the last token of `input_ids` is kept,
    `cache_position` is created (or moved to the input device) and, when the
    base model provides `_prepare_4d_causal_attention_mask_with_cache_position`,
    the attention mask is expanded through it. `position_ids` are derived when
    the caller did not supply them.

    Args:
        input_ids: Token ids, or None / empty when only `inputs_embeds` is used.
        attention_mask: Optional mask; a 2D mask is also used to derive
            `position_ids` and the mask target length.
        inputs_embeds: Optional embeddings; only forwarded when no populated
            cache exists.
        **kwargs: Passed through into the result. `past_key_values`,
            `cache_position` and `position_ids` are read and may be updated.

    Returns:
        dict with `attention_mask`, every entry of `kwargs`, and either
        `inputs_embeds` (with `input_ids` set to None) or `input_ids`.
    """
    past_key_values = kwargs.get("past_key_values", None)
    # Keep the caller's mask: `attention_mask` may be replaced by a 4D mask
    # below, while position ids are still derived from the original.
    original_attention_mask = attention_mask

    # Handle inputs_embeds - only use on FIRST generation step (no cache)
    # This fixes GitHub issue #3798: inputs_embeds was ignored
    use_inputs_embeds = inputs_embeds is not None and past_key_values is None

    # Batch size, step length and device come from whichever input is present.
    if input_ids is not None and input_ids.numel() > 0:
        bs, seq_length = input_ids.shape
        device = input_ids.device
    elif inputs_embeds is not None:
        bs, seq_length, _ = inputs_embeds.shape
        device = inputs_embeds.device
    else:
        bs, seq_length = 1, 0
        device = "cuda" if torch.cuda.is_available() else "cpu"

    if past_key_values is not None:
        # Check for uninitialized DynamicCache
        if len(past_key_values) == 0:
            past_key_values = None
            kwargs["past_key_values"] = None
            use_inputs_embeds = inputs_embeds is not None
        # New since 4.56
        elif (
            hasattr(past_key_values, "get_seq_length")
            and past_key_values.get_seq_length() == 0
        ):
            past_key_values = None
            kwargs["past_key_values"] = None
            use_inputs_embeds = inputs_embeds is not None
        else:
            # Populated cache: only the newest token has to be processed.
            if input_ids is not None and input_ids.numel() > 0:
                bs = input_ids.shape[0]
                input_ids = input_ids[:, [-1]]
                device = input_ids.device
                seq_length = 1
            elif inputs_embeds is not None:
                bs, seq_length, _ = inputs_embeds.shape
                device = inputs_embeds.device
            else:
                bs, seq_length = 1, 0
                device = "cuda" if torch.cuda.is_available() else "cpu"

            # Number of tokens already held by the cache.
            if hasattr(past_key_values, "get_seq_length"):
                past_len = int(past_key_values.get_seq_length())
            else:
                # legacy tuple cache: (layer, (K,V))
                past_len = int(past_key_values[0][0].shape[-2])

            # Maximum cache length, if the cache object reports one; both
            # accessor names are tried.
            max_cache_len = None
            if hasattr(past_key_values, "get_max_cache_shape"):
                m = past_key_values.get_max_cache_shape()
                max_cache_len = int(m) if m is not None and m > 0 else None
            elif hasattr(past_key_values, "get_max_length"):
                m = past_key_values.get_max_length()
                max_cache_len = int(m) if m is not None else None

            # ensure cache_position
            cache_position = kwargs.get("cache_position", None)
            if cache_position is None:
                kwargs["cache_position"] = torch.arange(
                    past_len,
                    past_len + seq_length,
                    device = device,
                    dtype = torch.long,
                )
            else:
                if (
                    hasattr(cache_position, "device")
                    and cache_position.device != device
                ):
                    kwargs["cache_position"] = cache_position.to(device)

            # Get to the base model
            base_model = self
            if hasattr(base_model, "base_model_prefix"):
                base_model = getattr(base_model, base_model.base_model_prefix)

            if hasattr(
                base_model, "_prepare_4d_causal_attention_mask_with_cache_position"
            ):
                # Whether the helper accepts a `device` argument is detected
                # once and cached on the base model.
                if not hasattr(base_model, "_unsloth_mask_needs_device"):

                    def _check_needs_device(fn) -> bool:
                        try:
                            sig = inspect.signature(inspect.unwrap(fn))
                            return "device" in sig.parameters
                        except:
                            # transformers <= 4.51.3 includes device arg but > 4.51.3 does not
                            return transformers_version < Version("4.52.0")

                    base_model._unsloth_mask_needs_device = _check_needs_device(
                        base_model._prepare_4d_causal_attention_mask_with_cache_position
                    )

                # Mask width preference: cache capacity, then the width of the
                # caller's 2D mask, then past + current length.
                if max_cache_len is not None:
                    target_length = max_cache_len
                elif (
                    original_attention_mask is not None
                    and original_attention_mask.dim() == 2
                ):
                    target_length = original_attention_mask.shape[-1]
                else:
                    target_length = past_len + seq_length

                mask_kwargs = {
                    "sequence_length": seq_length,
                    "target_length": target_length,
                    "dtype": self.dtype,
                    "cache_position": kwargs["cache_position"],
                    "batch_size": bs,
                    "config": self.config,
                    "past_key_values": past_key_values,
                }
                if base_model._unsloth_mask_needs_device:
                    mask_kwargs["device"] = device

                attention_mask = (
                    base_model._prepare_4d_causal_attention_mask_with_cache_position(
                        attention_mask,
                        **mask_kwargs,
                    )
                )
            else:
                if transformers_version <= Version("4.52.4"):
                    logger.warning_once(
                        f"{self.__class__.__name__} has no `_prepare_4d_causal_attention_mask_with_cache_position` method "
                        "defined in its base modeling class. Compiled forward passes will be sub-optimal. If you're "
                        "writing code, see Llama for an example implementation. If you're a user, please report this "
                        "issue on GitHub."
                    )

    if kwargs.get("position_ids", None) is None:
        if original_attention_mask is not None and original_attention_mask.dim() == 2:
            # Positions = running count of attended tokens minus one; masked
            # positions are set to 1, then only the current step is kept.
            position_ids = original_attention_mask.long().cumsum(-1) - 1
            position_ids.masked_fill_(original_attention_mask == 0, 1)
            position_ids = position_ids[:, -seq_length:]
            kwargs["position_ids"] = position_ids
        elif kwargs.get("cache_position", None) is not None:
            # No 2D mask: reuse cache_position, expanded over the batch when 1D.
            cp = kwargs["cache_position"]
            if cp.dim() == 1:
                cp = cp.unsqueeze(0).expand(bs, -1)
            kwargs["position_ids"] = cp

    # Entries of `kwargs` come after `attention_mask`, so an `attention_mask`
    # key inside kwargs would take precedence.
    result = {
        "attention_mask": attention_mask,
        **kwargs,
    }
    if use_inputs_embeds:
        result["inputs_embeds"] = inputs_embeds
        result["input_ids"] = None
    else:
        result["input_ids"] = input_ids
    return result


def fix_prepare_inputs_for_generation(module):
    """Replace `module.prepare_inputs_for_generation` with the fast variant.

    Objects that do not expose a `prepare_inputs_for_generation` attribute
    are left untouched.
    """
    if not hasattr(module, "prepare_inputs_for_generation"):
        return
    module.prepare_inputs_for_generation = _fast_prepare_inputs_for_generation


# Module-level alias for `torch.matmul`, used by the single-token attention path below.
torch_matmul = torch.matmul


def LlamaAttention_fast_forward_inference(
    self,
    hidden_states: torch.Tensor,
    past_key_value: Optional[Tuple[torch.Tensor]],
    position_ids,
    do_prefill = False,
    attention_mask = None,
    rotary_seq_len = None,
):
    """
    https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L406
    Fast inference using KV cache.
    QK^T can be computed in 4 chunks

    [Q, q] @ [K, k].T where q, k are the new tokens.
    [QK^T, Qk^T]
    [qK^T, qk^T]

    Since the attention mask wipes Qk^T, we just get
    [QK^T,    0]
    [qK^T, qk^T]

    Since softmax is row-wise, we get
    softmax([QK^T,    0])
    softmax([qK^T, qk^T])

    We then multiply by   [V]
                          [v]
    softmax([QK^T,    0]) [softmax(QK^T)V] *
    softmax([qK^T, qk^T]) [softmax([qK^T, qk^T]) @ [V, v]]

    But notice * [softmax(QK^T)V] is just the last attention.
    We just need to compute the last final row.

    This means we can pass in a row of Q, but we need to
    remember K and V, which are called the KV cache.

    Args:
        hidden_states: Activations for the single new token per sequence; the
            projections below are viewed as `(bsz, 1, heads, head_dim)`.
        past_key_value: Pair `(K1, V1)` of cached keys / values. `K1.shape[-2]`
            is taken as the number of tokens already cached.
        position_ids: Positions used to index the cached cos / sin tables.
            A 1D tensor is expanded to `(n, 1)`.
        do_prefill: When True, (re)allocates the per-layer scratch buffers and
            copies `K1` / `V1` into the growable cache stored on `self`.
        attention_mask: Only consulted on the SDPA path (`bsz != 1`); the
            `bsz == 1` path does not apply it.
        rotary_seq_len: Length requested from the rotary cache. When None it is
            derived from `kv_seq_len` and the largest position id.

    Returns:
        `(A, (Kn, Vn))` where `A` is the output of `o_proj` written into
        `self.temp_O`, and `Kn` / `Vn` are views of the cache covering
        `kv_seq_len` positions, permuted to
        `(bsz, n_kv_heads, kv_seq_len, head_dim)`.
    """
    Xn = hidden_states
    bsz, _, hd = hidden_states.size()
    K1, V1 = past_key_value
    dtype = Xn.dtype

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # assert(n_kv_heads * n_groups == n_heads)

    hidden_size = self.config.hidden_size
    attention_size = n_heads * head_dim
    # Tokens already cached, and the cache length once the new token is appended.
    seq_len = K1.shape[-2]
    kv_seq_len = seq_len + 1

    # Prefill phase
    # if not hasattr(self, "paged_attention"):
    device = hidden_states.device
    if do_prefill:
        # One allocation holds both K (index 0) and V (index 1); the sequence
        # axis is the leading one and has KV_CACHE_INCREMENT spare rows.
        self.paged_attention = torch.empty(
            (KV_CACHE_INCREMENT + seq_len + 1, 2, bsz, n_kv_heads, head_dim),
            dtype = dtype,
            device = device,
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        # Move the sequence axis of the incoming cache to the front to match.
        self.paged_attention_K[:seq_len] = K1.permute(2, 0, 1, 3)
        self.paged_attention_V[:seq_len] = V1.permute(2, 0, 1, 3)
        # Output buffers for the q / k / v projections of the new token.
        self.temp_QA = torch.empty(
            (2, bsz, 1, attention_size), dtype = dtype, device = device
        )
        self.temp_KV = torch.empty(
            (2, bsz, 1, n_kv_heads * head_dim), dtype = dtype, device = device
        )
        # Scratch for the "rotate half" term of the rotary embedding.
        self.RH_Q = torch.empty((bsz, n_heads, 1, head_dim), dtype = dtype, device = device)

        # Mistral Nemo 12b has weird dimensions
        if attention_size != hidden_size:
            self.temp_O = torch.empty((bsz, 1, hidden_size), dtype = dtype, device = device)
        else:
            # Sizes match, so the second temp_QA slot doubles as the o_proj output buffer.
            self.temp_O = self.temp_QA[1][:, :, :hidden_size]

        # Buffer for the attention scores used by the bsz == 1 path.
        self.attention = torch.empty(
            (bsz, n_heads, 1, KV_CACHE_INCREMENT + seq_len), dtype = dtype, device = device
        )
        self.scalar = 1.0 / math_sqrt(self.head_dim)
        self.half_head_dim = head_dim // 2
    elif kv_seq_len >= self.paged_attention.shape[0]:
        # Cache is full: grow the sequence axis by another KV_CACHE_INCREMENT rows
        # and refresh the K / V views, then grow the score buffer to match.
        # NOTE(review): relies on `resize_` keeping the existing leading rows in
        # place for this layout — confirm against the PyTorch docs.
        self.paged_attention.resize_(
            (
                self.paged_attention.shape[0] + KV_CACHE_INCREMENT,
                2,
                bsz,
                n_kv_heads,
                head_dim,
            )
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.attention.resize_(
            (bsz, n_heads, 1, self.attention.shape[-1] + KV_CACHE_INCREMENT)
        )

    # Project the new token, writing straight into the preallocated buffers.
    Qn = fast_linear_forward(self.q_proj, Xn, out = self.temp_QA[0])
    Kn = fast_linear_forward(self.k_proj, Xn, out = self.temp_KV[0])
    Vn = fast_linear_forward(self.v_proj, Xn, out = self.temp_KV[1])
    Qn = Qn.view(bsz, 1, n_heads, head_dim).transpose(1, 2)
    Kn = Kn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)
    Vn = Vn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)

    # cos, sin = self.rotary_emb(Vn, seq_len = kv_seq_len)
    # Qn, Kn = inplace_rope_embedding(Qn, Kn, cos, sin, position_ids)

    # Need to do it prior 2 steps before hitting full on short KV cache
    # or else error
    # ensure correct shape
    if position_ids.dim() == 1:
        position_ids = position_ids[:, None]
    position_ids = position_ids.to(Qn.device)

    if rotary_seq_len is None:
        # Cover both the cache length and the largest requested position.
        rotary_seq_len = max(kv_seq_len, int(position_ids.max().item()) + 1)
    self.rotary_emb.extend_rope_embedding(Vn, rotary_seq_len + 1)  # +1 slack
    cos, sin = self.rotary_emb.get_cached(rotary_seq_len, Qn.device.index or 0)

    # Gather the rows for the requested positions and add a head axis.
    cos = cos[position_ids].unsqueeze(1).to(device = Qn.device, dtype = Qn.dtype)
    sin = sin[position_ids].unsqueeze(1).to(device = Qn.device, dtype = Qn.dtype)

    h = self.half_head_dim

    # In-place rotary embedding: Q = Q * cos + rotate_half(Q) * sin, where
    # rotate_half swaps the two halves of the head dim and negates the first.
    RH_Q = self.RH_Q
    RH_Q[:, :, :, :h] = Qn[:, :, :, h:]
    RH_Q[:, :, :, h:] = Qn[:, :, :, :h]
    RH_Q[:, :, :, :h].neg_()  # torch.neg(RH_Q[:,:,:,:h], out = RH_Q[:,:,:,:h])
    Qn *= cos
    Qn.addcmul_(RH_Q, sin)

    # Same rotation for K, reusing the first n_kv_heads heads of the Q scratch buffer.
    RH_K = RH_Q[
        :, :n_kv_heads, :, :
    ]  # torch.empty((n_kv_heads, 1, head_dim), dtype = dtype, device = "cuda:0")
    RH_K[:, :, :, :h] = Kn[:, :, :, h:]
    RH_K[:, :, :, h:] = Kn[:, :, :, :h]
    RH_K[:, :, :, :h].neg_()  # torch.neg(RH_K[:,:,:,:h], out = RH_K[:,:,:,:h])
    Kn *= cos
    Kn.addcmul_(RH_K, sin)

    # New KV cache
    # Kn = torch.cat([K1, Kn], dim = 2)
    # Vn = torch.cat([V1, Vn], dim = 2)
    # Append the new token at row `seq_len`, then take views over all
    # kv_seq_len rows laid out as (bsz, n_kv_heads, kv_seq_len, head_dim).
    self.paged_attention_K[seq_len] = Kn.permute(2, 0, 1, 3)
    self.paged_attention_V[seq_len] = Vn.permute(2, 0, 1, 3)
    Kn = self.paged_attention_K[:kv_seq_len].permute(1, 2, 0, 3)
    Vn = self.paged_attention_V[:kv_seq_len].permute(1, 2, 0, 3)

    # Handle sliding windows
    sliding_window = getattr(self.config, "sliding_window", None)
    if sliding_window is not None and kv_seq_len > sliding_window:
        # From https://github.com/huggingface/transformers/blob/main/src/transformers/models/mistral/modeling_mistral.py#L193
        # Attend only to the last `sliding_window` positions; the mask is
        # trimmed to the same range. The returned (Kn, Vn) stay untrimmed.
        start = kv_seq_len - sliding_window
        Knn = Kn[:, :, start:, :]  # .contiguous()
        Vnn = Vn[:, :, start:, :]  # .contiguous()
        if attention_mask is not None:
            attention_mask = attention_mask[..., start:]
    else:
        Knn, Vnn = Kn, Vn

    # Grouped query attention
    _, _, cached_len, _ = Knn.shape
    if bsz == 1 or ((not SDPA_HAS_GQA) and n_groups != 1):
        # Repeat every KV head n_groups times so K / V have n_heads heads. This is
        # needed for the manual matmul path and when SDPA cannot do GQA itself.
        Knn = Knn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Vnn = Vnn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Knn = Knn.reshape(bsz, n_heads, cached_len, head_dim)
        Vnn = Vnn.reshape(bsz, n_heads, cached_len, head_dim)

    # when qlen==vlen and attn_mask is None, we should use causal attention
    Q_len = Qn.shape[-2]
    K_len = Knn.shape[-2]
    if attention_mask is None and Q_len == K_len:
        is_causal = True
    else:
        is_causal = False
    # Attention
    if bsz == 1:
        # Manual attention into preallocated buffers; attention_mask is not used here.
        Qn *= self.scalar  # See https://github.com/ggerganov/llama.cpp/issues/7805#issuecomment-2153349963
        # It seems like doing (Q * scalar) @ K is better than (Q @ K) * scalar to stop overflows
        A = torch_matmul(
            Qn, Knn.transpose(2, 3), out = self.attention[:, :, :, :cached_len]
        )
        # Softmax is evaluated in float32 and written back into the score buffer.
        A[:] = torch_nn_functional_softmax(
            A, dim = -1, dtype = torch.float32
        )  # .to(A.dtype)
        # The weighted sum over V overwrites the Q buffer.
        A = torch_matmul(A, Vnn, out = Qn)
    # --- attention_mask fixup for SDPA if user passes 2D padding mask
    else:
        if attention_mask is not None and attention_mask.dim() == 2:
            # 2D mask -> boolean mask with singleton head and query axes.
            attention_mask = attention_mask[:, None, None, :].to(torch.bool)
            # is it more appropriate to use _prepare_4d_causal_attention_mask_for_sdpa?
        elif (
            attention_mask is not None
            and attention_mask.dim() == 4
            and attention_mask.dtype != torch.bool
        ):
            # Decode is more stable with boolean keep masks than additive bf16 masks.
            # Entries equal to 0 in the additive mask become True (keep).
            attention_mask = attention_mask.eq(0)

        if SDPA_HAS_GQA:
            A = scaled_dot_product_attention(
                Qn,
                Knn,
                Vnn,
                attn_mask = attention_mask,
                is_causal = is_causal,
                enable_gqa = True,
            )
        else:
            A = scaled_dot_product_attention(
                Qn, Knn, Vnn, attn_mask = attention_mask, is_causal = is_causal
            )
    # Merge the heads back and apply the output projection into temp_O.
    A = A.transpose(1, 2)
    A = A.reshape(bsz, 1, attention_size)
    A = fast_linear_forward(self.o_proj, A, out = self.temp_O)
    return A, (Kn, Vn)


# Module-level alias for `torch.nn.functional.silu`, used by `fast_swiglu_inference`.
torch_nn_functional_silu = torch.nn.functional.silu


def fast_swiglu_inference(
    self, X, temp_gate = None, temp_up = None, gate_multiplier = None, down_multiplier = None
):
    """SwiGLU MLP forward for inference: `down_proj(silu(gate_proj(X)) * up_proj(X))`.

    Args:
        self: MLP module exposing `gate_proj`, `up_proj` and `down_proj`.
        X: Input with exactly three dimensions; the last one is the hidden size.
        temp_gate: Optional output buffer handed to the gate projection.
        temp_up: Optional output buffer handed to the up projection.
        gate_multiplier: Optional factor applied in place to the gate projection
            before the activation.
        down_multiplier: Optional factor applied in place to the final output.

    Returns:
        The down projection. It is written into the leading `hidden` columns of
        the up-projection tensor, so it shares storage with that tensor.
    """
    _, _, hidden = X.shape

    gate_out = fast_linear_forward(self.gate_proj, X, out = temp_gate)
    if gate_multiplier is not None:
        gate_out *= gate_multiplier

    up_out = fast_linear_forward(self.up_proj, X, out = temp_up)

    # silu(gate) * up, computed in place on the gate tensor
    gate_out = torch_nn_functional_silu(gate_out, inplace = True)
    gate_out *= up_out

    # The up tensor is no longer needed, so its storage receives the result
    result = fast_linear_forward(
        self.down_proj, gate_out, out = up_out[:, :, :hidden]
    )
    if down_multiplier is not None:
        result *= down_multiplier
    return result


# Module-level aliases used by the scratch-buffer path of `fast_rms_layernorm_inference`.
torch_square = torch.square
torch_mean = torch.mean


def fast_rms_layernorm_inference(self, X, XX = None, XX2 = None, variance = None):
    """RMS layernorm for inference: `X / sqrt(mean(X^2) + eps) * weight`.

    The normalisation is evaluated in float32, cast back to the dtype of `X`,
    and only then multiplied by `self.weight`.

    Two modes:
      * Scratch buffers supplied (`XX`, `XX2`, `variance`): all intermediates are
        written into those buffers and the result overwrites `X` in place.
        `X` itself is returned.
      * No scratch buffers (`XX is None`): the computation is out of place and a
        new tensor is returned. `X` is left untouched, so callers may keep a
        reference to it (e.g. as a residual).

    Args:
        self: Norm module exposing `weight` and `variance_epsilon`.
        X: Input activations.
        XX: Optional float32 scratch buffer shaped like `X`.
        XX2: Optional float32 scratch buffer receiving `XX ** 2`.
        variance: Optional float32 buffer receiving the mean over the last axis
            (kept as a size-1 dimension).

    Returns:
        The normalised tensor in the dtype of `X`.
    """
    old_dtype = X.dtype
    if XX is None:
        # Out-of-place path. `X.to(torch.float32)` aliases `X` when it already
        # is float32, so the scaling must not be done in place here.
        XX = X.to(torch.float32)
        variance = XX.square().mean(-1, keepdim = True)
        variance += self.variance_epsilon
        XX = XX * variance.rsqrt_()
        X = XX.to(old_dtype)
    else:
        # Buffer path: reuse caller-provided storage and overwrite X in place.
        XX.copy_(X)
        torch_mean(torch_square(XX, out = XX2), -1, keepdim = True, out = variance)
        variance += self.variance_epsilon
        XX *= variance.rsqrt_()
        X.copy_(XX)

    X *= self.weight
    return X


def fast_rms_layernorm_inference_gemma(self, X, out_weight = None):
    """Gemma-style RMS layernorm for inference, scaling by `weight + 1`.

    The whole computation, including the multiplication by `weight + 1`, runs
    in float32; the result is cast back to the dtype of `X` at the end.

    Args:
        self: Norm module exposing `weight` and `variance_epsilon`.
        X: Input activations.
        out_weight: Optional buffer that is overwritten with `weight + 1` and
            used as the scale, avoiding a fresh allocation.

    Returns:
        The normalised tensor in the dtype of `X`.
    """
    input_dtype = X.dtype
    normed = X.to(torch.float32)
    mean_square = normed.square().mean(-1, keepdim = True)
    mean_square += self.variance_epsilon
    normed *= mean_square.rsqrt_()

    if out_weight is not None:
        # Fill the caller's buffer with (weight + 1) in place
        out_weight[:] = self.weight
        out_weight += 1.0
        scale = out_weight
    else:
        scale = self.weight + 1.0

    normed *= scale
    return normed.to(input_dtype)


# Normal layernorm with mean removal
@torch.compile(fullgraph = False, dynamic = True, options = torch_compile_options)
def fast_layernorm_compiled(layernorm, X):
    """Mean-centred layernorm evaluated in float32 and cast back to the input dtype.

    Computes `(X - mean) * rsqrt(mean((X - mean)^2) + eps) * weight` over the
    last axis, reading `variance_epsilon` and `weight` from `layernorm`.
    """
    input_dtype = X.dtype
    X32 = X.float()
    centered = X32 - X32.mean(-1, keepdim = True)
    inv_std = torch.rsqrt(
        centered.square().mean(-1, keepdim = True) + layernorm.variance_epsilon
    )
    normed = centered * inv_std * layernorm.weight.float()
    return normed.to(input_dtype)


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L320
def LlamaAttention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Full-sequence attention forward pass.

    Projects `hidden_states` to Q / K / V, applies rotary embeddings, prepends
    any cached keys / values and hands the result to `run_attention`.

    Args:
        hidden_states: Input of shape `(batch, q_len, hidden)`.
        causal_mask: Forwarded to the attention context unchanged.
        attention_mask: When given, forces the SDPA backend and is forwarded to
            the attention context.
        position_ids: Positions for the rotary embedding. When None and packed
            sequence info is present, `kwargs["position_ids"]` is used instead.
        past_key_value: Optional `(K, V)` pair concatenated in front of the new
            keys / values along dim 2.
        output_attentions: Not used here; attention weights are never returned.
        use_cache: When truthy, the concatenated `(K, V)` pair is returned.
        padding_mask: Not used here.
        position_embeddings: Optional precomputed `(cos, sin)`. Used only when
            it covers the full key / value length.

    Returns:
        `(attn_output, None, present_key_value)`; the last element is None
        unless `use_cache` is truthy.
    """
    # Drop the scratch buffers left behind by the single-token inference path
    if hasattr(self, "paged_attention"):
        for stale_buffer in (
            "paged_attention_K",
            "paged_attention_V",
            "paged_attention",
            "temp_QA",
            "temp_KV",
            "RH_Q",
            "attention",
        ):
            delattr(self, stale_buffer)

    batch, query_len, _ = hidden_states.size()

    num_heads = self.config.num_attention_heads
    kv_groups = self.num_key_value_groups
    num_kv_heads = self.config.num_key_value_heads
    head_size = self.head_dim
    assert num_kv_heads * kv_groups == num_heads

    Q, K, V = self.apply_qkv(self, hidden_states)
    Q = Q.view(batch, query_len, num_heads, head_size).transpose(1, 2)
    K = K.view(batch, query_len, num_kv_heads, head_size).transpose(1, 2)
    V = V.view(batch, query_len, num_kv_heads, head_size).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, Q.device)

    # Total key / value length including whatever is already cached
    total_kv_len = K.shape[-2]
    if past_key_value is not None:
        total_kv_len += past_key_value[0].shape[-2]

    if not position_embeddings or total_kv_len > position_embeddings[0].shape[0]:
        # No usable precomputed tables: extend and fetch the module's own cache
        rope = self.rotary_emb
        rope.extend_rope_embedding(V, seq_len = total_kv_len)
        cos, sin = rope.get_cached(total_kv_len, Q.device.index)
        cos = cos.to(device = Q.device, dtype = Q.dtype)
        sin = sin.to(device = Q.device, dtype = Q.dtype)
    else:
        cos, sin = position_embeddings

    if position_ids is None and seq_info is not None:
        rope_position_ids = kwargs.get("position_ids")
    else:
        rope_position_ids = position_ids

    Q, K = fast_rope_embedding(Q, K, cos, sin, rope_position_ids)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)

    if use_cache:
        present_key_value = (K, V)
    else:
        present_key_value = None

    # Variable-length kernels are only considered when nothing is being cached
    use_varlen = seq_info is not None and present_key_value is None
    if attention_mask is not None:
        backend = SDPA
    else:
        backend = select_attention_backend(use_varlen)

    # should dropout be hardcoded to 0.0?
    attn_config = AttentionConfig(
        backend = backend,
        n_kv_heads = num_kv_heads,
        n_groups = kv_groups,
        flash_dense_kwargs = {"causal": True},
        flash_varlen_kwargs = {"dropout_p": 0.0, "causal": True},
    )
    attn_context = AttentionContext(
        bsz = batch,
        q_len = query_len,
        kv_seq_len = total_kv_len,
        n_heads = num_heads,
        head_dim = head_size,
        requires_grad = hidden_states.requires_grad,
        seq_info = seq_info,
        attention_mask = attention_mask,
        causal_mask = causal_mask,
    )

    A = run_attention(config = attn_config, context = attn_context, Q = Q, K = K, V = V)
    attn_output = self.apply_o(
        self, A.reshape(batch, query_len, num_heads * head_size)
    )
    return attn_output, None, present_key_value


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L590
def LlamaDecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
    """
    One decoder layer: pre-norm self-attention and pre-norm MLP, each wrapped
    in a residual connection.

    When `use_cache` is truthy and the layer carries `_flag_for_generation`,
    the inference kernels (`fast_rms_layernorm_inference`,
    `fast_swiglu_inference`) and in-place residual adds are used. Otherwise
    `fast_rms_layernorm`, `self.mlp` and out-of-place adds are used.

    Args:
        hidden_states (`torch.FloatTensor`): layer input of shape `(batch, seq_len, embed_dim)`
        causal_mask: passed through to `self.self_attn`
        attention_mask (`torch.FloatTensor`, *optional*): passed through to `self.self_attn`
        position_ids (`torch.LongTensor`, *optional*): passed through to `self.self_attn`
        past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached key / value states
        output_attentions (`bool`, *optional*): when truthy, the attention weights
            returned by `self.self_attn` are included in the output tuple
        use_cache (`bool`, *optional*): when truthy, the present key / value states
            are included in the output tuple
        padding_mask, position_embeddings, **kwargs: passed through to `self.self_attn`

    Returns:
        `(hidden_states[, self_attn_weights][, present_key_value])`
    """
    attn_inputs = dict(
        causal_mask = causal_mask,
        attention_mask = attention_mask,
        position_ids = position_ids,
        past_key_value = past_key_value,
        output_attentions = output_attentions,
        use_cache = use_cache,
        padding_mask = padding_mask,
        position_embeddings = position_embeddings,
    )
    generating = use_cache and hasattr(self, "_flag_for_generation")

    if not generating:
        # Self attention
        skip = hidden_states
        normed = fast_rms_layernorm(self.input_layernorm, hidden_states)
        attn_out, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = normed,
            **attn_inputs,
            **kwargs,
        )
        hidden_states = skip + attn_out

        # Fully Connected
        skip = hidden_states
        normed = fast_rms_layernorm(self.post_attention_layernorm, hidden_states)
        mlp_out = self.mlp(normed)
        hidden_states = skip + mlp_out
    else:
        # Self attention (inference kernels, in-place residual add)
        skip = hidden_states
        hidden_states = fast_rms_layernorm_inference(
            self.input_layernorm, hidden_states
        )
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = hidden_states,
            **attn_inputs,
            **kwargs,
        )
        hidden_states += skip

        # Fully Connected
        skip = hidden_states
        hidden_states = fast_rms_layernorm_inference(
            self.post_attention_layernorm, hidden_states
        )
        hidden_states = fast_swiglu_inference(self.mlp, hidden_states)
        hidden_states += skip

    collected = [hidden_states]
    if output_attentions:
        collected.append(self_attn_weights)
    if use_cache:
        collected.append(present_key_value)
    return tuple(collected)


# https://github.com/unslothai/unsloth/issues/404#issuecomment-2323473452
# Lookup table accepting either a dtype name ("float32", "float16", "bfloat16")
# or the torch dtype object itself, and mapping both to the torch dtype.
__DTYPE_MAP = {
    alias: dtype
    for dtype in (torch.float32, torch.float16, torch.bfloat16)
    for alias in (str(dtype).split(".")[-1], dtype)
}


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L825
def LlamaModel_fast_forward(
    self,
    input_ids: Optional[torch.LongTensor] = None,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_values: Optional[List[torch.FloatTensor]] = None,
    inputs_embeds: Optional[torch.FloatTensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    *args,
    **kwargs,
) -> Union[Tuple, BaseModelOutputWithPast]:
    """Forward pass of the decoder stack (embeddings -> layers -> final norm).

    Shared by several model families: behaviour is switched on
    `self.config.model_type` (gemma / gemma2 / cohere / granite / falcon_h1).

    Args:
        input_ids: Token ids of shape `(batch, seq)`. Mutually exclusive with
            `inputs_embeds`; exactly one of the two must be given.
        causal_mask: Passed to every decoder layer, except for Gemma2 where it
            is replaced per layer by a sliding-window or global mask.
        attention_mask: Optional mask. In training mode it is dropped (set to
            None) before the layers run; otherwise it is expanded with
            `_prepare_4d_causal_attention_mask_for_sdpa`.
        position_ids: Optional positions, reshaped to `(-1, seq)` and cast to
            int32.
        past_key_values: Optional per-layer cache, indexed as
            `past_key_values[layer]`; the cached length is read from
            `past_key_values[0][0].shape[2]`.
        inputs_embeds: Precomputed embeddings of shape `(batch, seq, hidden)`.
        use_cache / output_attentions / output_hidden_states / return_dict:
            Fall back to the corresponding `self.config` values when None.
            `output_attentions` must resolve to False (asserted).
        **kwargs: Forwarded to every decoder layer. The presence of
            `"packed_seq_lengths"` disables the max-length truncation.

    Returns:
        A `BaseModelOutputWithPast`, or, when `return_dict` is falsy, a tuple
        of the non-None values among (hidden states, cache, all hidden states,
        all attentions).

    Raises:
        ValueError: If both or neither of `input_ids` / `inputs_embeds` are given.
    """
    output_attentions = (
        output_attentions
        if output_attentions is not None
        else self.config.output_attentions
    )
    # Attention weights are never produced by the fast attention path.
    assert output_attentions is False
    output_hidden_states = (
        output_hidden_states
        if output_hidden_states is not None
        else self.config.output_hidden_states
    )
    use_cache = use_cache if use_cache is not None else self.config.use_cache

    return_dict = (
        return_dict if return_dict is not None else self.config.use_return_dict
    )

    # retrieve input_ids and inputs_embeds
    if input_ids is not None and inputs_embeds is not None:
        raise ValueError(
            "Unsloth: You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time"
        )
    elif input_ids is not None:
        batch_size, seq_length = input_ids.shape
    elif inputs_embeds is not None:
        batch_size, seq_length, _ = inputs_embeds.shape
    else:
        raise ValueError(
            "Unsloth: You have to specify either decoder_input_ids or decoder_inputs_embeds"
        )

    seq_length_with_past = seq_length

    # Fix out of bounds tokenization unless we were given packed metadata
    allow_overlength = getattr(self, "_unsloth_allow_packed_overlength", False) or (
        "packed_seq_lengths" in kwargs
    )
    if hasattr(self, "max_seq_length") and not allow_overlength:
        if seq_length > self.max_seq_length:
            shape = input_ids.shape if input_ids is not None else inputs_embeds.shape
            logger.warning_once(
                f"Unsloth: Input IDs of shape {shape} with length {seq_length} > the model's max sequence length of {self.max_seq_length}.\n"
                "We shall truncate it ourselves. It's imperative if you correct this issue first."
            )
        # The slices below run whenever max_seq_length is set; they are no-ops
        # for inputs that already fit.
        # NOTE(review): batch_size / seq_length were read before this truncation
        # and are not refreshed afterwards — confirm the later uses of
        # seq_length are correct when an input actually gets cut.
        if input_ids is not None:
            input_ids = input_ids[:, : self.max_seq_length]
        elif inputs_embeds is not None:
            inputs_embeds = inputs_embeds[:, : self.max_seq_length, :]
        if (
            attention_mask is not None
            and attention_mask.shape[-1] > self.max_seq_length
        ):
            attention_mask = attention_mask[:, : self.max_seq_length]

    past_key_values_length = 0

    if past_key_values is not None:
        past_key_values_length = past_key_values[0][0].shape[2]
        seq_length_with_past = seq_length_with_past + past_key_values_length

    # We already handle KV cache position_ids ourselves.
    if False:  # (past_key_values_length != 0):
        # Disabled branch, kept for reference.
        position_ids = torch.arange(
            past_key_values_length,
            seq_length + past_key_values_length,
            dtype = torch.int32,
            device = f"{DEVICE_TYPE_TORCH}:0",
        )
        position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
    elif position_ids is not None:
        position_ids = position_ids.view(-1, seq_length).to(torch.int32)  # .long()
    else:
        position_ids = None

    if position_ids is not None:
        # Tile the position rows across the batch when the leading dim differs.
        if position_ids.shape[0] != batch_size:
            position_ids = position_ids.repeat((batch_size, 1))

    # Embed positions
    if inputs_embeds is None:
        inputs_embeds = self.embed_tokens(input_ids)

    inputs_embeds = inputs_embeds.to(_get_dtype(dtype_from_config(self.config)))

    # Normalized from Gemma
    # Note: startswith("gemma") is also true for "gemma2", so IS_GEMMA2 implies IS_GEMMA.
    IS_GEMMA = self.config.model_type.startswith("gemma")
    IS_GEMMA2 = self.config.model_type.startswith("gemma2")
    IS_COHERE = self.config.model_type.startswith("cohere")
    IS_GRANITE = self.config.model_type.startswith("granite")
    IS_FALCON_H1 = self.config.model_type.startswith("falcon_h1")

    train_embed_tokens = self.embed_tokens.weight.requires_grad

    if IS_GEMMA:
        # Match Gemma exactly by casting to bfloat16 / float16
        # inputs_embeds *= math_sqrt(self.config.hidden_size)
        # Ie 3072**0.5 = 55.5000 in bfloat16, whilst 55.4256 in float32
        # &  2048**0.5 = 45.2500 in bfloat16, whilst 45.2548 in float32
        normalizer = torch.tensor(
            math_sqrt(self.config.hidden_size), dtype = inputs_embeds.dtype
        )

        if train_embed_tokens:
            # Careful we must not do an inplace op!
            inputs_embeds = inputs_embeds * normalizer
        else:
            # Scale in place: temporarily turn off requires_grad (detaching
            # non-leaf tensors first) and restore the flag afterwards.
            inputs_requires_grad = inputs_embeds.requires_grad
            if not inputs_embeds.is_leaf:
                inputs_embeds = inputs_embeds.detach()
                inputs_requires_grad = True
            elif inputs_requires_grad:
                inputs_embeds.requires_grad_(False)
            inputs_embeds *= normalizer
            # inputs_embeds *= math_sqrt(self.config.hidden_size)
            if inputs_requires_grad:
                inputs_embeds.requires_grad_(True)

    # Fix up attention mask by setting elements to 0
    # Specifically for DPO
    if (
        getattr(self, "_has_no_labels", False) is True
        and (attention_mask is not None)
        and (past_key_values is None)
        and (not train_embed_tokens)
        and self.training
    ):
        # Careful for inference the attention_mask is size (1, kv_seq_len)
        # Whilst the input_embeds is size (1, 1, 4096)
        inputs_requires_grad = inputs_embeds.requires_grad
        if not inputs_embeds.is_leaf:
            inputs_embeds = inputs_embeds.detach()
            inputs_requires_grad = True
        elif inputs_requires_grad:
            inputs_embeds.requires_grad_(False)
        # NOTE(review): self.max_seq_length is read here without the hasattr
        # guard used by the truncation above — confirm it is always set when
        # _has_no_labels is.
        attention_mask = attention_mask[:, : self.max_seq_length]  # Must resize!
        # Reshape the 2D mask to (batch, seq, 1) and multiply the embeddings by it.
        inputs_embeds *= attention_mask.unsqueeze(0).transpose(0, 1).transpose(1, 2)
        if inputs_requires_grad:
            inputs_embeds.requires_grad_(True)

    # Ignore attention_mask
    if attention_mask is None:
        padding_mask = None
    elif self.training:
        # In training mode the mask is dropped; only causal_mask reaches the layers.
        attention_mask = None
        padding_mask = None
    else:
        # if 0 in attention_mask:
        #     padding_mask = attention_mask
        # else:
        padding_mask = None

        attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
            attention_mask,
            (batch_size, seq_length),
            inputs_embeds,
            past_key_values_length,
            sliding_window = getattr(self.config, "sliding_window", None),
        )
        # Must NOT convert to bool - weirdly this causes stuff to error out!
        # if attention_mask is not None:
        #     attention_mask = attention_mask.to(torch.bool)

    hidden_states = inputs_embeds
    if IS_GRANITE or IS_FALCON_H1:  # granite has embedding multiplier
        hidden_states = self.config.embedding_multiplier * hidden_states

    if past_key_values is None and self.training:
        use_cache = False
        # if use_cache:
        #     logger.warning_once(
        #         "Unsloth: `use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`"
        #     )
        #     use_cache = False

    # decoder layers
    all_hidden_states = () if output_hidden_states else None
    all_self_attns = () if output_attentions else None
    next_decoder_cache = () if use_cache else None

    # Gradient checkpointing methods (ie sqrt)
    # `boundaries` is read here but not used further down in this function.
    if hasattr(self, "_gradient_checkpointing_boundaries"):
        boundaries = self._gradient_checkpointing_boundaries
    else:
        boundaries = None

    # Check checkpointing method
    gradient_checkpointing = False

    if self.gradient_checkpointing and self.training and not use_cache:
        gradient_checkpointing = True

    # Gemma2 has alternating SWA and global attn
    # use_static_mask selects between masks cached on `self` and the
    # per-call masks built from attention_mask.
    use_static_mask = True
    dynamic_SWA_mask = None
    dynamic_GA_mask = None
    if IS_GEMMA2:
        if HAS_FLASH_ATTENTION_SOFTCAPPING and attention_mask is None:
            # Boolean placeholders instead of mask tensors on this path.
            self.SWA_mask = True
            self.GA_mask = False
        elif attention_mask is not None:
            # Fixes https://github.com/unslothai/unsloth/issues/853
            # Unsloth needs a 2D mask, not a [2, 1, n, n] mask!

            # https://github.com/pytorch/pytorch/issues/103749
            # Need to convert to float and not using bool
            # attention_mask = (1.0 - attention_mask.float()) * torch.finfo(inputs_embeds.dtype).min
            dynamic_SWA_mask = _prepare_4d_causal_attention_mask_for_sdpa(
                attention_mask,
                (batch_size, seq_length),
                inputs_embeds,
                past_key_values_length,
                sliding_window = self.config.sliding_window,
            )
            dynamic_GA_mask = _prepare_4d_causal_attention_mask_for_sdpa(
                attention_mask,
                (batch_size, seq_length),
                inputs_embeds,
                past_key_values_length,
                sliding_window = None,
            )
            use_static_mask = False

        elif not hasattr(self, "SWA_mask"):
            # First call without a mask: build the static masks once and cache them on self.
            if HAS_FLEX_ATTENTION:
                # Use Flex Attention instead!
                self.SWA_mask = create_flex_attention_sliding_window_mask(
                    self.max_seq_length, self.config.sliding_window
                )
                self.GA_mask = create_flex_attention_causal_mask(self.max_seq_length)
            else:
                n = self.max_seq_length  # self.config.max_position_embeddings
                # masked_fill is making stuff slower!
                # self. GA_mask = create_boolean_mask(n = n, sliding_window = 0)
                # self.SWA_mask = create_boolean_mask(n = n, sliding_window = self.config.sliding_window)
                from transformers.modeling_attn_mask_utils import AttentionMaskConverter

                self.SWA_mask = (
                    AttentionMaskConverter(
                        is_causal = True,
                        sliding_window = self.config.sliding_window,
                    )
                    .to_causal_4d(
                        1,
                        n,
                        n,
                        dtype = inputs_embeds.dtype,
                        device = DEVICE_TYPE_TORCH,
                    )
                    .squeeze(0)
                    .squeeze(0)
                )

                self.GA_mask = (
                    AttentionMaskConverter(
                        is_causal = True,
                    )
                    .to_causal_4d(
                        1,
                        n,
                        n,
                        dtype = inputs_embeds.dtype,
                        device = DEVICE_TYPE_TORCH,
                    )
                    .squeeze(0)
                    .squeeze(0)
                )
            pass

    if (
        IS_ATTENTION_REFACTOR
        and (
            hasattr(self, "rotary_emb")
            or not hasattr(self.layers[0].self_attn, "rotary_emb")
        )
    ) or IS_GRANITE:
        # Transformers main has made it mandatory to pass position_embeddings
        # https://github.com/huggingface/transformers/pull/34858
        # Also, transformers 4.45.0 supports granite but with the attention refactor (it always had the refactor)
        # unsloth's check for granite too has "version >= 4.45.0 (rightly so)".
        # so let granite always use the attention refactor implementation.

        self.rotary_emb.extend_rope_embedding(
            hidden_states, self.config.max_position_embeddings
        )
        position_embeddings = self.rotary_emb.get_cached(
            self.config.max_position_embeddings, hidden_states.device.index
        )
    else:
        position_embeddings = None

    # Go through every layer!
    for idx, decoder_layer in enumerate(self.layers):
        if output_hidden_states:
            # Record the input to this layer.
            all_hidden_states += (hidden_states,)
        past_key_value = past_key_values[idx] if past_key_values is not None else None

        mask = causal_mask
        if IS_GEMMA2:
            # Even-indexed layers use sliding-window attention, odd ones global attention.
            use_sliding_window = idx % 2 == 0
            if use_sliding_window:
                mask = self.SWA_mask if use_static_mask else dynamic_SWA_mask
            else:
                mask = self.GA_mask if use_static_mask else dynamic_GA_mask
            kwargs["use_sliding_window"] = use_sliding_window

        if gradient_checkpointing and not isinstance(
            decoder_layer, GradientCheckpointingLayer
        ):

            def create_custom_forward(module):
                # The checkpointed tensors arrive positionally as
                # (hidden_states, mask, attention_mask, position_ids); the
                # remaining arguments are captured from the enclosing scope.
                def custom_forward(*inputs):
                    return module(
                        *inputs,
                        past_key_value,
                        output_attentions,
                        padding_mask = padding_mask,
                        position_embeddings = position_embeddings,
                        **kwargs,
                    )

                return custom_forward

            layer_outputs = torch.utils.checkpoint.checkpoint(
                create_custom_forward(decoder_layer),
                hidden_states,
                mask,
                attention_mask,
                position_ids,
                use_reentrant = True,
                preserve_rng_state = False,
            )
            hidden_states = layer_outputs[0]

        else:
            layer_outputs = decoder_layer(
                hidden_states,
                causal_mask = mask,
                attention_mask = attention_mask,
                position_ids = position_ids,
                past_key_value = past_key_value,
                output_attentions = output_attentions,
                use_cache = use_cache,
                padding_mask = padding_mask,
                position_embeddings = position_embeddings,
                **kwargs,
            )
            hidden_states = layer_outputs[0]

        if use_cache:
            # The cache entry follows the attention weights when those are returned.
            next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
        if output_attentions:
            all_self_attns += (layer_outputs[1],)

    # Final layernorm
    if use_cache:
        # Inference kernels; Falcon-H1 stores its final norm as `final_layernorm`.
        if IS_FALCON_H1:
            hidden_states = fast_rms_layernorm_inference(
                self.final_layernorm, hidden_states
            )
        else:
            hidden_states = (
                fast_rms_layernorm_inference_gemma
                if IS_GEMMA
                else fast_rms_layernorm_inference
            )(self.norm, hidden_states)
    elif IS_COHERE:
        # Cohere calls its own norm module directly.
        hidden_states = self.norm(hidden_states)
    elif IS_FALCON_H1:
        hidden_states = fast_rms_layernorm(
            self.final_layernorm, hidden_states, gemma = IS_GEMMA
        )
    else:
        hidden_states = fast_rms_layernorm(self.norm, hidden_states, gemma = IS_GEMMA)

    if output_hidden_states:
        all_hidden_states += (hidden_states,)
    next_cache = next_decoder_cache if use_cache else None

    if not return_dict:
        # Tuple output omits entries that are None.
        return tuple(
            v
            for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
            if v is not None
        )
    return BaseModelOutputWithPast(
        last_hidden_state = hidden_states,
        past_key_values = next_cache,
        hidden_states = all_hidden_states,
        attentions = all_self_attns,
    )


# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L825
def _LlamaModel_fast_forward_inference(
    attention_fast_forward_inference = LlamaAttention_fast_forward_inference,
    mlp_fast_forward_inference = fast_swiglu_inference,
):
    """Build a single-token decode function with pluggable attention and MLP kernels.

    Args:
        attention_fast_forward_inference: Callable used for every layer's
            self-attention step. It is called with the layer's `self_attn`
            module plus `hidden_states`, `past_key_value`, `position_ids`,
            `attention_mask`, `do_prefill` and `rotary_seq_len`, and must
            return `(hidden_states, present_key_value)`.
        mlp_fast_forward_inference: Callable used for every layer's MLP step.
            It is called with the layer's `mlp` module, the hidden states and
            the `temp_gate` / `temp_up` scratch buffers.

    Returns:
        The `LlamaModel_fast_forward_inference_custom` closure defined below.
    """

    # This makes the attention and MLP customisable.
    # Now for models like qwen3 or cohere which use custom attention operations, we can use this function
    def LlamaModel_fast_forward_inference_custom(
        self,
        input_ids,
        past_key_values,
        position_ids,
        attention_mask = None,
        **kwargs,
    ):
        """Run one decode step through every decoder layer using the cached keys / values.

        Only a single new token per sequence is supported (`q_len` is asserted
        to be 1). `**kwargs` is accepted but not used inside this function.

        Returns:
            A `BaseModelOutputWithPast` whose `last_hidden_state` is the output
            of the final norm, whose `past_key_values` is a list with one entry
            per decoder layer, and whose `hidden_states` / `attentions` are
            empty lists.
        """
        # Drop any token columns beyond the model's configured max_seq_length.
        input_ids = input_ids[:, : self.max_seq_length]
        bsz, q_len = input_ids.shape
        hd = self.config.hidden_size
        mlp_size = self.config.intermediate_size

        X = self.model.embed_tokens(input_ids)
        X = X.to(_get_dtype(dtype_from_config(self.config)))
        # The embedding output's shape supersedes the values read above.
        bsz, q_len, hd = X.shape
        assert q_len == 1
        # Get saved buffers to reduce memory movement
        # All scratch buffers below are allocated on device index 0.
        residual = torch.empty(
            (bsz, q_len, hd), dtype = torch.float32, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        _XX = torch.empty(
            (2, bsz, q_len, hd), dtype = torch.float32, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        XX, XX2 = _XX[0], _XX[1]
        variance = torch.empty(
            (bsz, q_len, 1), dtype = torch.float32, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        temp_mlp = torch.empty(
            (2, bsz, 1, mlp_size), dtype = X.dtype, device = f"{DEVICE_TYPE_TORCH}:0"
        )
        # One gate / up scratch buffer per device index, selected per layer below.
        temp_gates, temp_ups = (
            tuple(temp_mlp[0].to(torch.device(x)) for x in range(DEVICE_COUNT)),
            tuple(temp_mlp[1].to(torch.device(x)) for x in range(DEVICE_COUNT)),
        )

        # Cached length is read from the first layer's first cached tensor
        # (presumably the keys, with the sequence on dim -2 - TODO confirm).
        seq_len = past_key_values[0][0].shape[-2]
        kv_seq_len = seq_len + 1
        if attention_mask is not None:
            attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
                attention_mask,
                (bsz, q_len),
                X,
                seq_len,
                sliding_window = getattr(self.config, "sliding_window", None),
            )
            # Pre-convert to bool once for all layers (avoids per-layer .eq(0))
            # The helper's result may itself be None, hence the second check.
            if attention_mask is not None and attention_mask.dtype != torch.bool:
                attention_mask = attention_mask.eq(0)
        else:
            attention_mask = None

        # Compute rotary_seq_len once to avoid per-layer GPU-CPU sync from .item()
        rotary_seq_len = max(kv_seq_len, int(position_ids.max().item()) + 1)

        next_decoder_cache = []

        for idx, decoder_layer in enumerate(self.model.layers):
            # Layers without `_per_layer_device_index` are treated as living on index 0.
            device_index = getattr(decoder_layer, "_per_layer_device_index", 0)
            X, residual, position_ids = move_to_device(
                device_index, X, residual, position_ids
            )
            # --- Attention sub-block: norm -> attention -> residual add ---
            residual.copy_(X)  # residual = X
            X = fast_rms_layernorm_inference(
                decoder_layer.input_layernorm,
                X,
                XX = XX,
                XX2 = XX2,
                variance = variance,
            )
            X, present_key_value = attention_fast_forward_inference(
                decoder_layer.self_attn,
                hidden_states = X,
                past_key_value = past_key_values[idx],
                position_ids = position_ids,
                attention_mask = attention_mask,
                do_prefill = not hasattr(decoder_layer.self_attn, "paged_attention"),
                rotary_seq_len = rotary_seq_len,
            )
            X += residual

            # --- MLP sub-block: norm -> MLP -> residual add ---
            residual.copy_(X)  # residual = X
            X = fast_rms_layernorm_inference(
                decoder_layer.post_attention_layernorm,
                X,
                XX = XX,
                XX2 = XX2,
                variance = variance,
            )
            X = mlp_fast_forward_inference(
                decoder_layer.mlp,
                X,
                temp_gate = temp_gates[device_index],
                temp_up = temp_ups[device_index],
            )
            X += residual

            next_decoder_cache.append(present_key_value)
        # Final norm applied once after the last decoder layer.
        X = fast_rms_layernorm_inference(
            self.model.norm,
            X,
            XX = XX,
            XX2 = XX2,
            variance = variance,
        )

        return BaseModelOutputWithPast(
            last_hidden_state = X,
            past_key_values = next_decoder_cache,
            hidden_states = [],
            attentions = [],
        )

    return LlamaModel_fast_forward_inference_custom


# Backwards compatibility: other models consume `LlamaModel_fast_forward_inference`
# directly, so keep that name bound to the factory's result built with its default
# attention (`LlamaAttention_fast_forward_inference`) and MLP (`fast_swiglu_inference`).
LlamaModel_fast_forward_inference = _LlamaModel_fast_forward_inference()


def CausalLM_fast_forward(fast_forward_inference):
    """Create a causal-LM `forward` that routes cached decoding to `fast_forward_inference`.

    Args:
        fast_forward_inference: Callable invoked as
            `fast_forward_inference(self, input_ids, past_key_values,
            position_ids = ..., attention_mask = ..., **kwargs)` whenever
            `past_key_values` is supplied. Its result is indexed with `[0]` to
            obtain the final hidden states.

    Returns:
        The `_CausalLM_fast_forward` closure defined below.
    """

    def _CausalLM_fast_forward(
        self,
        input_ids: torch.LongTensor = None,
        causal_mask: Optional[BlockDiagonalCausalMask] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        num_logits_to_keep: Optional[int] = 0,
        logits_to_keep: Optional[int] = 0,
        *args,
        **kwargs,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """Run the backbone, then produce logits and / or a loss.

        Behaviour by case:

        * `past_key_values` given: the backbone step is delegated to
          `fast_forward_inference`; otherwise `self.model(...)` is called.
          In the latter case the incoming `causal_mask` argument is replaced
          by an xformers `LowerTriangularMask` (or `None` without xformers).
        * `UNSLOTH_RETURN_HIDDEN_STATES=1`: the hidden states are returned in
          the `logits` field and no LM head is applied.
        * `labels` given and `UNSLOTH_RETURN_LOGITS` is not `"1"` (and
          neither the single-token nor the `num_logits_to_keep` shortcut
          applies): the loss comes from `unsloth_fused_ce_loss` and the
          returned logits are the `EMPTY_LOGITS` placeholder.
        * Otherwise logits are materialised; with `labels` the loss comes
          from `fast_cross_entropy_loss` on labels shifted left by one,
          without `labels` logit scaling / soft-capping is applied directly.

        `num_logits_to_keep` and `logits_to_keep` are aliases; the larger of
        the two is used. `num_items_in_batch` / `n_items` and
        `packed_seq_lengths` are read from `**kwargs` when present.

        Returns:
            A `CausalLMOutputWithPast`, or a tuple `(loss?, logits, *rest)`
            when `return_dict` is falsy.
        """
        if past_key_values is not None:
            outputs = fast_forward_inference(
                self,
                input_ids,
                past_key_values,
                position_ids = position_ids,
                attention_mask = attention_mask,
                **kwargs,
            )
        else:
            causal_mask = (
                xformers.attn_bias.LowerTriangularMask() if HAS_XFORMERS else None
            )

            output_attentions = (
                output_attentions
                if output_attentions is not None
                else self.config.output_attentions
            )
            output_hidden_states = (
                output_hidden_states
                if output_hidden_states is not None
                else self.config.output_hidden_states
            )
            return_dict = (
                return_dict if return_dict is not None else self.config.use_return_dict
            )
            # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
            self.model._has_no_labels = labels is None
            outputs = self.model(
                input_ids = input_ids,
                causal_mask = causal_mask,
                attention_mask = attention_mask,
                position_ids = position_ids,
                past_key_values = past_key_values,
                inputs_embeds = inputs_embeds,
                use_cache = use_cache,
                output_attentions = output_attentions,
                output_hidden_states = output_hidden_states,
                return_dict = return_dict,
                **kwargs,
            )
        hidden_states = outputs[0]

        bsz, q_len, hd = hidden_states.shape
        lm_head = self.lm_head.weight
        lm_head_device = lm_head.device

        logit_softcapping = getattr(self.config, "final_logit_softcapping", 0)
        logit_scaling = getattr(self.config, "logit_scale", 0)
        dtype = lm_head.dtype
        num_logits_to_keep = max(num_logits_to_keep, logits_to_keep)

        # Move items to same device as lm_head
        hidden_states = hidden_states.to(lm_head_device)
        if labels is not None:
            labels = labels.to(lm_head_device)

        # Output last hidden states without logits if asked
        if os.environ.get("UNSLOTH_RETURN_HIDDEN_STATES", "0") == "1":
            if num_logits_to_keep != 0:
                hidden_states = hidden_states[:, -num_logits_to_keep:, :]
            return CausalLMOutputWithPast(
                loss = None,
                logits = hidden_states,
                past_key_values = outputs.past_key_values,
                hidden_states = outputs.hidden_states,
                attentions = outputs.attentions,
            )

        if bsz == 1 and q_len == 1:
            # Single token: a matrix-vector product replaces the full LM head call.
            logits = torch.mv(lm_head, hidden_states.ravel().to(dtype))
            logits = logits.unsqueeze(0).unsqueeze(0)
        elif num_logits_to_keep != 0:
            logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :].to(dtype))
        else:
            RETURN_LOGITS = os.environ.get("UNSLOTH_RETURN_LOGITS", "0") == "1"

            if not RETURN_LOGITS and labels is not None:
                # Use unsloth_fused_ce_loss which actually calculates the best chunk size to reduce VRAM usage
                n_items = kwargs.get("num_items_in_batch", None)
                if n_items is None:
                    n_items = kwargs.get("n_items", None)

                if self.config.model_type == "falcon_h1":
                    hidden_states = hidden_states * self.config.lm_head_multiplier

                # `fused_linear_cross_entropy` is DISABLED here since T4 breaks:
                # OutOfResources: out of resource: shared memory, Required: 98304,
                # Hardware limit: 65536. Reducing block sizes or `num_stages` may help.
                loss = unsloth_fused_ce_loss(
                    trainer = None,
                    hidden_states = hidden_states,
                    lm_head_weight = lm_head,
                    lm_head_bias = None,
                    labels = labels,
                    mask = None,
                    n_items = n_items,
                    scaling = getattr(self, "accelerator_scaler", None),
                    target_gb = None,
                    torch_compile = True,
                    logit_softcapping = logit_softcapping,
                )
                if not return_dict:
                    # Logits are never materialised on the fused path, so the
                    # tuple output carries the same EMPTY_LOGITS placeholder as
                    # the dict output below (previously this referenced the
                    # not-yet-assigned local `logits`).
                    output = (EMPTY_LOGITS,) + outputs[1:]
                    return (loss,) + output if loss is not None else output

                return CausalLMOutputWithPast(
                    loss = loss,
                    logits = EMPTY_LOGITS,
                    past_key_values = outputs.past_key_values,
                    hidden_states = outputs.hidden_states,
                    attentions = outputs.attentions,
                )
            logits = self.lm_head(hidden_states.to(dtype))

        logits = logits.to(_get_dtype(dtype_from_config(self.config)))
        loss = None
        if self.config.model_type == "granite":
            # granite uses logit_scaling as key and they divide by the scale unlike cohere
            # notice that for granite, logits_scale is 16 and for cohere it is 0.125 (aka 1/8) in their respective configs
            # granite: https://github.com/huggingface/transformers/blob/4d1d0f29a493098e6bc6b904b82e29cb331827f5/src/transformers/models/granite/modeling_granite.py#L1103
            # cohere: https://github.com/huggingface/transformers/blob/4d1d0f29a493098e6bc6b904b82e29cb331827f5/src/transformers/models/cohere/modeling_cohere.py#L1176
            logit_scaling = 1 / getattr(self.config, "logits_scaling", 1)
        elif self.config.model_type == "falcon_h1":
            logit_scaling = self.config.lm_head_multiplier

        if labels is not None:
            shift_logits = logits
            # Shift labels left by one so position i is scored against token
            # i + 1; the final position has no target and is set to -100.
            shift_labels = torch.empty_like(labels)
            shift_labels[..., :-1] = labels[..., 1:]
            shift_labels[..., -1] = -100
            mask_packed_sequence_boundaries(
                shift_labels,
                kwargs.get("packed_seq_lengths"),
            )
            n_items = kwargs.get("num_items_in_batch", None)
            if n_items is None:
                n_items = kwargs.get("n_items", None)
            loss = fast_cross_entropy_loss(
                logits = shift_logits,
                labels = shift_labels,
                logit_softcapping = logit_softcapping,
                logit_scaling = logit_scaling,
                n_items = n_items,
            )
        else:
            # No loss requested: apply scaling / soft-capping to the logits
            # themselves, in place unless they take part in autograd.
            if logit_scaling != 0:
                if logits.requires_grad:
                    logits = logit_scaling * logits
                else:
                    logits *= logit_scaling
            if logit_softcapping != 0:
                if logits.requires_grad:
                    logits = (1.0 / logit_softcapping) * logits
                    logits = torch.tanh(logits)
                    logits = logit_softcapping * logits
                else:
                    logits *= 1.0 / logit_softcapping
                    logits.tanh_()
                    logits *= logit_softcapping

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output
        return CausalLMOutputWithPast(
            loss = loss,
            logits = logits,
            past_key_values = outputs.past_key_values,
            hidden_states = outputs.hidden_states,
            attentions = outputs.attentions,
        )

    return _CausalLM_fast_forward


@torch._disable_dynamo
def PeftModel_fast_forward(
    self,
    input_ids = None,
    causal_mask = None,
    attention_mask = None,
    inputs_embeds = None,
    labels = None,
    output_attentions = None,
    output_hidden_states = None,
    return_dict = None,
    task_ids = None,
    num_logits_to_keep = 0,
    logits_to_keep = 0,
    **kwargs,
):
    is_classification = "Classification" in str(type(self.base_model.model))
    if is_classification:
        return self.base_model(
            input_ids = input_ids,
            attention_mask = attention_mask,
            inputs_embeds = inputs_embeds,
            labels = labels,
            output_attentions = output_attentions,
            output_hidden_states = output_hidden_states,
            return_dict = return_dict,
            **kwargs,
        )
    else:
        return self.base_model(
            input_ids = input_ids,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            inputs_embeds = inputs_embeds,
            labels = labels,
            output_attentions = output_attentions,
            output_hidden_states = output_hidden_states,
            return_dict = return_dict,
            num_logits_to_keep = num_logits_to_keep,
            logits_to_keep = logits_to_keep,
            **kwargs,
        )


def _get_rope_theta(config, default = 10000.0):
    """Get rope_theta from config, handling both transformers 4.x and 5.x."""
    try:
        return config.rope_theta
    except (AttributeError, KeyError):
        pass
    rp = getattr(config, "rope_parameters", None)
    if isinstance(rp, dict):
        return rp.get("rope_theta", default)
    return default


# Solves https://github.com/unslothai/unsloth/issues/168
# Static KV Cache was introduced in 4.38.0, causing training to be much slower.
# Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
# https://github.com/huggingface/transformers/pull/27931
# https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
class LlamaRotaryEmbedding(torch.nn.Module):
    """Rotary position embedding that keeps one cos / sin table per device index.

    The tables live in `multi_gpu_cos_cached` / `multi_gpu_sin_cached`, two
    lists with `DEVICE_COUNT` slots. Subclasses customise the frequencies via
    `_apply_inv_freq_scaling` and the position indices via `_apply_time_scaling`.
    """

    # Fixes https://github.com/huggingface/transformers/pull/28837
    # https://github.com/microsoft/DeepSpeed/issues/4932
    # The precision of RoPE buffers is not correct, so we cast to int64.
    def __init__(
        self,
        dim = None,
        max_position_embeddings = 2048,
        base = 10000,
        device = None,
        config = None,  # [TODO] Hack to pass in config - need to remove later
    ):
        """Build the inverse frequencies and the initial cos / sin tables.

        When `config` is given it overrides `dim` (from `head_dim`, else
        `hidden_size // num_attention_heads`), `base` (via `_get_rope_theta`)
        and `max_position_embeddings`.
        """
        super().__init__()
        if config is not None:
            # [TODO] Hack to pass in config - need to remove later
            base = _get_rope_theta(config, default = base)
            # Read from the config but not used anywhere below.
            partial_rotary_factor = getattr(config, "partial_rotary_factor", 1.0)
            head_dim = getattr(config, "head_dim", None)
            dim = (
                head_dim
                if head_dim is not None
                else int((config.hidden_size // config.num_attention_heads))
            )
            device = DEVICE_TYPE_TORCH
            max_position_embeddings = config.max_position_embeddings

        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base
        # Dynamic RoPE we first set it to a max of 4 * 8192 tokens then we iteratively grow this
        self.current_rope_size = min(4 * 8192, self.max_position_embeddings)
        self.multi_gpu_cos_cached = [None for _ in range(DEVICE_COUNT)]
        self.multi_gpu_sin_cached = [None for _ in range(DEVICE_COUNT)]

        # Normal Llama-3 RoPE
        exponents = (
            torch.arange(0, self.dim, 2, dtype = torch.int64, device = "cpu").float()
            / self.dim
        )
        inv_freq = self._apply_inv_freq_scaling(1.0 / (self.base**exponents))
        self.register_buffer("inv_freq", inv_freq, persistent = False)

        # Build here to make `torch.jit.trace` work.
        default_dtype = torch.get_default_dtype()
        for device_idx in range(DEVICE_COUNT):
            self._set_cos_sin_cache(
                seq_len = self.current_rope_size,
                device = torch.device(device_idx),
                dtype = default_dtype,
            )

        # dummy so that patch_utils doesn't fail for now
        for placeholder_name in ("cos_cached", "sin_cached"):
            setattr(
                self,
                placeholder_name,
                torch.empty(1, device = get_current_device(), dtype = default_dtype),
            )

    def _apply_inv_freq_scaling(self, inv_freq):
        """Hook for subclasses to rescale `inv_freq` (e.g., extended RoPE); identity here."""
        return inv_freq

    def _apply_time_scaling(self, t):
        """Hook for subclasses to rescale position indices (e.g., linear scaling); identity here."""
        return t

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """Rebuild the cos / sin table of `seq_len` rows for `device`.

        Also records `seq_len` as `current_rope_size`. The tables are stored
        in the slot `device.index` and returned as `(cos, sin)`.
        """
        # Note: on the original Llama codebase, these tensors are created on the target device (and not on CPU) and
        # in FP32. They are applied (multiplied) in FP32 as well.
        self.current_rope_size = seq_len
        positions = torch.arange(
            self.current_rope_size, device = self.inv_freq.device, dtype = torch.int64
        ).float()
        positions = self._apply_time_scaling(positions)

        angles = torch.outer(positions, self.inv_freq)
        # Different from paper, but it uses a different permutation in order to obtain the same calculation
        table = torch.cat((angles, angles), dim = -1)
        cos = table.cos().to(dtype = dtype, device = device, non_blocking = True)
        sin = table.sin().to(dtype = dtype, device = device, non_blocking = True)

        slot = device.index
        self.multi_gpu_cos_cached[slot] = cos
        self.multi_gpu_sin_cached[slot] = sin
        return cos, sin

    def forward(self, x, position_ids = None, seq_len = None):
        """Return the first `seq_len` rows of the cos / sin tables for `x`'s device.

        The table for `x.device` is rebuilt first when `seq_len` exceeds
        `current_rope_size`. `position_ids` is not used.
        """
        # x: [bs, num_attention_heads, seq_len, head_size]
        must_grow = seq_len is not None and seq_len > self.current_rope_size
        if must_grow:
            self._set_cos_sin_cache(seq_len = seq_len, device = x.device, dtype = x.dtype)

        slot = x.device.index
        cos = self.multi_gpu_cos_cached[slot][:seq_len]
        sin = self.multi_gpu_sin_cached[slot][:seq_len]
        return (cos, sin)

    def get_cached(self, seq_len = None, device_index = None):
        """Return the full `(cos, sin)` tables stored for `device_index`.

        `device_index` defaults to `get_current_device()`. `seq_len` is
        accepted but not used by this implementation.
        """
        slot = get_current_device() if device_index is None else device_index
        return self.multi_gpu_cos_cached[slot], self.multi_gpu_sin_cached[slot]

    def extend_rope_embedding(self, x, seq_len):
        """Grow every device's table so it covers at least `seq_len` positions.

        Does nothing when the current size already suffices. The new tables
        use `x.dtype`.
        """
        if seq_len <= self.current_rope_size:
            return
        # Iteratively grow by increments of 8192
        full_blocks = seq_len // 8192
        has_partial_block = (seq_len % 8192) != 0
        self.current_rope_size = (full_blocks + has_partial_block) * 8192
        for device_idx in range(DEVICE_COUNT):
            self._set_cos_sin_cache(
                self.current_rope_size, device = torch.device(device_idx), dtype = x.dtype
            )


class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):
    """Linear-scaling variant of LlamaRotaryEmbedding: position indices are divided by a constant.

    Credits to the Reddit user /u/kaiokendev
    """

    # Fixes https://github.com/huggingface/transformers/pull/28837
    # https://github.com/microsoft/DeepSpeed/issues/4932
    # The precision of RoPE buffers is not correct, so we cast to int64.
    def __init__(
        self,
        dim = None,
        max_position_embeddings = 2048,
        base = 10000,
        device = None,
        scaling_factor = 1.0,
        config = None,  # [TODO] Hack to pass in config - need to remove later
    ):
        """Store `scaling_factor`, then run the parent initialiser with the remaining arguments."""
        # Must be assigned before the parent initialiser runs: the parent
        # builds its cos / sin tables during __init__, which calls
        # `_apply_time_scaling` and therefore reads this attribute.
        self.scaling_factor = scaling_factor
        parent_kwargs = dict(
            dim = dim,
            max_position_embeddings = max_position_embeddings,
            base = base,
            device = device,
            config = config,
        )
        super().__init__(**parent_kwargs)

    def _apply_time_scaling(self, t):
        """Divide the position indices `t` by `self.scaling_factor`."""
        factor = self.scaling_factor
        return t / factor


# See https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/layers/rotary_embedding.py#L736
# For Llama 3.1
class LlamaExtendedRotaryEmbedding(LlamaRotaryEmbedding):
    """LlamaRotaryEmbedding whose inverse frequencies are rescaled per wavelength band (Llama 3.1)."""

    def __init__(
        self,
        dim = None,
        max_position_embeddings = 2048,
        base = 10000,
        device = None,
        config = None,  # [TODO] Hack to pass in config - need to remove later
    ):
        """Forward every argument unchanged to the parent initialiser."""
        parent_kwargs = dict(
            dim = dim,
            max_position_embeddings = max_position_embeddings,
            base = base,
            device = device,
            config = config,
        )
        super().__init__(**parent_kwargs)

    # From https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/api/model.py#L41
    def _apply_inv_freq_scaling(self, freqs: torch.Tensor):
        """Rescale each inverse frequency according to its wavelength.

        Wavelengths below `old_context_len / high_freq_factor` are kept,
        wavelengths above `old_context_len / low_freq_factor` are divided by
        `scale_factor`, and those in between are interpolated between the two.

        Returns:
            A new tensor with the dtype and device of `freqs`.
        """
        # Values obtained from grid search
        scale_factor = 8
        low_freq_factor = 1
        high_freq_factor = 4
        old_context_len = 8192  # original llama3 length

        longest_kept_wavelen = old_context_len / high_freq_factor
        shortest_scaled_wavelen = old_context_len / low_freq_factor

        def _rescale(freq):
            # Decide per frequency which of the three bands it falls into.
            wavelen = 2 * math.pi / freq
            if wavelen < longest_kept_wavelen:
                return freq
            if wavelen > shortest_scaled_wavelen:
                return freq / scale_factor
            assert shortest_scaled_wavelen != longest_kept_wavelen
            smooth = (old_context_len / wavelen - low_freq_factor) / (
                high_freq_factor - low_freq_factor
            )
            return (1 - smooth) * freq / scale_factor + smooth * freq

        rescaled = [_rescale(freq) for freq in freqs]
        return torch.tensor(rescaled, dtype = freqs.dtype, device = freqs.device)


class LongRopeRotaryEmbedding(torch.nn.Module):
    """LongRoPE rotary embedding with separate short- and long-context cos / sin tables.

    For Phi 3.5 128K https://huggingface.co/microsoft/Phi-3.5-mini-instruct/blob/main/modeling_phi3.py

    The short tables are built once in `__init__` with exactly
    `original_max_position_embeddings` rows. The long tables start out as
    `None` for every device and are only filled by `_set_cos_sin_cache`.
    Lookups therefore use the short tables for every `seq_len` up to and
    including `original_max_position_embeddings`, and the long tables beyond.
    """

    def __init__(
        self,
        dim = None,
        max_position_embeddings = 131072,
        original_max_position_embeddings = 4096,
        base = 10000,
        short_factor = None,
        long_factor = None,
        device = None,
        config = None,  # [TODO] Hack to pass in config - need to remove later
    ):
        """Build both inverse-frequency buffers and the short-context tables.

        Args:
            dim: Rotary dimension; overridden by
                `config.hidden_size // config.num_attention_heads` when
                `config` is given.
            max_position_embeddings: Extended context length; overridden by
                `config.max_position_embeddings` when `config` is given.
            original_max_position_embeddings: Context length below which the
                short factors apply. Must be an `int`.
            base: RoPE base; overridden via `_get_rope_theta(config)` when
                `config` is given.
            short_factor: Per-frequency divisors for short sequences (required).
            long_factor: Per-frequency divisors for long sequences (required).
            device: Not used when building the tables.
            config: Optional model config supplying the overrides above.

        Raises:
            AssertionError: If `short_factor` or `long_factor` is missing, or
                `original_max_position_embeddings` is not an `int`.
        """
        super().__init__()
        assert short_factor is not None
        assert long_factor is not None
        assert type(original_max_position_embeddings) is int

        if config is not None:
            # [TODO] Hack to pass in config - need to remove later
            base = _get_rope_theta(config, default = base)
            dim = int((config.hidden_size // config.num_attention_heads))
            device = DEVICE_TYPE_TORCH
            max_position_embeddings = config.max_position_embeddings

        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.original_max_position_embeddings = original_max_position_embeddings
        self.base = base
        # Dynamic RoPE: start at the original (short) context length, capped by
        # max_position_embeddings, then grow via extend_rope_embedding.
        self.current_rope_size = min(
            original_max_position_embeddings, self.max_position_embeddings
        )
        self.multi_gpu_short_cos_cached = [None] * DEVICE_COUNT
        self.multi_gpu_short_sin_cached = [None] * DEVICE_COUNT
        self.multi_gpu_long_cos_cached = [None] * DEVICE_COUNT
        self.multi_gpu_long_sin_cached = [None] * DEVICE_COUNT

        # Long RoPE similar to RoPE except short sequences have 1 cos / sin
        # and long sequences have another cos / sin
        inv_freq_shape = (
            torch.arange(0, self.dim, 2, dtype = torch.int64, device = "cpu").float()
            / self.dim
        )
        short_factor = torch.tensor(short_factor, device = "cpu", dtype = torch.float32)
        long_factor = torch.tensor(long_factor, device = "cpu", dtype = torch.float32)
        short_inv_freq = 1.0 / (short_factor * self.base**inv_freq_shape)
        long_inv_freq = 1.0 / (long_factor * self.base**inv_freq_shape)

        # Phi-3 Scale factor
        scale = self.max_position_embeddings / self.original_max_position_embeddings
        if scale <= 1.0:
            scaling_factor = 1.0
        else:
            scaling_factor = math.sqrt(
                1 + math.log(scale) / math.log(self.original_max_position_embeddings)
            )
        self.scaling_factor = scaling_factor

        # Short and long inv_freq
        self.register_buffer("short_inv_freq", short_inv_freq, persistent = False)
        self.register_buffer("long_inv_freq", long_inv_freq, persistent = False)

        # Build here to make `torch.jit.trace` work.
        # Initialize short sequences cache for all devices
        dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
        t = torch.arange(
            original_max_position_embeddings,
            device = self.short_inv_freq.device,
            dtype = torch.int64,
        ).float()
        freqs = torch.outer(t, self.short_inv_freq)
        emb = torch.cat((freqs, freqs), dim = -1)
        # The scaled tables are identical for every device; compute them once
        # and only repeat the dtype / device transfer inside the loop.
        short_cos = emb.cos() * self.scaling_factor
        short_sin = emb.sin() * self.scaling_factor

        for device_idx in range(DEVICE_COUNT):
            device_obj = torch.device(device_idx)
            self.multi_gpu_short_cos_cached[device_idx] = short_cos.to(
                dtype = dtype, device = device_obj, non_blocking = True
            )
            self.multi_gpu_short_sin_cached[device_idx] = short_sin.to(
                dtype = dtype, device = device_obj, non_blocking = True
            )

        # dummy so that patch_utils doesn't fail for now
        self.short_cos_cached = torch.empty(
            1, device = get_current_device(), dtype = torch.get_default_dtype()
        )
        self.short_sin_cached = torch.empty(
            1, device = get_current_device(), dtype = torch.get_default_dtype()
        )
        self.long_cos_cached = torch.empty(
            1, device = get_current_device(), dtype = torch.get_default_dtype()
        )
        self.long_sin_cached = torch.empty(
            1, device = get_current_device(), dtype = torch.get_default_dtype()
        )

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """Rebuild the long-context cos / sin tables of `seq_len` rows for `device`.

        Also records `seq_len` as `current_rope_size`. The tables are stored
        in the slot `device.index` and returned as `(cos, sin)`.
        """
        # Note: on the original Llama codebase, these tensors are created on the target device (and not on CPU) and
        # in FP32. They are applied (multiplied) in FP32 as well.
        self.current_rope_size = seq_len

        t = torch.arange(
            self.current_rope_size, device = self.long_inv_freq.device, dtype = torch.int64
        ).float()
        # Long sequences
        freqs = torch.outer(t, self.long_inv_freq)
        emb = torch.cat((freqs, freqs), dim = -1)
        cos_cached = (emb.cos() * self.scaling_factor).to(
            dtype = dtype, device = device, non_blocking = True
        )
        sin_cached = (emb.sin() * self.scaling_factor).to(
            dtype = dtype, device = device, non_blocking = True
        )
        self.multi_gpu_long_cos_cached[device.index] = cos_cached
        self.multi_gpu_long_sin_cached[device.index] = sin_cached
        return cos_cached, sin_cached

    def forward(self, x, position_ids = None, seq_len = None):
        """Return the first `seq_len` rows of the matching cos / sin tables for `x`'s device.

        The long tables for `x.device` are rebuilt first when `seq_len`
        exceeds `current_rope_size`. `position_ids` is not used.
        """
        # x: [bs, num_attention_heads, seq_len, head_size]
        if seq_len is not None and seq_len > self.current_rope_size:
            self._set_cos_sin_cache(seq_len = seq_len, device = x.device, dtype = x.dtype)

        device_index = x.device.index

        # `<=`: the short tables hold exactly original_max_position_embeddings
        # rows, so that length is still a short-context lookup. With `<` it
        # fell through to the long tables, which are None until grown.
        if seq_len is not None and seq_len <= self.original_max_position_embeddings:
            return (
                self.multi_gpu_short_cos_cached[device_index][:seq_len],
                self.multi_gpu_short_sin_cached[device_index][:seq_len],
            )
        else:
            return (
                self.multi_gpu_long_cos_cached[device_index][:seq_len],
                self.multi_gpu_long_sin_cached[device_index][:seq_len],
            )

    def get_cached(self, seq_len = None, device_index = None):
        """Return the full `(cos, sin)` tables for `device_index`.

        The short tables are returned when `seq_len` is given and does not
        exceed `original_max_position_embeddings`; otherwise the long tables
        (which are `None` until `_set_cos_sin_cache` has run for that device).
        `device_index` defaults to `get_current_device()`.
        """
        if device_index is None:
            device_index = get_current_device()
        # Same inclusive boundary as in `forward`.
        if seq_len is not None and seq_len <= self.original_max_position_embeddings:
            return (
                self.multi_gpu_short_cos_cached[device_index],
                self.multi_gpu_short_sin_cached[device_index],
            )
        return (
            self.multi_gpu_long_cos_cached[device_index],
            self.multi_gpu_long_sin_cached[device_index],
        )

    def extend_rope_embedding(self, x, seq_len):
        """Grow every device's long tables so they cover at least `seq_len` positions.

        Does nothing when the current size already suffices. The new tables
        use `x.dtype`.
        """
        if seq_len <= self.current_rope_size:
            return
        # Iteratively grow by increments of 8192
        self.current_rope_size = ((seq_len // 8192) + ((seq_len % 8192) != 0)) * 8192
        for device_idx in range(DEVICE_COUNT):
            self._set_cos_sin_cache(
                self.current_rope_size, device = torch.device(device_idx), dtype = x.dtype
            )


def unsloth_fast_generate(
    self,
    *args,
    **kwargs,
):
    """Call ``self._old_generate`` with Unsloth's inference-time adjustments.

    The model is switched to inference via ``FastLlamaModel.for_inference``,
    the keyword arguments are normalised (see the inline comments) and the
    original ``generate`` runs inside the context manager returned by
    ``_get_inference_mode_context_manager`` plus ``torch.autocast``.

    If the model was in training mode on entry, training mode is restored
    before this function exits - also when validation or generation raises.

    Note: ``FastLlamaModel.from_pretrained`` replaces this function's
    ``__doc__`` with the docstring of the wrapped ``generate``.

    Returns:
        Whatever ``self._old_generate(*args, **kwargs)`` returns.

    Raises:
        ValueError: if the prompt length plus ``max_new_tokens`` exceeds
            ``self.config.max_position_embeddings``.
    """
    # If the model starts out in training mode, restore training mode after generation
    restore_training_mode = self.training

    FastLlamaModel.for_inference(self)

    try:
        # Unpack BatchEncoding passed as input_ids for backwards compatibility.
        # Old notebooks do model.generate(input_ids=tokenizer(...)) where the tokenizer
        # output is a BatchEncoding (dict-like). Transformers v5 generate() calls
        # .shape on it directly and crashes. Unpack into separate kwargs so both
        # v4 and v5 work transparently.
        _maybe_encoding = kwargs.get("input_ids", None)
        if (
            _maybe_encoding is not None
            and not isinstance(_maybe_encoding, torch.Tensor)
            and hasattr(_maybe_encoding, "items")
        ):
            batch_data = kwargs.pop("input_ids")
            for key, val in batch_data.items():
                # Explicitly passed kwargs win over entries of the encoding
                kwargs.setdefault(key, val)

        dtype = _get_dtype(dtype_from_config(self.config))

        if hasattr(self, "config") and hasattr(self.config, "max_position_embeddings"):
            _ids = kwargs.get("input_ids", None)
            _max_new_tokens = kwargs.get("max_new_tokens", None)
            # `max_new_tokens = None` means "not specified", so there is nothing to check
            if (
                _ids is not None
                and _max_new_tokens is not None
                and hasattr(_ids, "shape")
                and _ids.shape[-1] + _max_new_tokens
                > self.config.max_position_embeddings
            ):
                raise ValueError(
                    f"Unsloth: input length {_ids.shape[-1]} + max_new_tokens {_max_new_tokens} exceeds the maximum sequence length of {self.config.max_position_embeddings}!\n"
                    "You will need to do long context extension by increasing the `max_seq_length` in `FastLanguageModel.from_pretrained`."
                )

        # Must patch accelerate for Xformers
        # if accelerate_new_send_to_device is not None:
        #     import accelerate.utils.operations
        #     accelerate.utils.operations.send_to_device = accelerate_new_send_to_device
        # pass

        # For newer HF
        kwargs["cache_implementation"] = "dynamic"
        # For num_logits_to_keep: only set a default when the caller gave neither spelling
        num_logits_to_keep = kwargs.get("num_logits_to_keep", None)
        logits_to_keep = kwargs.get("logits_to_keep", None)
        if num_logits_to_keep is None and logits_to_keep is None:
            kwargs["num_logits_to_keep"] = 1

        # Remove token_type_ids
        kwargs.pop("token_type_ids", None)

        # Check pad_token: fall back to the (first) EOS id from the config
        model_eos_token_id = getattr(self.config, "eos_token_id", None)
        if model_eos_token_id is not None and hasattr(model_eos_token_id, "__iter__"):
            model_eos_token_id = model_eos_token_id[0]

        kwargs["pad_token_id"] = kwargs.pop("pad_token_id", model_eos_token_id)

        # Mixed precision autocast
        with (
            _get_inference_mode_context_manager(self),
            torch.autocast(device_type = DEVICE_TYPE_TORCH, dtype = dtype),
        ):
            output = self._old_generate(*args, **kwargs)

        # Return accelerate back
        # if accelerate_new_send_to_device is not None:
        #     accelerate.utils.operations.send_to_device = accelerate_old_send_to_device
        # pass
    finally:
        # Runs on success and on error, so a failed generation cannot leave a
        # model that was being trained stuck in inference mode.
        if restore_training_mode:
            FastLlamaModel.for_training(self)

    return output


class FastLlamaModel:
    @staticmethod
    def _prepare_for_qat(model, qat_scheme):
        """Delegate to ``_prepare_model_for_qat`` and return the model it gives back."""
        return _prepare_model_for_qat(model, qat_scheme)

    @staticmethod
    def pre_patch():
        """Install Unsloth's replacements on the Hugging Face Llama classes.

        ``patch_llama_rope_scaling`` returns a name and a source string; when
        the name is not ``None`` the source is executed into this module's
        globals and the resulting object becomes ``LlamaAttention.__init__``.
        The fast forward functions are then assigned to the attention, decoder
        layer, model, causal-LM and PEFT classes, and the rotary-embedding
        classes inside ``transformers.models.llama.modeling_llama`` are
        replaced by the ones from this module.
        """
        init_name, function = patch_llama_rope_scaling(
            model_name = "llama",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            extended_rope_module = LlamaExtendedRotaryEmbedding,
            attention_module = LlamaAttention,
            longrope_module = LongRopeRotaryEmbedding,
        )
        if init_name is not None:
            exec(function, globals())
            LlamaAttention.__init__ = eval(init_name)

        # All three attention variants share the same fast forward
        for attention_class in (
            LlamaAttention,
            LlamaSdpaAttention,
            LlamaFlashAttention2,
        ):
            attention_class.forward = LlamaAttention_fast_forward

        LlamaDecoderLayer.forward = LlamaDecoderLayer_fast_forward
        LlamaModel.forward = LlamaModel_fast_forward
        causal_lm_forward = CausalLM_fast_forward(LlamaModel_fast_forward_inference)
        LlamaForCausalLM.forward = causal_lm_forward
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(LlamaForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.llama.modeling_llama as hf_modeling_llama

        hf_modeling_llama.LlamaRotaryEmbedding = LlamaRotaryEmbedding
        hf_modeling_llama.LlamaLinearScalingRotaryEmbedding = (
            LlamaLinearScalingRotaryEmbedding
        )

    @staticmethod
    def from_pretrained(
        model_name = "unsloth/llama-3-8b-bnb-4bit",
        max_seq_length = None,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        revision = None,
        fast_inference = False,  # uses vLLM
        gpu_memory_utilization = 0.5,
        float8_kv_cache = False,
        random_state = 3407,
        max_lora_rank = 16,
        disable_log_stats = False,
        unsloth_vllm_standby = False,
        num_labels = None,
        qat_scheme = None,
        load_in_fp8 = False,  # fp8 LoRA (True, False, 'block')
        **kwargs,
    ):
        """Load a model and tokenizer and apply Unsloth's patches to them.

        In order, this method: prints a hardware/version banner, runs
        ``model_patcher.pre_patch()``, resolves the dtype, sets up linear RoPE
        scaling when ``max_seq_length`` exceeds the config's
        ``max_position_embeddings``, loads the model (Hugging Face or vLLM),
        loads and patches the tokenizer, rewrites
        ``Trainer._inner_training_loop`` from its source, and finally attaches
        helper attributes (``max_seq_length``, ``for_training``,
        ``for_inference``, the patched ``generate``) to the model.

        Args:
            model_name: Model id or path given to the config, model and (by
                default) tokenizer loaders.
            max_seq_length: Desired context length; ``None`` uses the config's
                ``max_position_embeddings``.
            dtype: ``None`` picks bfloat16 when supported, else float16. A
                bfloat16 request on unsupported hardware becomes float16.
                Must end up as float16, bfloat16 or float32.
            load_in_4bit: Build a 4-bit NF4 ``BitsAndBytesConfig`` and pass it
                as ``quantization_config``.
            token: Passed through ``hf_login`` and then to the loaders.
            device_map: Forwarded to the Hugging Face ``from_pretrained``.
            rope_scaling: When not ``None`` the automatic linear RoPE scaling
                is skipped.
            fix_tokenizer: Forwarded to ``load_correct_tokenizer`` and gates
                the ``check_tokenizer`` call.
            model_patcher: Class providing ``pre_patch`` / ``post_patch``;
                defaults to ``FastLlamaModel``.
            tokenizer_name: Tokenizer id or path; defaults to ``model_name``.
            trust_remote_code: Forwarded to the loaders; not allowed together
                with ``fast_inference``.
            revision: Not referenced by this method's body.
            fast_inference: Load through vLLM. Switched off when vLLM is not
                installed or the CUDA compute capability major is below 7.
            gpu_memory_utilization, float8_kv_cache, max_lora_rank,
            disable_log_stats, unsloth_vllm_standby: Forwarded to ``load_vllm``.
            random_state: Not referenced by this method's body.
            num_labels: When not ``None`` an
                ``AutoModelForSequenceClassification`` is loaded instead.
            qat_scheme: Not referenced by this method's body.
            load_in_fp8: Only consulted on the vLLM path.
            **kwargs: Forwarded to the Hugging Face ``from_pretrained``
                (``attn_implementation`` is removed); entries matching
                ``load_vllm`` parameters are forwarded on the vLLM path.

        Returns:
            ``(model, tokenizer)``.

        Raises:
            NotImplementedError: ``fast_inference`` combined with
                ``trust_remote_code`` or with automatic RoPE scaling.
            RuntimeError: ``unsloth_vllm_standby`` without the
                ``UNSLOTH_VLLM_STANDBY`` environment variable, or when the
                trainer source cannot be obtained.
            ValueError: Unsupported ``DEVICE_TYPE``.
        """
        os.environ["UNSLOTH_USE_NEW_MODEL"] = "0"
        if trust_remote_code:
            if fast_inference:
                raise NotImplementedError(
                    "Unsloth: Fast inference does not support `trust_remote_code` yet."
                )
            print(
                "Unsloth: WARNING `trust_remote_code` is True.\n"
                "Are you certain you want to do remote code execution?"
            )
        if fast_inference:
            if not is_vLLM_available():
                print("Unsloth: vLLM is not installed! Will use Unsloth inference!")
                fast_inference = False
            if DEVICE_TYPE == "cuda":
                major_version, minor_version = torch.cuda.get_device_capability()
                if major_version < 7:
                    print(
                        "Unsloth: vLLM does not work on older GPUs - will switch to Unsloth inference!"
                    )
                    fast_inference = False
            # "hip" needs no capability check. It must NOT force
            # `fast_inference = True` here: that would undo the fallback above
            # when vLLM is not installed and fail later when loading vLLM.
            if (
                unsloth_vllm_standby
                and os.environ.get("UNSLOTH_VLLM_STANDBY", "0") == "0"
            ):
                raise RuntimeError(
                    "Unsloth: `unsloth_vllm_standby` is True, but  environment variable `UNSLOTH_VLLM_STANDBY` is not set to 1!"
                )

        token = hf_login(token)
        if model_patcher is None:
            model_patcher = FastLlamaModel
        SUPPORTS_BFLOAT16 = is_bfloat16_supported()

        # Collect device facts for the banner
        if DEVICE_TYPE == "cuda":
            gpu_stats = torch.cuda.get_device_properties(0)
            gpu_stats_name = (
                gpu_stats.name + ". " if gpu_stats.name != "" else "NVIDIA GPU Device. "
            )
            gpu_version = torch.version.cuda
            gpu_stats_snippet = f"CUDA: {gpu_stats.major}.{gpu_stats.minor}. CUDA Toolkit: {gpu_version}."
        elif DEVICE_TYPE == "hip":
            gpu_stats = torch.cuda.get_device_properties(0)
            gpu_stats_name = resolve_hip_gpu_stats_name(gpu_stats)
            gpu_version = torch.version.hip
            gpu_stats_snippet = f"ROCm Toolkit: {gpu_version}."
        elif DEVICE_TYPE == "xpu":
            gpu_stats = torch.xpu.get_device_properties(0)
            gpu_stats_name = (
                gpu_stats.name + ". " if gpu_stats.name != "" else "Intel XPU Device. "
            )
            gpu_version = torch.version.xpu
            gpu_stats_snippet = f"Intel Toolkit: {gpu_version}."
        else:
            raise ValueError(f"Unsloth: Unsupported device type: {DEVICE_TYPE}")

        # The vLLM version lookup is the same for every supported device
        try:
            vllm_version = f" vLLM: {importlib_version('vllm')}."
        except Exception:
            vllm_version = ""

        max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)

        statistics = (
            f"==((====))==  Unsloth {__version__}: Fast {model_patcher.__name__[4:-5]} patching. Transformers: {transformers_version}.{vllm_version}\n"
            f"   {chr(92)}{chr(92)}   /|    {gpu_stats_name}Num GPUs = {DEVICE_COUNT}. Max memory: {max_memory} GB. Platform: {platform_system}.\n"
            f"O^O/ {chr(92)}_/ {chr(92)}    Torch: {torch.__version__}. {gpu_stats_snippet} Triton: {triton_version}\n"
            f"{chr(92)}        /    Bfloat16 = {str(SUPPORTS_BFLOAT16).upper()}. FA [Xformers = {xformers_version}. FA2 = {HAS_FLASH_ATTENTION}]\n"
            f' "-____-"     Free license: http://github.com/unslothai/unsloth'
        )

        print(statistics)

        # Warn about fast transfers
        if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
            old_hf_transfer = os.environ["HF_HUB_ENABLE_HF_TRANSFER"]
            if old_hf_transfer in ("False", "false"):
                old_hf_transfer = "0"
            if old_hf_transfer in ("True", "true"):
                old_hf_transfer = "1"
        else:
            old_hf_transfer = "0"
        if old_hf_transfer == "1":
            print(
                "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!"
            )
        if old_hf_transfer != "0":
            os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

        model_patcher.pre_patch()
        # For debugging - we use a download counter to see if environments are not breaking or if HF is down
        get_statistics(kwargs.get("local_files_only", False))

        if dtype is None:
            dtype = torch.float16 if not SUPPORTS_BFLOAT16 else torch.bfloat16
        elif dtype == torch.bfloat16 and not SUPPORTS_BFLOAT16:
            logger.warning_once(
                "Device does not support bfloat16. Will change to float16."
            )
            dtype = torch.float16
        # elif dtype == torch.float16 and SUPPORTS_BFLOAT16:
        #     logger.warning_once("Device supports bfloat16 but you selected float16. Will change to bfloat16.")
        #     dtype = torch.bfloat16

        assert (
            dtype == torch.float16 or dtype == torch.bfloat16 or dtype == torch.float32
        )

        # RoPE Scaling
        model_config = AutoConfig.from_pretrained(
            model_name,
            token = token,
            attn_implementation = "sdpa",
        )
        model_config.model_name = model_name
        model_max_seq_length = model_config.max_position_embeddings

        verify_fp8_support_if_applicable(model_config)

        # Check if RoPE Scaling is even allowed
        model_function = MODEL_FOR_CAUSAL_LM_MAPPING[model_config.__class__]
        IS_FALCON_H1 = model_config.model_type.startswith("falcon_h1")

        preferred_attn_impl = (
            prefer_flex_attn_if_supported(model_function, model_config) or "eager"
        )

        has_rope_scaling = False
        try:
            with open(inspect.getfile(model_function), "r", encoding = "utf-8") as file:
                has_rope_scaling = "self.config.rope_scaling" in file.read()
        except Exception:
            pass
        # NOTE(review): the detection above is overridden unconditionally, so the
        # `not has_rope_scaling` error further down is currently unreachable -
        # confirm this is intended before removing either piece.
        has_rope_scaling = True

        # If max_seq_length is not specified, use maximum from config
        if max_seq_length is None:
            max_seq_length = model_max_seq_length

        if (rope_scaling is None) and (max_seq_length > model_max_seq_length):
            rope_scaling = max_seq_length / model_max_seq_length

            if fast_inference:
                raise NotImplementedError(
                    "Unsloth: Fast inference does not yet work with RoPE Scaling."
                )

            logger.warning_once(
                f"Unsloth: {model_name} can only handle sequence lengths of at most "
                f"{model_max_seq_length}.\nBut with kaiokendev's RoPE scaling of "
                f"{round(rope_scaling, 3)}, it can be magically be extended to "
                f"{max_seq_length}!"
            )

            # Warn RoPE scaling isn't allowed
            if not has_rope_scaling:
                raise RuntimeError(
                    f"However, {model_name} doesn't support RoPE Scaling!\n"
                    "Please file a feature request at https://github.com/unslothai/unsloth."
                )

            rope_scaling = {
                "type": "linear",
                "factor": rope_scaling,
            }

            # Add to kwargs
            kwargs["rope_scaling"] = rope_scaling

        bnb_config = None
        if load_in_4bit:
            llm_int8_skip_modules = SKIP_QUANTIZATION_MODULES.copy()
            if IS_FALCON_H1:
                # we cannot quantize out_proj layer due to mamba kernels: https://github.com/tiiuae/Falcon-H1/issues/13#issuecomment-2918671274
                llm_int8_skip_modules.append("out_proj")
            bnb_config = BitsAndBytesConfig(
                load_in_4bit = True,
                bnb_4bit_use_double_quant = True,
                bnb_4bit_quant_type = "nf4",
                bnb_4bit_compute_dtype = dtype,
                llm_int8_skip_modules = llm_int8_skip_modules,
            )

        # https://huggingface.co/togethercomputer/LLaMA-2-7B-32K/discussions/12
        # RoPE Scaling's max_position_embeddings must be updated
        max_position_embeddings = max(max_seq_length, model_max_seq_length)
        kwargs.pop("attn_implementation", None)  # No need since we auto call it

        # Cannot be None, since HF now checks for the config
        if load_in_4bit:
            kwargs["quantization_config"] = bnb_config

        kwargs = add_dtype_kwargs(dtype, kwargs)

        raise_handler = RaiseUninitialized()
        try:
            if num_labels is not None:
                model = AutoModelForSequenceClassification.from_pretrained(
                    model_name,
                    device_map = device_map,
                    # torch_dtype             = dtype, # transformers changed torch_dtype to dtype
                    num_labels = num_labels,
                    # quantization_config     = bnb_config,
                    token = token,
                    max_position_embeddings = max_position_embeddings,
                    trust_remote_code = trust_remote_code,
                    attn_implementation = preferred_attn_impl,
                    **kwargs,
                )
            elif not fast_inference:
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    device_map = device_map,
                    # torch_dtype             = dtype, # transformers changed torch_dtype to dtype
                    # quantization_config     = bnb_config,
                    token = token,
                    max_position_embeddings = max_position_embeddings,
                    trust_remote_code = trust_remote_code,
                    attn_implementation = preferred_attn_impl,
                    **kwargs,
                )
                model.fast_generate = make_fast_generate_wrapper(model.generate)
                model.fast_generate_batches = None
            else:
                from unsloth_zoo.vllm_utils import (
                    load_vllm,
                    get_vllm_state_dict,
                    convert_vllm_to_huggingface,
                    generate_batches,
                )

                fp8_mode = None
                if load_in_fp8 != False:
                    fp8_mode = _get_fp8_mode_and_check_settings(
                        load_in_fp8,
                        fast_inference,
                    )

                allowed_args = inspect.getfullargspec(load_vllm).args
                load_vllm_kwargs = dict(
                    model_name = model_name,
                    config = model_config,
                    gpu_memory_utilization = gpu_memory_utilization,
                    max_seq_length = max_seq_length,
                    dtype = dtype,
                    float8_kv_cache = float8_kv_cache,
                    enable_lora = True,
                    max_lora_rank = max_lora_rank,
                    disable_log_stats = disable_log_stats,
                    use_bitsandbytes = load_in_4bit,
                    unsloth_vllm_standby = unsloth_vllm_standby,
                    fp8_mode = fp8_mode,
                )
                # Forward extra user kwargs that load_vllm accepts, without
                # overriding the values chosen above
                for allowed_arg in allowed_args:
                    if allowed_arg not in load_vllm_kwargs and allowed_arg in kwargs:
                        load_vllm_kwargs[allowed_arg] = kwargs[allowed_arg]

                # Load vLLM first
                llm = load_vllm(**load_vllm_kwargs)

                # Convert to HF format
                _, quant_state_dict = get_vllm_state_dict(
                    llm,
                    config = model_config,
                    load_in_fp8 = load_in_fp8,
                )
                model = convert_vllm_to_huggingface(
                    quant_state_dict, model_config, dtype, bnb_config
                )
                model.vllm_engine = llm
                model.fast_generate = model.vllm_engine.generate
                model.fast_generate_batches = functools.partial(
                    generate_batches, model.vllm_engine
                )
        finally:
            # Undo the temporary state even when loading fails, so a failed
            # load does not leak into the rest of the process.
            raise_handler.remove()
            # Return old flag
            os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer

        # Counteract saved tokenizers
        tokenizer_name = model_name if tokenizer_name is None else tokenizer_name
        tokenizer = load_correct_tokenizer(
            tokenizer_name = tokenizer_name,
            model_max_length = max_position_embeddings,
            padding_side = "right",
            token = token,
            trust_remote_code = trust_remote_code,
            fix_tokenizer = fix_tokenizer,
        )

        model, tokenizer = patch_tokenizer(model, tokenizer)
        model, tokenizer = model_patcher.post_patch(
            model, tokenizer, correct_dtype = dtype
        )

        # Patch up QKV / O and MLP
        for layer in model.model.layers:
            layer.self_attn.apply_qkv = original_apply_qkv
            layer.self_attn.apply_o = original_apply_o

        # Patch Trainer
        from transformers.trainer import Trainer

        # Work from the pristine source: once the loop has been patched, reuse
        # the copy stored on the class rather than the patched function.
        try:
            if Trainer._inner_training_loop.__name__ != "_fast_inner_training_loop":
                inner_training_loop = inspect.getsource(Trainer._inner_training_loop)
                Trainer._original_training_loop = inner_training_loop
            else:
                inner_training_loop = Trainer._original_training_loop
        except Exception as error:
            raise RuntimeError(
                "Unsloth: Unsuccessfully patched inner_training_loop"
            ) from error

        import transformers.trainer

        # Import every name of `transformers.trainer` that occurs in the loop's
        # source, so the re-executed source can resolve it from this module.
        good_items = [
            item for item in dir(transformers.trainer) if item in inner_training_loop
        ]
        exec(
            "from transformers.trainer import (" + ", ".join(good_items) + ")",
            globals(),
        )

        # Locate the original "Running training" logging paragraph
        start = re.search(
            r"logger\.info\([\"\'].+?Running training", inner_training_loop
        ).span(0)[0]
        end = inner_training_loop.find("\n\n", start)
        original_debug = inner_training_loop[start:end]
        spaces = re.search(r"\n([\s\t]{1,})", original_debug).group(0)[1:]

        # Cannot use \\ since it will cause a SyntaxWarning in Python 3.12
        # Instead use chr(92) == \\
        debug_info = """debug_info = \\
        f"==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = {len(set(p.device for p in model.parameters()))}\\n"\\
        f"   {chr(92)}{chr(92)}   /|    Num examples = {num_examples:,} | Num Epochs = {num_train_epochs:,} | Total steps = {max_steps:,}\\n"\\
        f"O^O/ {chr(92)}_/ {chr(92)}    Batch size per device = {self._train_batch_size:,} | Gradient accumulation steps = {args.gradient_accumulation_steps}\\n"\\
        f"{chr(92)}        /    Data Parallel GPUs = {args.world_size} | Total batch size ({self._train_batch_size} x {args.gradient_accumulation_steps} x {args.world_size}) = {total_train_batch_size:,}\\n"\\
        f' "-____-"     Trainable parameters = {get_model_param_count(model, trainable_only=True):,} of {get_model_param_count(model):,} ({get_model_param_count(model, trainable_only=True)/get_model_param_count(model)*100:.2f}% trained)'
        logger.warning(debug_info)
        import gc
        for _ in range(3):
            gc.collect()
            if DEVICE_TYPE == "xpu":
                torch.xpu.empty_cache()
            else:
                torch.cuda.empty_cache()"""

        # Strip this method's 8-space indent from each continuation line and
        # re-indent to the depth used inside the trainer's source.
        debug_info = debug_info.split("\n")
        debug_info = "\n".join(
            [debug_info[0]] + [spaces + x[8:] for x in debug_info[1:]]
        )
        inner_training_loop = inner_training_loop.replace(original_debug, debug_info)

        debug_info = """n_total_devices = total_train_batch_size // \\
            args.gradient_accumulation_steps // self._train_batch_size
        if n_total_devices > 1:
            logger.warning_once('Unsloth is running with multi GPUs - the effective batch size is multiplied by ' + str(n_total_devices))
        debug_info ="""
        debug_info = debug_info.split("\n")
        debug_info = "\n".join(
            [debug_info[0]] + [spaces + x[8:] for x in debug_info[1:]]
        )
        inner_training_loop = inner_training_loop.replace("debug_info =", debug_info, 1)

        # Dedent the method source so it can be executed at module level
        front_spaces = re.match(r"[\t\s]{1,}", inner_training_loop).group(0)
        inner_training_loop = re.sub(
            r"^" + front_spaces, "", inner_training_loop, flags = re.MULTILINE
        )
        inner_training_loop = inner_training_loop.replace(
            "train_dataloader = tpu_spmd_dataloader(train_dataloader)",
            "raise RuntimeError('Unsloth: TPUs are not yet supported!')",
        )
        inner_training_loop = inner_training_loop.replace(
            "_inner_training_loop",
            "_fast_inner_training_loop",
            1,
        )
        inner_training_loop = inner_training_loop.replace(
            "is_torch_tpu_available()",
            "False",
        )
        exec(inner_training_loop, globals())
        Trainer._inner_training_loop = _fast_inner_training_loop

        # Save max_seq_length
        model.max_seq_length = max_seq_length
        m = model
        while hasattr(m, "model"):
            m.max_seq_length = max_seq_length
            m = m.model
        m.max_seq_length = max_seq_length
        # Save to modules as well
        for module in model.modules():
            module.max_seq_length = max_seq_length

        # We check the tokenizer first for errors
        if fix_tokenizer:
            tokenizer = check_tokenizer(
                model = model,
                tokenizer = tokenizer,
                model_name = model_name,
                model_max_length = max_position_embeddings,
                padding_side = "right",
                token = token,
            )
        patch_saving_functions(tokenizer)

        # No `_name_or_path` fix-up is needed for uploading PEFT models
        # anymore since we require transformers>=4.37!

        # Log Unsloth version for future fastpaths for inference
        model.config.update({"unsloth_version": __version__})

        # Add save modules
        patch_saving_functions(model)
        Trainer._inner_training_loop = _fast_inner_training_loop

        # Fix gradient accumulation
        patch_gradient_accumulation_fix(Trainer)

        # Save tokenizer for inference purposes
        tokenizer.padding_side = "left"  # Force inference
        internal_model = model
        while hasattr(internal_model, "model"):
            internal_model._saved_temp_tokenizer = tokenizer
            # Also set is_loaded_in_8bit to disable incorrect DDP
            internal_model.is_loaded_in_8bit = True

            internal_model = internal_model.model
        internal_model._saved_temp_tokenizer = tokenizer
        # Also set is_loaded_in_8bit to disable incorrect DDP
        internal_model.is_loaded_in_8bit = True

        # For transformers > 4.47.1, we need to add rotary_emb to all attention layers
        if IS_ATTENTION_REFACTOR or hasattr(model.model, "rotary_emb"):
            rotary_emb = model.model.rotary_emb
            for layer in model.model.layers:
                layer.self_attn.rotary_emb = rotary_emb

        # Add for_inference and for_training
        model.for_training = functools.partial(FastLlamaModel.for_training, model)
        model.for_inference = functools.partial(FastLlamaModel.for_inference, model)
        m = model
        while hasattr(m, "model"):
            m.for_training = functools.partial(FastBaseModel.for_training, m)
            m.for_inference = functools.partial(FastBaseModel.for_inference, m)
            m = m.model

        # Patch generate
        is_classification = "Classification" in str(type(model))
        if not is_classification and model.generate.__name__ != "unsloth_fast_generate":
            model._old_generate = model.generate
            unsloth_fast_generate.__doc__ = model._old_generate.__doc__
            model.generate = types.MethodType(unsloth_fast_generate, model)
        # Set weight[padding_idx] = 0 for embeddings that are NOT tied with the
        # lm_head. When weights are tied, zeroing the padding row also zeros
        # the corresponding lm_head row, forcing logit = 0 for the pad token.
        # This is higher than the (negative) logits for real tokens in models
        # like Gemma, causing the decoder to emit <pad> and produce gibberish.
        # Skip entirely if eos_token == pad_token to avoid zeroing EOS embedding.
        eos_token_id = (
            getattr(tokenizer, "eos_token_id", None) if tokenizer is not None else None
        )
        pad_token_id = (
            getattr(tokenizer, "pad_token_id", None) if tokenizer is not None else None
        )
        if tokenizer is not None and eos_token_id != pad_token_id:
            lm_head = getattr(model, "lm_head", None)
            lm_head_weight = (
                getattr(lm_head, "weight", None) if lm_head is not None else None
            )
            with torch.no_grad():
                for name, module in model.named_modules():
                    if type(module) is torch.nn.Embedding:
                        if (
                            getattr(module, "weight", None) is not None
                            and getattr(module, "padding_idx", None) is not None
                        ):
                            if module.padding_idx < module.weight.shape[0]:
                                # Skip if tied to lm_head
                                if (
                                    lm_head_weight is not None
                                    and module.weight.data_ptr()
                                    == lm_head_weight.data_ptr()
                                ):
                                    continue
                                module.weight[module.padding_idx] = 0
        return model, tokenizer

    @staticmethod
    def post_patch(model, tokenizer, correct_dtype = None):
        """Run ``patch_model_and_tokenizer`` with ``downcast_rope = True``.

        ``correct_dtype`` is forwarded unchanged.  Returns the
        ``(model, tokenizer)`` pair produced by the helper.
        """
        patched_model, patched_tokenizer = patch_model_and_tokenizer(
            model,
            tokenizer,
            downcast_rope = True,
            correct_dtype = correct_dtype,
        )
        return patched_model, patched_tokenizer

    @staticmethod
    def get_peft_model(
        model,
        r = 16,
        target_modules = [
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha = 16,
        lora_dropout = 0.0,
        bias = "none",
        layers_to_transform = None,
        layers_pattern = None,
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        max_seq_length = 2048,  # not used anymore
        use_rslora = False,
        modules_to_save = None,
        init_lora_weights = True,
        loftq_config = {},
        temporary_location = "_unsloth_temporary_saved_buffers",
        qat_scheme = None,
        target_parameters = None,  # For MoE expert layers (nn.Parameter)
        ensure_weight_tying = False,
        **kwargs,
    ):
        if os.environ.get("UNSLOTH_USE_NEW_MODEL", "0") == "1":
            # Check for other PEFT args in kwargs
            for peft_arg, flag in (
                ("finetune_vision_layers", False),
                ("finetune_language_layers", True),
                ("finetune_attention_modules", True),
                ("finetune_mlp_modules", True),
            ):
                if peft_arg not in kwargs:
                    kwargs[peft_arg] = flag
            return FastBaseModel.get_peft_model(
                model = model,
                r = r,
                target_modules = target_modules,
                lora_alpha = lora_alpha,
                lora_dropout = lora_dropout,
                bias = bias,
                layers_to_transform = layers_to_transform,
                layers_pattern = layers_pattern,
                use_gradient_checkpointing = use_gradient_checkpointing,
                random_state = random_state,
                max_seq_length = max_seq_length,
                use_rslora = use_rslora,
                modules_to_save = modules_to_save,
                init_lora_weights = init_lora_weights,
                loftq_config = loftq_config,
                temporary_location = temporary_location,
                target_parameters = target_parameters,
                ensure_weight_tying = ensure_weight_tying,
                **kwargs,
            )
        if os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1":
            print(
                "Unsloth: Full finetuning is enabled, so .get_peft_model has no effect"
            )
            return model
        transformers_set_seed(random_state)

        # Apply gradient checkpointing with smart heuristics
        max_seq = getattr(model, "max_seq_length", 512)
        dtype = model.get_input_embeddings().weight.dtype
        use_gradient_checkpointing = apply_unsloth_gradient_checkpointing(
            use_gradient_checkpointing, max_seq, dtype
        )

        if type(r) is not int:
            raise TypeError(f"Unsloth: Rank of {str(r)} must be an integer.")
        if r <= 0:
            raise TypeError(f"Unsloth: Rank of {str(r)} must be larger than 0.")

        if isinstance(model, PeftModelForCausalLM) or isinstance(
            model, PeftModelForSequenceClassification
        ):
            # Check if exactly the same and then pass through!
            assert hasattr(model, "peft_config")

            peft_config = model.peft_config["default"].to_dict()
            check_parameters = [
                "r",
                "lora_alpha",
                "lora_dropout",
                "bias",
                "layers_to_transform",
                "layers_pattern",
                "use_rslora",
                "init_lora_weights",
            ]
            check_all = True
            for param in check_parameters:
                check_all = check_all and (peft_config[param] == eval(param))

            # Check save_modules
            old_target_modules = list(peft_config["target_modules"])
            modules_to_save = peft_config["modules_to_save"]
            if modules_to_save is None:
                modules_to_save = {}
            modules_to_save = list(modules_to_save)
            old_target_modules += modules_to_save

            # Combine all
            new_target_modules = list(target_modules) + list(
                modules_to_save if modules_to_save is not None else []
            )

            # Now check!
            new_target_modules = set(new_target_modules)
            check_all = check_all and (
                len(set(old_target_modules) ^ new_target_modules) == 0
            )

            check_all = check_all and (
                (loftq_config == {} or loftq_config is None)
                and (
                    peft_config["loftq_config"] == {}
                    or peft_config["loftq_config"] is None
                )
            )

            if check_all:
                # Simply pass through!
                logger.warning(
                    "Unsloth: Already have LoRA adapters! We shall skip this step."
                )

                # Offload!
                # [TODO] First offload lm_head and embed_tokens to CPU (should be disk!!)
                if "embed_tokens" in new_target_modules:
                    print(
                        "Unsloth: Training embed_tokens in mixed precision to save VRAM"
                    )

                    _offload_frozen_module_for_training(
                        model.get_input_embeddings(), DEVICE_TYPE_TORCH
                    )

                if "lm_head" in new_target_modules:
                    print("Unsloth: Training lm_head in mixed precision to save VRAM")

                    _offload_frozen_module_for_training(
                        model.get_output_embeddings(), DEVICE_TYPE_TORCH
                    )

                return model
            else:
                raise TypeError(
                    "Unsloth: Your model already has LoRA adapters. Your new parameters are different."
                )

        if loftq_config is None:
            loftq_config = {}

        signature = str(inspect.signature(LoraConfig))
        SUPPORTS_LOFTQ = "loftq_config" in signature
        SUPPORTS_RSLORA = "use_rslora" in signature

        if lora_dropout != 0:
            logger.warning_once(
                f"Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = {lora_dropout}.\n"
                f"Unsloth will patch all other layers, except LoRA matrices, causing a performance hit."
            )

        if bias != "none":
            logger.warning_once(
                f"Unsloth: bias = `none` is supported for fast patching. You are using bias = {bias}.\n"
                f"Unsloth will patch all other layers, except LoRA matrices, causing a performance hit."
            )

        if not (
            type(init_lora_weights) is bool
            or init_lora_weights == "gaussian"
            or init_lora_weights == "loftq"
            or init_lora_weights == "corda"
        ):
            raise ValueError(
                'Unsloth: `init_lora_weights` must be either [True, False, "gaussian", "loftq", "corda"].'
            )

        if init_lora_weights == "loftq":
            if not SUPPORTS_LOFTQ:
                import peft

                raise RuntimeError(
                    f"Unsloth: Your PEFT version of {peft.__version__} does not support LoftQ init.\n"
                    "Please install PEFT 0.7.2 or higher.\n"
                    "You can also install from source: `pip install git+https://github.com/huggingface/peft.git"
                )

            if loftq_config == {}:
                from peft import LoftQConfig

                logger.warning_once(
                    "Unsloth: init_lora_weights = `loftq` is set, but `loftq_config` is None.\n"
                    "We shall use `loftq_config = LoftQConfig(loftq_bits = 4, loftq_iter = 1)`."
                )
                loftq_config = LoftQConfig(loftq_bits = 4, loftq_iter = 1)

            if hasattr(model.config, "quantization_config"):
                raise ValueError(
                    "Unsloth: You are using `loftq` init, yet `load_in_4bit = True` was set.\n"
                    "Reload your model without any quantization by setting `load_in_4bit = False`."
                )

        assert type(use_rslora) is bool
        if use_rslora:
            if not SUPPORTS_RSLORA:
                # We manually check for PEFT
                import peft

                raise RuntimeError(
                    f"Unsloth: Your PEFT version of {peft.__version__} does not support `use_rslora`.\n"
                    "Please install PEFT 0.7.2 or higher.\n"
                    "You can also install from source: `pip install git+https://github.com/huggingface/peft.git"
                )

        accepted_modules = frozenset(
            (
                "lm_head",
                "q_proj",
                "k_proj",
                "v_proj",
                "o_proj",
                "gate_proj",
                "up_proj",
                "down_proj",
            ),
        )
        model.config.update({"unsloth_version": __version__})

        if type(modules_to_save) is tuple:
            modules_to_save = list(modules_to_save)

        train_lm_head = False
        train_embed_tokens = False
        final_modules = []
        for module in target_modules:
            if module == "embed_tokens":
                # logger.warning_once(
                #     "Unsloth: `embed_tokens` should be placed in `modules_to_save` and not `target_modules`. "\
                #     "Luckily, we shall do it for you!"
                # )
                train_embed_tokens = True
                if modules_to_save is None:
                    modules_to_save = ["embed_tokens"]
                else:
                    modules_to_save.append("embed_tokens")

            else:
                try:
                    assert module in accepted_modules
                    final_modules.append(module)
                except AssertionError as e:
                    final_modules.append(module)
                    print(
                        "Unsloth: You added custom modules, but Unsloth hasn't optimized for this.\n"
                        "Beware - your finetuning might be noticeably slower!"
                    )
                pass

        # Check if we added new tokens!
        if hasattr(model, "_need_to_train_embeddings"):
            # Check if embed_tokens/lm_head are already being trained
            # (either as LoRA targets in final_modules or via modules_to_save)
            _embed_already_trained = (
                train_embed_tokens or "embed_tokens" in final_modules
            )
            _lm_head_already_trained = train_lm_head or "lm_head" in final_modules
            if not _lm_head_already_trained or not _embed_already_trained:
                print(
                    "Unsloth: You added new tokens but did not specify if you wanted to "
                    "train the lm_head and embed_tokens.\nWe must turn it on for you."
                )

                # Only add to modules_to_save if not already a LoRA target
                if not _embed_already_trained:
                    train_embed_tokens = True
                    if modules_to_save is None:
                        modules_to_save = ["embed_tokens"]
                    elif "embed_tokens" not in modules_to_save:
                        modules_to_save.append("embed_tokens")

                if not _lm_head_already_trained:
                    train_lm_head = True
                    if modules_to_save is None:
                        modules_to_save = ["lm_head"]
                    elif "lm_head" not in modules_to_save:
                        modules_to_save.append("lm_head")

        # Check for Llama-3
        # if hasattr(model._saved_temp_tokenizer, "_using_llama3_template"):
        #     if not train_embed_tokens and not train_lm_head:
        #         raise RuntimeError("")

        # First fix untrained tokens
        # Wrong - can cause reserved tokens to pop out!!
        # if train_embed_tokens or train_lm_head:
        #     fix_untrained_tokens(model, eps = 1e-16)
        # pass

        # Check modules_to_save
        if modules_to_save is not None:
            for module in modules_to_save:
                if module == "lm_head":
                    train_lm_head = True
                elif module == "embed_tokens":
                    train_embed_tokens = True
                else:
                    raise TypeError(
                        f"Unsloth: Module = {module} is not allowed. Only 'lm_head' and 'embed_tokens' is allowed."
                    )
        if isinstance(modules_to_save, (tuple, list)):
            modules_to_save = list(set(modules_to_save))

        vllm_engine = None
        if hasattr(model, "vllm_engine"):
            # Fast inference!
            vllm_engine = model.vllm_engine
            vllm_fast_generate = model.fast_generate
            vllm_fast_generate_batches = model.fast_generate_batches

            if modules_to_save is not None:
                raise NotImplementedError(
                    "Unsloth: Currently fast inference does not work with training embeddings or lm_head."
                )

            if bias != "none":
                raise NotImplementedError(
                    "Unsloth: Currently fast inference does not work with using biases for LoRA."
                )

        # Does not get lora yet, so get name from model, not base model
        is_classification = "Classification" in str(type(model))

        # Auto-detect MoE models and populate target_parameters for expert layers
        if target_parameters is None:
            target_parameters = get_moe_target_parameters(model, target_modules)

        arguments = dict(
            r = r,
            lora_alpha = lora_alpha,
            target_modules = final_modules,
            lora_dropout = lora_dropout,
            bias = bias,
            task_type = TaskType.CAUSAL_LM if not is_classification else TaskType.SEQ_CLS,
            layers_to_transform = layers_to_transform,
            init_lora_weights = init_lora_weights,
            loftq_config = loftq_config,
            use_rslora = use_rslora,
            modules_to_save = modules_to_save,
            target_parameters = target_parameters,
            ensure_weight_tying = ensure_weight_tying,
            **kwargs,
        )
        if not SUPPORTS_LOFTQ:
            del arguments["loftq_config"]
        if not SUPPORTS_RSLORA:
            del arguments["use_rslora"]

        _saved_temp_tokenizer = model._saved_temp_tokenizer

        lora_config = LoraConfig(**arguments)
        # First offload lm_head and embed_tokens to disk
        input_embeddings_device = model.get_input_embeddings().weight.device
        if is_classification:
            output_embeddings_device = model.score.weight.device
        else:
            output_embeddings_device = model.get_output_embeddings().weight.device

        if use_gradient_checkpointing == "unsloth":
            if train_embed_tokens:
                print("Unsloth: Offloading input_embeddings to disk to save VRAM")
                offload_input_embeddings(model, temporary_location)

            # Remove old items to save VRAM
            for _ in range(3):
                gc.collect()
                clean_gpu_cache()

            if train_lm_head:
                print("Unsloth: Offloading output_embeddings to disk to save VRAM")
                offload_output_embeddings(model, temporary_location)

            # Remove old items to save VRAM
            for _ in range(3):
                gc.collect()
                clean_gpu_cache()

        model = _get_peft_model(model, lora_config)
        # Fix LoraConfig.auto_mapping is None
        fix_lora_auto_mapping(model)

        # Apply QAT + LoRA if specified
        if qat_scheme is not None:
            print("Unsloth: Applying QAT to mitigate quantization degradation")
            model = FastLlamaModel._prepare_for_qat(model, qat_scheme)

        model._saved_temp_tokenizer = _saved_temp_tokenizer

        model = FastLlamaModel.patch_peft_model(model, use_gradient_checkpointing)

        if ensure_weight_tying:
            try:
                input_embeddings = model.get_input_embeddings()
                output_embeddings = model.get_output_embeddings()

                if input_embeddings is not None and output_embeddings is not None:

                    def _retie_parameter(target_module, source_module):
                        if not hasattr(source_module, "weight"):
                            return
                        weight = source_module.weight
                        # Remove existing registration to avoid "attribute already exists"
                        if "weight" in getattr(target_module, "_parameters", {}):
                            target_module._parameters.pop("weight")
                        if hasattr(target_module, "weight"):
                            try:
                                delattr(target_module, "weight")
                            except Exception as exc:
                                logger.warning_once(
                                    f"Unsloth: Could not delete existing weight attr during retie on "
                                    f"{type(target_module).__name__}: {exc}"
                                )
                        target_module.register_parameter("weight", weight)

                    # Tie trainable copies created by ModulesToSaveWrapper first (these are used in forward)
                    if hasattr(input_embeddings, "modules_to_save") and hasattr(
                        output_embeddings, "modules_to_save"
                    ):
                        if hasattr(
                            input_embeddings.modules_to_save, "default"
                        ) and hasattr(output_embeddings.modules_to_save, "default"):
                            _retie_parameter(
                                output_embeddings.modules_to_save.default,
                                input_embeddings.modules_to_save.default,
                            )

                    # Tie original_module references as well if present
                    if hasattr(input_embeddings, "original_module") and hasattr(
                        output_embeddings, "original_module"
                    ):
                        _retie_parameter(
                            output_embeddings.original_module,
                            input_embeddings.original_module,
                        )
            except Exception as e:
                logger.warning_once(
                    f"Unsloth: Failed to ensure weight tying between embeddings and lm_head: {e}"
                )

        if train_embed_tokens:
            print("Unsloth: Training embed_tokens in mixed precision to save VRAM")
            assert hasattr(model.get_input_embeddings(), "modules_to_save")

            _offload_frozen_module_for_training(
                model.get_input_embeddings(), DEVICE_TYPE_TORCH, offload_device = None
            )

        if train_lm_head:
            print("Unsloth: Training lm_head in mixed precision to save VRAM")
            assert hasattr(model.get_output_embeddings(), "modules_to_save")

            _offload_frozen_module_for_training(
                model.get_output_embeddings(), DEVICE_TYPE_TORCH, offload_device = None
            )

        # Patch tokenizer to pad to the right
        internal_model = model
        while hasattr(internal_model, "model"):
            if hasattr(internal_model, "_saved_temp_tokenizer"):
                internal_model._saved_temp_tokenizer.padding_side = "right"
            # Also set is_loaded_in_8bit to disable incorrect DDP
            internal_model.is_loaded_in_8bit = True
            internal_model = internal_model.model
        if hasattr(internal_model, "_saved_temp_tokenizer"):
            internal_model._saved_temp_tokenizer.padding_side = "right"
        # Also set is_loaded_in_8bit to disable incorrect DDP
        internal_model.is_loaded_in_8bit = True

        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            clean_gpu_cache()

        patch_peft_fast_inference(model)

        # Add for_inference and for_training
        model.for_training = functools.partial(FastLlamaModel.for_training, model)
        model.for_inference = functools.partial(FastLlamaModel.for_inference, model)
        m = model
        while hasattr(m, "model"):
            m.for_training = functools.partial(FastBaseModel.for_training, m)
            m.for_inference = functools.partial(FastBaseModel.for_inference, m)
            m = m.model
        return model

    @staticmethod
    def patch_peft_model(
        model,
        use_gradient_checkpointing = "unsloth",
    ):
        """Patch an already PEFT-wrapped model with Unsloth's LoRA forward kernels.

        The function prepares the model via `prepare_model_for_kbit_training`,
        normalises `loftq_config = None` to `{}` on every adapter config, rebinds
        the MLP / QKV / O projection entry points of each decoder layer when the
        fast-path conditions hold, propagates `max_seq_length` through the
        wrapper chain and all submodules, forces right padding on any saved
        tokenizer, and attaches `for_training` / `for_inference` helpers.

        Args:
            model: A `PeftModelForCausalLM` or `PeftModelForSequenceClassification`.
            use_gradient_checkpointing: Forwarded unchanged to
                `prepare_model_for_kbit_training` (or to
                `FastBaseModel.patch_peft_model` on the delegated path).

        Returns:
            The patched model (the object returned by
            `prepare_model_for_kbit_training`), or the result of
            `FastBaseModel.patch_peft_model` when `UNSLOTH_USE_NEW_MODEL == "1"`.

        Raises:
            TypeError: If `model` is not one of the two supported PEFT classes.
            NotImplementedError: If `model.config.model_type` is not in the
                supported list below.
            RuntimeError: If `Trainer._inner_training_loop` is not named
                `_fast_inner_training_loop`.
        """
        # Opt-in escape hatch: hand everything over to the generic base implementation.
        if os.environ.get("UNSLOTH_USE_NEW_MODEL", "0") == "1":
            return FastBaseModel.patch_peft_model(
                model = model,
                use_gradient_checkpointing = use_gradient_checkpointing,
            )
        if not isinstance(model, PeftModelForCausalLM) and not isinstance(
            model, PeftModelForSequenceClassification
        ):
            raise TypeError(
                "Unsloth: Your model needs to call `.get_peft_model` first!"
            )

        # Pick the LoRA MLP kernel from the architecture name:
        # gemma / gemma2 use the approximate GEGLU kernel, every other
        # supported architecture uses the SwiGLU kernel.
        model_type = model.config.model_type

        if model_type == "llama":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "mistral":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "qwen2":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "gemma":
            apply_lora_mlp = apply_lora_mlp_geglu_approx
        elif model_type == "gemma2":
            apply_lora_mlp = apply_lora_mlp_geglu_approx
        elif model_type == "cohere":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "granite":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "qwen3":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "falcon_h1":
            apply_lora_mlp = apply_lora_mlp_swiglu
        elif model_type == "qwen3moe":
            apply_lora_mlp = apply_lora_mlp_swiglu
        else:
            raise NotImplementedError(f"Unsloth: {model_type} is not yet implemented!")

        # From here on `model` refers to whatever this helper returns.
        model = prepare_model_for_kbit_training(
            model,
            use_gradient_checkpointing = use_gradient_checkpointing,
            use_reentrant = True,
        )

        # Fix up config for transformers uploading PEFT
        # NOTE: the body is disabled with `if False`, so this loop currently
        # has no effect; it is kept for reference only.
        for active_adapter in model.peft_config.keys():
            # Not necessary since we require transformers >= 4.37
            if False:
                name = model.peft_config[active_adapter].base_model_name_or_path
                if name.startswith("unsloth/") and name.endswith("-bnb-4bit"):
                    name = name[: len(name) - len("-bnb-4bit")]
                    model.peft_config[active_adapter].base_model_name_or_path = name
                pass
            # Add revision to enable future fast inference paths
            # [TODO] Bugs out! See https://github.com/unslothai/unsloth/issues/492
            # model.peft_config[active_adapter].revision = f"unsloth"

        from transformers.trainer import Trainer

        # Sanity check that the Trainer training loop carries the expected name.
        # NOTE(review): presumably Unsloth replaces `_inner_training_loop` elsewhere
        # before this point - the patching itself is not visible in this function.
        if Trainer._inner_training_loop.__name__ != "_fast_inner_training_loop":
            raise RuntimeError(
                "Unsloth: Unsuccessfully patched Trainer! Please file a bug report!"
            )

        # Fix loftq issues
        # loftq_config must not = None, but rather {}
        all_configs = model.peft_config
        for key, current_config in all_configs.items():
            if (
                hasattr(current_config, "loftq_config")
                and current_config.loftq_config is None
            ):
                # Rebuild the config object from its own attributes with the
                # normalised value. `new_args` is the live `__dict__`, so the
                # old config object is updated in place as well.
                new_args = current_config.__dict__
                new_args["loftq_config"] = {}
                current_config = current_config.__class__(**new_args)
                all_configs[key] = current_config

        # Do patching
        # Counters of successfully patched sub-blocks, reported in the summary log.
        n_mlp = 0
        n_qkv = 0
        n_o = 0

        # Use the first entry of `active_adapters` when that attribute exists,
        # otherwise fall back to the singular `active_adapter`.
        active_adapter = (
            model.active_adapters[0]
            if hasattr(model, "active_adapters")
            else model.active_adapter
        )

        # Get dropout and bias
        lora_dropout = model.peft_config[active_adapter].lora_dropout
        bias = model.peft_config[active_adapter].bias

        # For Cohere the MLP kernel is bound with `inplace = False`.
        _apply_lora_mlp = (
            functools.partial(apply_lora_mlp, inplace = False)
            if model_type == "cohere"
            else apply_lora_mlp
        )

        # The fast paths are only installed when LoRA dropout is 0 and bias is "none".
        if lora_dropout == 0 and bias == "none":
            for idx, layer in enumerate(model.model.model.layers):
                if model_type != "falcon_h1":
                    # LoRAMLP.apply doesn't have functionality for gate and down multipliers yet.
                    # Don't patch falcon h1 for the time being.

                    # MLP patching
                    mlp_module = layer.mlp
                    gate_proj = mlp_module.gate_proj
                    up_proj = mlp_module.up_proj
                    down_proj = mlp_module.down_proj

                    # Patch only if all three projections carry LoRA weights
                    # (`lora_A`), their base layers have no bias, and
                    # `lora_magnitude_vector` is absent or empty.
                    # NOTE(review): a non-empty `lora_magnitude_vector` presumably
                    # indicates DoRA - confirm against PEFT.
                    if (
                        hasattr(gate_proj, "lora_A")
                        and hasattr(up_proj, "lora_A")
                        and hasattr(down_proj, "lora_A")
                        and (getattr(gate_proj, "base_layer", gate_proj).bias is None)
                        and (getattr(up_proj, "base_layer", up_proj).bias is None)
                        and (getattr(down_proj, "base_layer", down_proj).bias is None)
                        and (
                            len(getattr(gate_proj, "lora_magnitude_vector", []) or [])
                            == 0
                        )
                        and (
                            len(getattr(up_proj, "lora_magnitude_vector", []) or [])
                            == 0
                        )
                        and (
                            len(getattr(down_proj, "lora_magnitude_vector", []) or [])
                            == 0
                        )
                    ):
                        # https://stackoverflow.com/questions/50599045/python-replacing-a-function-within-a-class-of-a-module
                        if hasattr(mlp_module, "_unsloth_forward"):
                            # then we've patched the mlp to use TiledMLP
                            mlp_module._unsloth_forward = types.MethodType(
                                _apply_lora_mlp, mlp_module
                            )
                        else:
                            # Bind the kernel as this instance's `forward`.
                            mlp_module.forward = types.MethodType(
                                _apply_lora_mlp, mlp_module
                            )
                        n_mlp += 1
                    else:
                        logger.warning_once(
                            "Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters\n"
                            "are not enabled or a bias term (like in Qwen) is used."
                        )

                # QKV attention patching
                # Same eligibility rules as the MLP, applied to q/k/v together.
                q_proj = layer.self_attn.q_proj
                k_proj = layer.self_attn.k_proj
                v_proj = layer.self_attn.v_proj
                if (
                    hasattr(q_proj, "lora_A")
                    and hasattr(k_proj, "lora_A")
                    and hasattr(v_proj, "lora_A")
                    and (getattr(q_proj, "base_layer", q_proj).bias is None)
                    and (getattr(k_proj, "base_layer", k_proj).bias is None)
                    and (getattr(v_proj, "base_layer", v_proj).bias is None)
                    and (len(getattr(q_proj, "lora_magnitude_vector", []) or []) == 0)
                    and (len(getattr(k_proj, "lora_magnitude_vector", []) or []) == 0)
                    and (len(getattr(v_proj, "lora_magnitude_vector", []) or []) == 0)
                ):
                    layer.self_attn.apply_qkv = apply_lora_qkv
                    n_qkv += 1
                else:
                    # For qwen2 the layer is still counted and no warning is
                    # emitted, even though `apply_qkv` is not replaced here.
                    if model_type == "qwen2":
                        n_qkv += 1
                    else:
                        logger.warning_once(
                            "Not an error, but Unsloth cannot patch Attention layers with our manual autograd engine since either LoRA adapters\n"
                            "are not enabled or a bias term (like in Qwen) is used."
                        )

                # O attention patching
                o_proj = layer.self_attn.o_proj
                if (
                    hasattr(o_proj, "lora_A")
                    and (getattr(o_proj, "base_layer", o_proj).bias is None)
                    and (len(getattr(o_proj, "lora_magnitude_vector", []) or []) == 0)
                ):
                    layer.self_attn.apply_o = apply_lora_o
                    n_o += 1
                else:
                    logger.warning_once(
                        "Not an error, but Unsloth cannot patch O projection layer with our manual autograd engine since either LoRA adapters\n"
                        "are not enabled or a bias term (like in Qwen) is used."
                    )

        # Summary is logged unconditionally; all counters stay 0 when the
        # dropout / bias condition above was not met.
        logger.warning_once(
            f"Unsloth {__version__} patched {len(model.model.model.layers)} layers with "
            f"{n_qkv} QKV layers, {n_o} O layers and {n_mlp} MLP layers.",
        )
        patch_saving_functions(model)

        # Patch cross entropy loss labels
        # Fixes https://github.com/unslothai/unsloth/issues/10
        # Copy `max_seq_length` onto every object in the `.model` wrapper chain,
        # including the innermost one.
        max_seq_length = model.max_seq_length
        # extra_ignored_labels = torch.full((max_seq_length, 1), -100, device = "cuda:0")
        # model.model.extra_ignored_labels = extra_ignored_labels
        internal_model = model
        while hasattr(internal_model, "model"):
            internal_model.max_seq_length = max_seq_length
            internal_model = internal_model.model
        internal_model.max_seq_length = max_seq_length
        # Save to modules as well
        for module in model.modules():
            module.max_seq_length = max_seq_length

        # Patch tokenizer to pad to the right
        internal_model = model
        while hasattr(internal_model, "model"):
            if hasattr(internal_model, "_saved_temp_tokenizer"):
                internal_model._saved_temp_tokenizer.padding_side = "right"
            internal_model = internal_model.model
        if hasattr(internal_model, "_saved_temp_tokenizer"):
            internal_model._saved_temp_tokenizer.padding_side = "right"

        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            clean_gpu_cache()

        patch_peft_fast_inference(model)

        # Add for_inference and for_training
        # The outermost model gets the FastLlamaModel helpers; every wrapper that
        # still has a `.model` attribute gets the FastBaseModel helpers.
        # The innermost object (the one without `.model`) is not given helpers here.
        model.for_training = functools.partial(FastLlamaModel.for_training, model)
        model.for_inference = functools.partial(FastLlamaModel.for_inference, model)
        m = model
        while hasattr(m, "model"):
            m.for_training = functools.partial(FastBaseModel.for_training, m)
            m.for_inference = functools.partial(FastBaseModel.for_inference, m)
            m = m.model
        return model

    @staticmethod
    def for_inference(model):
        """Switch `model` and every nested `.model` wrapper into inference mode.

        For each object in the `.model` chain (outermost to innermost) this
        turns off `gradient_checkpointing` and `training` when those attributes
        exist, sets the saved tokenizer (if any) to left padding and sets
        `_flag_for_generation = True`. Afterwards `model.eval()` is called,
        gradient checkpointing is disabled on every submodule that exposes the
        attribute, and the input / output embedding modules are marked as not
        training.

        Args:
            model: Object exposing `parameters`, `eval` and `modules`.

        Returns:
            The same `model` object.

        Raises:
            TypeError: If `model` has no `parameters` attribute.
        """
        if not hasattr(model, "parameters"):
            raise TypeError(
                "Unsloth: I think you're passing a tokenizer, not the model to for_inference!"
            )

        # Collect the wrapper chain: model, model.model, model.model.model, ...
        wrapper_chain = [model]
        while hasattr(wrapper_chain[-1], "model"):
            wrapper_chain.append(wrapper_chain[-1].model)

        for wrapper in wrapper_chain:
            # Only touch the mode flags that already exist on the object
            for flag_name in ("gradient_checkpointing", "training"):
                if hasattr(wrapper, flag_name):
                    setattr(wrapper, flag_name, False)
            # Generation wants left padding
            if hasattr(wrapper, "_saved_temp_tokenizer"):
                wrapper._saved_temp_tokenizer.padding_side = "left"
            # Marker that generation mode is active
            wrapper._flag_for_generation = True

        # Propagate eval mode to modules deeper in
        model.eval()

        # Since transformers 4.53, gradient checkpointing must be turned off explicitly
        for submodule in model.modules():
            if hasattr(submodule, "gradient_checkpointing"):
                submodule.gradient_checkpointing = False

        # Also disable training for embeddings for NEFTune
        for getter_name in ("get_input_embeddings", "get_output_embeddings"):
            if not hasattr(model, getter_name):
                continue
            embedding_module = getattr(model, getter_name)()
            if hasattr(embedding_module, "training"):
                embedding_module.training = False
        return model

    @staticmethod
    def for_training(model, use_gradient_checkpointing = True):
        """Switch `model` and every nested `.model` wrapper back into training mode.

        Removes any `_fast_lora` attribute from the model's parameters, then for
        each object in the `.model` chain (outermost to innermost) sets
        `gradient_checkpointing` to `use_gradient_checkpointing` and `training`
        to True when those attributes exist, sets the saved tokenizer (if any)
        to right padding and removes `_flag_for_generation`. Afterwards
        `model.train()` is called, gradient checkpointing is set on every
        submodule that exposes the attribute, and the input / output embedding
        modules are marked as training.

        Args:
            model: Object exposing `parameters`, `train` and `modules`.
            use_gradient_checkpointing: Value assigned to every existing
                `gradient_checkpointing` attribute.

        Returns:
            The same `model` object.

        Raises:
            TypeError: If `model` has no `parameters` attribute.
        """
        if not hasattr(model, "parameters"):
            raise TypeError(
                "Unsloth: I think you're passing a tokenizer, not the model to for_training!"
            )

        # Delete all fast inference loras
        for weight in model.parameters():
            if hasattr(weight, "_fast_lora"):
                delattr(weight, "_fast_lora")

        # Collect the wrapper chain: model, model.model, model.model.model, ...
        wrapper_chain = [model]
        while hasattr(wrapper_chain[-1], "model"):
            wrapper_chain.append(wrapper_chain[-1].model)

        for wrapper in wrapper_chain:
            if hasattr(wrapper, "gradient_checkpointing"):
                wrapper.gradient_checkpointing = use_gradient_checkpointing
            if hasattr(wrapper, "training"):
                wrapper.training = True
            # Training wants right padding
            if hasattr(wrapper, "_saved_temp_tokenizer"):
                wrapper._saved_temp_tokenizer.padding_side = "right"
            # Clear the generation marker if it was set
            if hasattr(wrapper, "_flag_for_generation"):
                delattr(wrapper, "_flag_for_generation")

        # Propagate train mode to modules deeper in
        model.train()

        # Since transformers 4.53, gradient checkpointing must be turned on explicitly
        for submodule in model.modules():
            if hasattr(submodule, "gradient_checkpointing"):
                submodule.gradient_checkpointing = use_gradient_checkpointing

        # Also re-enable training for embeddings for NEFTune
        for getter_name in ("get_input_embeddings", "get_output_embeddings"):
            if not hasattr(model, getter_name):
                continue
            embedding_module = getattr(model, getter_name)()
            if hasattr(embedding_module, "training"):
                embedding_module.training = True
        return model


# Imported at the bottom of the module instead of with the other imports --
# presumably to avoid a circular import with `.rl`; TODO confirm.
from .rl import PatchFastRL

# Runs at import time: hands FastLlamaModel to the RL patcher under the
# keyword name `FastLanguageModel`.
PatchFastRL(FastLanguageModel = FastLlamaModel)


================================================
FILE: unsloth/models/llama4.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# from unsloth_studio.models import patch_llama4
# patch_llama4()


================================================
FILE: unsloth/models/loader.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ._utils import (
    _prepare_model_for_qat,
    is_bfloat16_supported,
    is_vLLM_available,
    HAS_FLASH_ATTENTION,
    HAS_FLASH_ATTENTION_SOFTCAPPING,
    USE_MODELSCOPE,
    get_transformers_model_type,
    hf_login,
)
from .granite import FastGraniteModel
from .llama import FastLlamaModel, logger
from .mistral import FastMistralModel
from .qwen2 import FastQwen2Model
from .qwen3 import FastQwen3Model
from .qwen3_moe import FastQwen3MoeModel
from .cohere import FastCohereModel
from transformers import AutoConfig
from transformers import __version__ as transformers_version
from peft import PeftConfig, PeftModel
from .loader_utils import (
    _get_fp8_mode_and_check_settings,
    _offline_quantize_to_fp8,
    _tag_model_with_fp8_torchao_config,
    get_model_name,
    prepare_device_map,
)
import os, contextlib, sys

try:
    from huggingface_hub import get_token
except:
    try:
        from huggingface_hub.utils import get_token
    except:
        # For older versions of huggingface_hub
        from huggingface_hub.utils._token import get_token
from huggingface_hub import HfFileSystem
import importlib.util
from ..device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
    ALLOW_BITSANDBYTES,
)

# https://github.com/huggingface/transformers/pull/26037 allows 4 bit loading!
from unsloth_zoo.utils import Version, _get_dtype
from unsloth_zoo.hf_utils import dtype_from_config
from unsloth_zoo.tiled_mlp import patch_tiled_mlp

# Rebind the imported version string to a `Version` object so it can be
# compared against minimum-version thresholds below.
transformers_version = Version(transformers_version)
# Feature flags: each is True when the installed transformers meets the
# threshold for the named model family / feature. All use `>=` except
# SUPPORTS_LLAMA32, which is a strict `>` against 4.45.0.
SUPPORTS_FOURBIT = transformers_version >= Version("4.37")
SUPPORTS_GEMMA = transformers_version >= Version("4.38")
SUPPORTS_GEMMA2 = transformers_version >= Version("4.42")
SUPPORTS_LLAMA31 = transformers_version >= Version("4.43.2")
SUPPORTS_LLAMA32 = transformers_version > Version("4.45.0")
SUPPORTS_GRANITE = transformers_version >= Version("4.46.0")
SUPPORTS_QWEN3 = transformers_version >= Version("4.50.3")
SUPPORTS_QWEN3_MOE = transformers_version >= Version("4.50.3")
SUPPORTS_FALCON_H1 = transformers_version >= Version("4.53.0")
SUPPORTS_GEMMA3N = transformers_version >= Version("4.53.0")
SUPPORTS_GPTOSS = transformers_version >= Version("4.55.0")
# Transformers v5 meta-device loading corrupts non-persistent buffers (inv_freq).
# See _fix_rope_inv_freq() below for details.
_NEEDS_ROPE_FIX = transformers_version >= Version("5.0.0")
# The model classes below are imported only when the matching flag is True, so
# FastGemmaModel / FastGemma2Model / FastFalconH1Model are undefined names in
# this module on older transformers versions.
if SUPPORTS_GEMMA:
    from .gemma import FastGemmaModel
if SUPPORTS_GEMMA2:
    from .gemma2 import FastGemma2Model
if SUPPORTS_FALCON_H1:
    from .falcon_h1 import FastFalconH1Model
import torch
from ._utils import (
    patch_compiling_bitsandbytes,
    patch_model_and_tokenizer,
    prepare_model_for_kbit_training,
    apply_unsloth_gradient_checkpointing,
    patch_compiled_autograd,
    process_vision_info,
    unsloth_compile_transformers,
    fast_inference_setup,
)

# NOTE: a `global` statement at module scope has no effect; the names below are
# module-level either way.
global FORCE_FLOAT32
# Model-type patterns that are forced to float32 precision because float16
# overflows to infinity for them.
# NOTE(review): the trailing comma in "gemma3," suggests these are matched as
# substrings of a comma-joined model-type string -- confirm against the consumer.
FORCE_FLOAT32 = [
    "gemma3,",  # Add comma bc gemma3 will match gemma3n
    "gemma3text",  # Gemma3TextModel (EmbeddingGemma, standalone text-only Gemma3)
    "gemma3n",
    "gpt_oss",
    "qwen3_5",  # Qwen3.5 GDN layers produce NaN grad norms in float16 training
]

global DISABLE_COMPILE_MODEL_NAMES
# Model-type patterns for which compilation is disabled.
# Within each entry, comma-separated model types must be in alphabetical order
# (e.g. "granite,llava_next").
DISABLE_COMPILE_MODEL_NAMES = [
    "aya_vision",
    "modernbert",
    "granite,llava_next",  # Granite-vision 3
]

global DISABLE_SDPA_MODEL_NAMES
# Model-type patterns for which some SDPA modules are disabled because SDPA
# gives wrong results for them.
DISABLE_SDPA_MODEL_NAMES = [
    "gemma3,",  # Add comma bc gemma3 will match gemma3n
    "gemma3_text",  # Gemma3TextModel (EmbeddingGemma) - substring match, keep underscore
]


def _fix_rope_inv_freq(model):
    """Fix inv_freq corruption caused by transformers v5 meta-device loading.

    Transformers v5 initializes models on the meta device, then
    _move_missing_keys_from_meta_to_device() (modeling_utils.py) replaces ALL
    non-persistent buffers with torch.empty_like() -- uninitialized memory.

    Vanilla transformers restores inv_freq via _init_weights() which checks for
    hasattr(module, "original_inv_freq"). Unsloth's LlamaRotaryEmbedding and
    subclasses do not have this attribute, so inv_freq stays corrupted. This
    produces wrong positional encodings and causes 5-11x higher training loss.

    This function recomputes inv_freq from the stored base and dim, applies
    any model-specific scaling, and rebuilds the cos/sin caches.

    Only runs on transformers >= 5.0.0. No-op on v4.

    Args:
        model: The loaded model. Every module yielded by `model.modules()` is
            inspected; rotary modules are recognised by the attributes they carry.
            `model.config.rope_scaling` (when present) supplies `short_factor` /
            `long_factor` for the LongRope branch.

    Returns:
        The same `model` object; matching rotary modules are repaired in place.
    """
    if not _NEEDS_ROPE_FIX:
        return model

    # Module names are not needed, so iterate modules() rather than named_modules().
    for module in model.modules():
        # Unsloth's LlamaRotaryEmbedding and subclasses (Extended, LinearScaling,
        # Granite). Native v5 rotary classes (Gemma3, etc.) have original_inv_freq
        # which v5's _init_weights() uses to restore inv_freq, so they are fine.
        if (
            hasattr(module, "inv_freq")
            and hasattr(module, "base")
            and hasattr(module, "dim")
            and hasattr(module, "_apply_inv_freq_scaling")
            and hasattr(module, "multi_gpu_cos_cached")
        ):
            exponents = (
                torch.arange(
                    0, module.dim, 2, dtype = torch.int64, device = "cpu"
                ).float()
                / module.dim
            )
            inv_freq = 1.0 / (module.base**exponents)
            inv_freq = module._apply_inv_freq_scaling(inv_freq)
            module.inv_freq = inv_freq
            # Index-based on purpose: the list is re-read on every iteration
            # because _set_cos_sin_cache is opaque here and may update it.
            for device_idx in range(len(module.multi_gpu_cos_cached)):
                if module.multi_gpu_cos_cached[device_idx] is not None:
                    module._set_cos_sin_cache(
                        seq_len = module.current_rope_size,
                        device = torch.device(device_idx),
                        dtype = torch.get_default_dtype(),
                    )

        # LongRopeRotaryEmbedding (Phi-3.5 style with short_inv_freq + long_inv_freq)
        elif (
            hasattr(module, "short_inv_freq")
            and hasattr(module, "long_inv_freq")
            and hasattr(module, "base")
            and hasattr(module, "dim")
        ):
            # getattr() with a default tolerates a missing config (None) without
            # relying on the truthiness of the config object itself.
            config = getattr(model, "config", None)
            rope_scaling = getattr(config, "rope_scaling", None)
            if rope_scaling is None:
                continue
            short_factor = rope_scaling.get("short_factor", None)
            long_factor = rope_scaling.get("long_factor", None)
            if short_factor is None or long_factor is None:
                continue

            exponents = (
                torch.arange(
                    0, module.dim, 2, dtype = torch.int64, device = "cpu"
                ).float()
                / module.dim
            )
            sf = torch.tensor(short_factor, device = "cpu", dtype = torch.float32)
            lf = torch.tensor(long_factor, device = "cpu", dtype = torch.float32)
            module.short_inv_freq = 1.0 / (sf * module.base**exponents)
            module.long_inv_freq = 1.0 / (lf * module.base**exponents)

            # Rebuild the short-sequence cos/sin tables from the repaired
            # short_inv_freq. Only the short caches are refreshed here.
            dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
            t = torch.arange(
                module.original_max_position_embeddings,
                device = module.short_inv_freq.device,
                dtype = torch.int64,
            ).float()
            freqs = torch.outer(t, module.short_inv_freq)
            emb = torch.cat((freqs, freqs), dim = -1)

            # The scaled tables are identical for every device, so compute them
            # once (lazily, on the first populated slot) rather than per device.
            scaled_cos = None
            scaled_sin = None
            for device_idx, cached in enumerate(module.multi_gpu_short_cos_cached):
                if cached is None:
                    continue
                if scaled_cos is None:
                    scaled_cos = emb.cos() * module.scaling_factor
                    scaled_sin = emb.sin() * module.scaling_factor
                device_obj = torch.device(device_idx)
                module.multi_gpu_short_cos_cached[device_idx] = scaled_cos.to(
                    dtype = dtype, device = device_obj, non_blocking = True
                )
                module.multi_gpu_short_sin_cached[device_idx] = scaled_sin.to(
                    dtype = dtype, device = device_obj, non_blocking = True
                )
    return model


class FastLanguageModel(FastLlamaModel):
    @staticmethod
    def from_pretrained(
        model_name = "unsloth/Llama-3.2-1B-Instruct",
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,  # 4bit QLoRA
        load_in_8bit = False,  # 8bit  LoRA
        load_in_16bit = False,  # 16bit LoRA
        full_finetuning = False,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        trust_remote_code = False,
        use_gradient_checkpointing = "unsloth",
        resize_model_vocab = None,
        revision = None,
        use_exact_model_name = False,
        offload_embedding = False,
        float32_mixed_precision = None,  # Forces float32 mixed precision
        fast_inference = False,  # uses vLLM
        gpu_memory_utilization = 0.5,
        float8_kv_cache = False,
        random_state = 3407,
        max_lora_rank = 64,
        disable_log_stats = True,
        qat_scheme = None,
        load_in_fp8 = False,  # fp8 LoRA (True, False, 'block')
        unsloth_tiled_mlp = False,
        *args,
        **kwargs,
    ):
        # Respect user-provided quantization_config (e.g. BitsAndBytesConfig)
        quantization_config = kwargs.get("quantization_config", None)
        if quantization_config is not None:
            if isinstance(quantization_config, dict):
                q_load_in_4bit = quantization_config.get("load_in_4bit", False)
                q_load_in_8bit = quantization_config.get("load_in_8bit", False)
            else:
                q_load_in_4bit = getattr(quantization_config, "load_in_4bit", False)
                q_load_in_8bit = getattr(quantization_config, "load_in_8bit", False)
            if q_load_in_4bit:
                load_in_4bit = True
                load_in_8bit = False
            if q_load_in_8bit:
                load_in_8bit = True
                load_in_4bit = False

        # Login to allow private models
        token = hf_login(token)
        # Align dtype with bnb_4bit_compute_dtype if provided and dtype is unset.
        if dtype is None and quantization_config is not None:
            bnb_compute_dtype = None
            if isinstance(quantization_config, dict):
                if quantization_config.get("load_in_4bit", False):
                    bnb_compute_dtype = quantization_config.get(
                        "bnb_4bit_compute_dtype", None
                    )
            else:
                if getattr(quantization_config, "load_in_4bit", False):
                    bnb_compute_dtype = getattr(
                        quantization_config, "bnb_4bit_compute_dtype", None
                    )
            if isinstance(bnb_compute_dtype, str):
                bnb_compute_dtype = getattr(torch, bnb_compute_dtype, None)
            if isinstance(bnb_compute_dtype, torch.dtype):
                dtype = bnb_compute_dtype

        # Distributed-safe device placement for quantized models.
        # In multi-GPU (torchrun), each rank must load the model on its own device
        # to avoid Accelerate device relocation errors with quantized weights.
        is_quantized = load_in_4bit or load_in_8bit or load_in_fp8
        if is_quantized and isinstance(device_map, str):
            distributed_device_map, is_dist = prepare_device_map()
            if is_dist:
                device_map = distributed_device_map

        if load_in_8bit or full_finetuning or qat_scheme is not None:
            return FastModel.from_pretrained(
                model_name = model_name,
                max_seq_length = max_seq_length,
                dtype = dtype,
                load_in_4bit = load_in_4bit,
                load_in_8bit = load_in_8bit,
                load_in_16bit = load_in_16bit,
                full_finetuning = full_finetuning,
                token = token,
                device_map = device_map,
                rope_scaling = rope_scaling,  # [TODO] No effect
                fix_tokenizer = fix_tokenizer,  # [TODO] No effect
                trust_remote_code = trust_remote_code,
                use_gradient_checkpointing = use_gradient_checkpointing,
                resize_model_vocab = resize_model_vocab,  # [TODO] No effect
                revision = revision,
                return_logits = False,  # Return logits
                fullgraph = True,  # No graph breaks
                use_exact_model_name = use_exact_model_name,
                offload_embedding = offload_embedding,
                float32_mixed_precision = float32_mixed_precision,
                # Pass vLLM/inference parameters
                fast_inference = fast_inference,
                gpu_memory_utilization = gpu_memory_utilization,
                float8_kv_cache = float8_kv_cache,
                random_state = random_state,
                max_lora_rank = max_lora_rank,
                disable_log_stats = disable_log_stats,
                qat_scheme = qat_scheme,
                load_in_fp8 = load_in_fp8,
                unsloth_tiled_mlp = unsloth_tiled_mlp,
                *args,
                **kwargs,
            )

        if isinstance(dtype, str) and dtype in ["float16", "bfloat16"]:
            dtype = getattr(torch, dtype)
        assert (
            dtype is None
            or dtype == torch.float16
            or dtype == torch.bfloat16
            or dtype == torch.float32
        )

        if fast_inference:
            if importlib.util.find_spec("vllm") is None:
                raise ImportError(
                    "Unsloth: Please install vLLM before enabling `fast_inference`!\n"
                    "You can do this in a terminal via `pip install vllm`"
                )
            if DEVICE_TYPE_TORCH == "cuda":
                for i in range(DEVICE_COUNT):
                    # [TODO] DGX Spark vLLM breaks
                    if "NVIDIA GB10" in str(torch.cuda.get_device_name(i)).upper():
                        print(
                            "Unsloth: DGX Spark detected - `fast_inference=True` is currently broken as of January 2026.\n"
                            "Defaulting to native Unsloth inference."
                        )
                        fast_inference = False
                        break

        # Check if 4bit is allowed specifically for AMD
        if not ALLOW_BITSANDBYTES and not use_exact_model_name:
            if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):
                print(
                    "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now."
                )
            load_in_4bit = False

        # Find FP8, BnB 4bit, other mapped names
        old_model_name = model_name
        fp8_mode = None
        if not use_exact_model_name:
            new_model_name = get_model_name(
                model_name,
                load_in_4bit = load_in_4bit,
                load_in_fp8 = load_in_fp8,
                token = token,
                trust_remote_code = trust_remote_code,
            )
            if new_model_name is None and load_in_fp8 != False:
                fp8_mode = _get_fp8_mode_and_check_settings(
                    load_in_fp8,
                    fast_inference,
                    full_finetuning,
                    load_in_4bit,
                    load_in_8bit,
                    load_in_16bit,
                )
                model_name = _offline_quantize_to_fp8(model_name, fp8_mode)
            else:
                assert new_model_name is not None
                model_name = new_model_name
                # If mapper resolved to a pre-quantized FP8 model, disable
                # on-the-fly quantization to avoid double quantization
                if load_in_fp8 != False and new_model_name != old_model_name:
                    load_in_fp8 = False

        # Check if pre-quantized models are allowed
        # AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
        if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
            ("-unsloth-bnb-4bit", "-bnb-4bit")
        ):
            model_name = model_name.lower().removesuffix("-unsloth-bnb-4bit")
            model_name = model_name.lower().removesuffix("-bnb-4bit")
        # Change -BF16 to all False for 4bit, 8bit etc
        if model_name.lower().endswith("-bf16"):
            load_in_4bit = False
            load_in_8bit = False
            load_in_fp8 = False
            load_in_16bit = True

        if USE_MODELSCOPE and not os.path.exists(model_name):
            from modelscope import snapshot_download

            model_name = snapshot_download(model_name)

        # First check if it's a normal model via AutoConfig
        from huggingface_hub.utils import (
            disable_progress_bars,
            enable_progress_bars,
            are_progress_bars_disabled,
        )

        was_disabled = are_progress_bars_disabled()
        disable_progress_bars()

        autoconfig_error = None
        peft_error = None
        model_config = None
        peft_config = None
        try:
            model_config = AutoConfig.from_pretrained(
                model_name,
                token = token,
                revision = revision,
                trust_remote_code = trust_remote_code,
            )
            is_model = True
        except ImportError:
            raise
        except Exception as error:
            autoconfig_error = str(error)
            if "architecture" in autoconfig_error:
                if "qwen3_5" in autoconfig_error:
                    raise ImportError(
                        f"Unsloth: Your transformers version of {transformers_version} does not support Qwen3.5.\n"
                        f"The minimum required version is 5.2.0.\n"
                        f'Try `pip install --upgrade "transformers>=5.2.0"`\n'
                        f"to obtain the latest transformers build, then restart this session."
                    )
                raise ValueError(
                    f"`{model_name}` is not supported yet in `transformers=={transformers_version}`.\n"
                    f"Please update transformers via `pip install --upgrade transformers` and try again."
                )
            is_model = False
        try:
            peft_config = PeftConfig.from_pretrained(
                model_name,
                token = token,
                revision = revision,
                trust_remote_code = trust_remote_code,
            )
            is_peft = True
        except ImportError:
            raise
        except Exception as error:
            peft_error = str(error)
            if "architecture" in peft_error:
                raise ValueError(
                    f"`{model_name}` is not supported yet in `transformers=={transformers_version}`.\n"
                    f"Please update transformers via `pip install --upgrade transformers` and try again."
                )
            is_peft = False

        # Old transformers versions check
        both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32

        # Error out if both LoRA and normal model config exists.
        if both_exist:
            raise RuntimeError(
                "Unsloth: Your repo has a LoRA adapter and a base model.\n"
                "You have 2 files `config.json` and `adapter_config.json`.\n"
                "We must only allow one config file.\n"
                "Please separate the LoRA and base models to 2 repos."
            )
        model_types = get_transformers_model_type(
            peft_config if peft_config is not None else model_config,
            trust_remote_code = trust_remote_code,
        )
        if len(model_types) == 1:
            model_type = model_types[0]
        else:
            # Leave as tuple if more than one arch
            model_type = model_types

        # New transformers need to check manually.
        if SUPPORTS_LLAMA32:
            # Check if folder exists locally
            if os.path.isdir(model_name):
                exist_adapter_config = os.path.exists(
                    os.path.join(model_name, "adapter_config.json")
                )
                exist_config = os.path.exists(os.path.join(model_name, "config.json"))
                both_exist = exist_adapter_config and exist_config
            else:
                # Because HfFileSystem assumes linux paths, we need to set the path with forward slashes, even on Windows.
                files = HfFileSystem(token = token).glob(f"{model_name}/*.json")
                files = list(os.path.split(x)[-1] for x in files)
                if (
                    sum(x == "adapter_config.json" or x == "config.json" for x in files)
                    >= 2
                ):
                    both_exist = True

        if not is_model and not is_peft:
            error = autoconfig_error if autoconfig_error is not None else peft_error
            # Old transformers version
            if "rope_scaling" in error.lower() and not SUPPORTS_LLAMA31:
                raise ImportError(
                    f"Unsloth: Your transformers version of {transformers_version} does not support new RoPE scaling methods.\n"
                    f"This includes Llama 3.1. The minimum required version is 4.43.2\n"
                    f'Try `pip install --upgrade "transformers>=4.43.2"`\n'
                    f"to obtain the latest transformers build, then restart this session."
                )
            # Create a combined error message showing both failures
            combined_error = (
                "Unsloth: Failed to load model. Both AutoConfig and PeftConfig loading failed.\n\n"
                f"AutoConfig error: {autoconfig_error}\n\n"
                f"PeftConfig error: {peft_error}\n\n"
            )
            raise RuntimeError(combined_error)

        # Get base model for PEFT:
        if is_peft:
            # Check base model again for PEFT
            model_name = peft_config.base_model_name_or_path
            if not use_exact_model_name:
                model_name = get_model_name(
                    model_name,
                    load_in_4bit = load_in_4bit,
                    load_in_fp8 = load_in_fp8,
                    token = token,
                    trust_remote_code = trust_remote_code,
                )
            # Check if pre-quantized models are allowed
            # AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
            if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
                ("-unsloth-bnb-4bit", "-bnb-4bit")
            ):
                model_name = model_name.lower().removesuffix("-unsloth-bnb-4bit")
                model_name = model_name.lower().removesuffix("-bnb-4bit")
            # Change -BF16 to all False for 4bit, 8bit etc
            if model_name.lower().endswith("-bf16"):
                load_in_4bit = False
                load_in_8bit = False
                load_in_fp8 = False
                load_in_16bit = True

            model_config = AutoConfig.from_pretrained(
                model_name,
                token = token,
                trust_remote_code = trust_remote_code,
            )

        if not was_disabled:
            enable_progress_bars()

        if model_type == "llama":
            scaling_type = None
            if getattr(model_config, "rope_scaling", None) is not None:
                scaling_type1 = model_config.rope_scaling.get("type", None)
                scaling_type2 = model_config.rope_scaling.get("rope_type", None)
                scaling_type = (
                    scaling_type1 if scaling_type1 is not None else scaling_type2
                )

            if scaling_type == "llama3" and not SUPPORTS_LLAMA31:
                raise ImportError(
                    f"Unsloth: Your transformers version of {transformers_version} does not support Llama 3.1.\n"
                    f"The minimum required version is 4.43.2\n"
                    f'Try `pip install --upgrade "transformers>=4.43.2"`\n'
                    f"to obtain the latest transformers build, then restart this session."
                )

            dispatch_model = FastLlamaModel

        elif model_type == "mistral":
            dispatch_model = FastMistralModel
        elif model_type == "gemma":
            if not SUPPORTS_GEMMA:
                raise ImportError(
                    f"Unsloth: Your transformers version of {transformers_version} does not support Gemma.\n"
                    f"The minimum required version is 4.38.\n"
                    f'Try `pip install --upgrade "transformers>=4.38"`\n'
                    f"to obtain the latest transformers build, then restart this session."
                )
            dispatch_model = FastGemmaModel
        elif model_type == "gemma2":
            if not SUPPORTS_GEMMA2:
                raise ImportError(
                    f"Unsloth: Your transformers version of {transformers_version} does not support Gemma2.\n"
                    f"The minimum required version is 4.42.3.\n"
                    f'Try `pip install --upgrade "transformers>=4.42.3"`\n'
                    f"to obtain the latest transformers build, then restart this session."
                )
            # Also check for softcapping support in flash-attn which is faster!
            if is_bfloat16_supported() and not HAS_FLASH_ATTENTION:
                print(
                    "Unsloth: If you want to finetune Gemma 2, install flash-attn to make it faster!\n"
                    "To install flash-attn, do the below:\n"
                    '\npip install --no-deps --upgrade "flash-attn>=2.6.3"'
                )
            elif HAS_FLASH_ATTENTION and not HAS_FLASH_ATTENTION_SOFTCAPPING:
                print(
                    "Unsloth: If you want to finetune Gemma 2, upgrade flash-attn to version 2.6.3 or higher!\n"
                    "Newer versions support faster and less memory usage kernels for Gemma 2's attention softcapping!\n"
                    "To update flash-attn, do the below:\n"
                    '\npip install --no-deps --upgrade "flash-attn>=2.6.3"'
                )

            dispatch_model = FastGemma2Model
        elif model_type == "qwen2":
            dispatch_model = FastQwen2Model
        elif model_type == "qwen3":  # or model_type == "qwen3_moe":
            if not SUPPORTS_QWEN3 or not SUPPORTS_QWEN3_MOE:
                raise ImportError(
                    f"Unsloth: Your transformers version of {transformers_version} does not support Qwen3.\n"
                    f"The minimum required version is 4.50.3.\n"
                    f'Try `pip install --upgrade "transformers>=4.50.3"`\n'
                    f"to obtain the latest transformers build, then restart this session."
                )
            dispatch_model = (
                FastQwen3Model if model_type == "qwen3" else FastQwen3MoeModel
            )
        # elif model_type == "falcon_h1":
        #     dispatch_model = FastFalconH1Model
        #     if not SUPPORTS_FALCON_H1:
        #         raise ImportError(
        #             f"Unsloth: Your transformers version of {transformers_version} does not support FalconH1.\n"\
        #             f"The minimum required version is 4.50.3.\n"\
        #             f'Try `pip install --upgrade "transformers>=4.50.3"`\n'\
        #             f"to obtain the latest transformers build, then restart this session."\
        #         )
        # Temporary disable optimized Cohere until errors match
        # elif model_type == "cohere":
        #     dispatch_model = FastCohereModel
        # Temporary disable optimized Granite until errors match
        # elif model_type == "granite":
        #     dispatch_model = FastGraniteModel
        else:
            return FastModel.from_pretrained(
                model_name = old_model_name,
                max_seq_length = max_seq_length,
                dtype = dtype,
                load_in_4bit = load_in_4bit,
                load_in_8bit = load_in_8bit,
                load_in_16bit = load_in_16bit,
                full_finetuning = full_finetuning,
                token = token,
                device_map = device_map,
                rope_scaling = rope_scaling,  # [TODO] No effect
                fix_tokenizer = fix_tokenizer,  # [TODO] No effect
                trust_remote_code = trust_remote_code,
                use_gradient_checkpointing = use_gradient_checkpointing,
                resize_model_vocab = resize_model_vocab,  # [TODO] No effect
                revision = revision,
                return_logits = False,  # Return logits
                fullgraph = True,  # No graph breaks
                use_exact_model_name = use_exact_model_name,
                offload_embedding = offload_embedding,
                float32_mixed_precision = float32_mixed_precision,
                # Pass vLLM/inference parameters
                fast_inference = fast_inference,
                gpu_memory_utilization = gpu_memory_utilization,
                float8_kv_cache = float8_kv_cache,
                random_state = random_state,
                max_lora_rank = max_lora_rank,
                disable_log_stats = disable_log_stats,
                qat_scheme = qat_scheme,
                load_in_fp8 = load_in_fp8,
                unsloth_tiled_mlp = unsloth_tiled_mlp,
                *args,
                **kwargs,
            )

        # Apply gradient checkpointing with smart heuristics
        use_gradient_checkpointing = apply_unsloth_gradient_checkpointing(
            use_gradient_checkpointing, max_seq_length, dtype
        )

        # Check if this is local model since the tokenizer gets overwritten
        if (
            os.path.exists(os.path.join(old_model_name, "tokenizer_config.json"))
            and os.path.exists(os.path.join(old_model_name, "tokenizer.json"))
            and os.path.exists(os.path.join(old_model_name, "special_tokens_map.json"))
        ):
            tokenizer_name = old_model_name
        else:
            tokenizer_name = kwargs.pop("tokenizer_name", None)

        if fast_inference:
            fast_inference, model_name = fast_inference_setup(model_name, model_config)

        load_in_4bit_kwargs = load_in_4bit
        load_in_8bit_kwargs = load_in_8bit
        if quantization_config is not None and not fast_inference:
            load_in_4bit_kwargs = False
            load_in_8bit_kwargs = False

        model, tokenizer = dispatch_model.from_pretrained(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = _get_dtype(dtype),
            load_in_4bit = load_in_4bit_kwargs,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = dispatch_model,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
            revision = revision if not is_peft else None,
            fast_inference = fast_inference,
            gpu_memory_utilization = gpu_memory_utilization,
            float8_kv_cache = float8_kv_cache,
            random_state = random_state,
            max_lora_rank = max_lora_rank,
            disable_log_stats = disable_log_stats,
            load_in_fp8 = load_in_fp8,
            *args,
            **kwargs,
        )

        if resize_model_vocab is not None:
            model.resize_token_embeddings(resize_model_vocab)

        # In case the model supports tagging, add the unsloth tag.
        if hasattr(model, "add_model_tags"):
            model.add_model_tags(
                [
                    "unsloth",
                ]
            )
        if hasattr(tokenizer, "add_model_tags"):
            tokenizer.add_model_tags(
                [
                    "unsloth",
                ]
            )

        if load_in_4bit:
            # Fix up bitsandbytes config, but respect user-provided quantization_config
            if quantization_config is None:
                compute_dtype = dtype_from_config(model.config)
                quantization_config = {
                    # Sometimes compute_dtype is not a string!!
                    "bnb_4bit_compute_dtype": compute_dtype,
                    "bnb_4bit_quant_type": "nf4",
                    "bnb_4bit_use_double_quant": True,
                    "llm_int8_enable_fp32_cpu_offload": False,
                    "llm_int8_has_fp16_weight": False,
                    "llm_int8_skip_modules": None,
                    "llm_int8_threshold": 6.0,
                    "load_in_4bit": True,
                    "load_in_8bit": False,
                    "quant_method": "bitsandbytes",
                }
                model.config.update({"quantization_config": quantization_config})
            else:
                if hasattr(quantization_config, "to_dict"):
                    model.config.update(
                        {"quantization_config": quantization_config.to_dict()}
                    )
                elif isinstance(quantization_config, dict):
                    model.config.update({"quantization_config": quantization_config})

        if load_in_fp8 != False:
            _tag_model_with_fp8_torchao_config(model, fp8_mode)

        if is_peft:
            # From https://github.com/huggingface/peft/issues/184
            # Now add PEFT adapters
            model = PeftModel.from_pretrained(
                model,
                old_model_name,
                token = token,
                revision = revision,
                is_trainable = True,
                trust_remote_code = trust_remote_code,
            )
            # Patch it as well!
            model = dispatch_model.patch_peft_model(model, use_gradient_checkpointing)

        # Patch Tiled MLP
        # To turn this on, set UNSLOTH_TILED_MLP to "arctic", "target", or "target:{GB}".
        patch_tiled_mlp_choice = os.environ.get(
            "UNSLOTH_TILED_MLP", "arctic" if unsloth_tiled_mlp else "0"
        )
        if patch_tiled_mlp_choice != "0" or unsloth_tiled_mlp:
            patch_tiled_mlp(model, patch_options_str = patch_tiled_mlp_choice)

        model = _fix_rope_inv_freq(model)
        return model, tokenizer


from ..kernels import (
    patch_loss_functions,
    post_patch_loss_function,
)
from .vision import FastBaseModel
from transformers import (
    AutoModelForCausalLM,
)

# Resolve the vision-language auto class under a single name.
# Prefer `AutoModelForImageTextToText` when the installed transformers build
# provides it, and expose it under the `AutoModelForVision2Seq` name that the
# rest of this module references.
try:
    from transformers import AutoModelForImageTextToText

    AutoModelForVision2Seq = AutoModelForImageTextToText
except Exception:
    # Fall back to the class that is actually named `AutoModelForVision2Seq`.
    # `Exception` (not a bare `except`) keeps the best-effort fallback for any
    # import-time failure while letting KeyboardInterrupt / SystemExit through.
    from transformers import AutoModelForVision2Seq


class FastModel(FastBaseModel):
    @staticmethod
    def _prepare_for_qat(model, qat_scheme):
        """Prepare ``model`` for quantization-aware training.

        Thin wrapper: forwards ``model`` and ``qat_scheme`` unchanged to
        ``_prepare_model_for_qat`` and returns whatever that helper returns.
        """
        return _prepare_model_for_qat(model, qat_scheme)

    @staticmethod
    def from_pretrained(
        model_name = "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,  # 4bit QLoRA
        load_in_8bit = False,  # 8bit  LoRA
        load_in_16bit = False,  # 16bit LoRA
        full_finetuning = False,
        token = None,
        device_map = "sequential",
        rope_scaling = None,  # [TODO] No effect
        fix_tokenizer = True,  # [TODO] No effect
        trust_remote_code = False,
        use_gradient_checkpointing = "unsloth",
        resize_model_vocab = None,  # [TODO] No effect
        revision = None,
        return_logits = False,  # Return logits
        fullgraph = True,  # No graph breaks
        use_exact_model_name = False,
        auto_model = None,
        whisper_language = None,
        whisper_task = None,
        unsloth_force_compile = False,
        offload_embedding = False,
        float32_mixed_precision = None,  # Forces float32 mixed precision
        # Add the missing vLLM/inference parameters
        fast_inference = False,  # uses vLLM
        gpu_memory_utilization = 0.5,
        float8_kv_cache = False,
        random_state = 3407,
        max_lora_rank = 64,
        disable_log_stats = True,
        qat_scheme = None,
        load_in_fp8 = False,  # fp8 LoRA (True, False, 'block')
        unsloth_tiled_mlp = False,
        target_parameters = None,  # For MoE expert parameters
        *args,
        **kwargs,
    ):
        # Respect user-provided quantization_config (e.g. BitsAndBytesConfig)
        quantization_config = kwargs.get("quantization_config", None)
        if quantization_config is not None:
            if isinstance(quantization_config, dict):
                q_load_in_4bit = quantization_config.get("load_in_4bit", False)
                q_load_in_8bit = quantization_config.get("load_in_8bit", False)
            else:
                q_load_in_4bit = getattr(quantization_config, "load_in_4bit", False)
                q_load_in_8bit = getattr(quantization_config, "load_in_8bit", False)
            if q_load_in_4bit:
                load_in_4bit = True
                load_in_8bit = False
            if q_load_in_8bit:
                load_in_8bit = True
                load_in_4bit = False

        # Login to allow private models
        token = hf_login(token)
        if whisper_language is not None:
            assert type(whisper_language) is str
        if whisper_task is not None:
            assert type(whisper_task) is str
        # Align dtype with bnb_4bit_compute_dtype if provided and dtype is unset.
        if dtype is None and quantization_config is not None:
            bnb_compute_dtype = None
            if isinstance(quantization_config, dict):
                if quantization_config.get("load_in_4bit", False):
                    bnb_compute_dtype = quantization_config.get(
                        "bnb_4bit_compute_dtype", None
                    )
            else:
                if getattr(quantization_config, "load_in_4bit", False):
                    bnb_compute_dtype = getattr(
                        quantization_config, "bnb_4bit_compute_dtype", None
                    )
            if isinstance(bnb_compute_dtype, str):
                bnb_compute_dtype = getattr(torch, bnb_compute_dtype, None)
            if isinstance(bnb_compute_dtype, torch.dtype):
                dtype = bnb_compute_dtype
        SUPPORTS_BFLOAT16 = is_bfloat16_supported()
        if dtype is None:
            dtype = torch.float16 if not SUPPORTS_BFLOAT16 else torch.bfloat16
        elif dtype == torch.bfloat16 and not SUPPORTS_BFLOAT16:
            logger.warning_once(
                "Device does not support bfloat16. Will change to float16."
            )
            dtype = torch.float16
        assert dtype in (torch.float16, torch.bfloat16, torch.float32)
        assert load_in_fp8 in (True, False, "block")

        patch_compiled_autograd()
        patch_compiling_bitsandbytes()

        if full_finetuning and (load_in_4bit or load_in_8bit):
            print(
                "Unsloth: You selected full finetuning support, but 4bit / 8bit is enabled - disabling LoRA / QLoRA."
            )
            load_in_4bit = False
            load_in_8bit = False
            load_in_fp8 = False
            load_in_16bit = False

        if (
            int(load_in_4bit)
            + int(load_in_8bit)
            + int(load_in_16bit)
            + int(load_in_fp8 != False)
            >= 2
        ):
            raise RuntimeError(
                "Unsloth: Can only load in 4bit or 8bit or 16bit, not a combination!\n"
                "Also, we by default set `load_in_4bit = True`.\n"
                "If you want 8bit finetuning, set both `load_in_4bit = False` and `load_in_8bit = True`\n"
                "If you want 16bit LoRA finetuning, set `load_in_16bit = True`"
            )

        if qat_scheme is not None and not full_finetuning:
            raise ValueError(
                "Specifying `qat_scheme` in `FastLanguageModel.from_pretrained(...)` is only "
                "compatible with `full_finetuning=True`. If you wish to use QAT with LoRA, "
                "please pass in `qat_scheme` in `FastLanguageModel.get_peft_model(...)` instead."
            )
        if qat_scheme == "phone-deployment":
            qat_scheme = "int8-int4"

        # Distributed-safe device placement for quantized models.
        # In multi-GPU (torchrun), each rank must load the model on its own device
        # to avoid Accelerate device relocation errors with quantized weights.
        is_quantized = load_in_4bit or load_in_8bit or load_in_fp8
        if is_quantized and isinstance(device_map, str):
            distributed_device_map, is_dist = prepare_device_map()
            if is_dist:
                device_map = distributed_device_map

        # Check if 4bit is allowed specifically for AMD
        if not ALLOW_BITSANDBYTES and not use_exact_model_name:
            if load_in_4bit or load_in_8bit or model_name.lower().endswith("-bnb-4bit"):
                print(
                    "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now."
                )
            load_in_4bit = False

        if fast_inference:
            if importlib.util.find_spec("vllm") is None:
                raise ImportError(
                    "Unsloth: Please install vLLM before enabling `fast_inference`!\n"
                    "You can do this in a terminal via `pip install vllm`"
                )
            if DEVICE_TYPE_TORCH == "cuda":
                for i in range(DEVICE_COUNT):
                    # [TODO] DGX Spark vLLM breaks
                    if "NVIDIA GB10" in str(torch.cuda.get_device_name(i)).upper():
                        print(
                            "Unsloth: DGX Spark detected - `fast_inference=True` is currently broken as of January 2026.\n"
                            "Defaulting to native Unsloth inference."
                        )
                        fast_inference = False
                        break

        # Find FP8, BnB 4bit, other mapped names
        old_model_name = model_name
        fp8_mode = None
        if not use_exact_model_name:
            new_model_name = get_model_name(
                model_name, load_in_4bit = load_in_4bit, load_in_fp8 = load_in_fp8
            )
            if new_model_name is None and load_in_fp8 != False:
                fp8_mode = _get_fp8_mode_and_check_settings(
                    load_in_fp8,
                    fast_inference,
                    full_finetuning,
                    load_in_4bit,
                    load_in_8bit,
                    load_in_16bit,
                )
                model_name = _offline_quantize_to_fp8(model_name, fp8_mode)
            else:
                assert new_model_name is not None
                model_name = new_model_name
                # If mapper resolved to a pre-quantized FP8 model, disable
                # on-the-fly quantization to avoid double quantization
                if load_in_fp8 != False and new_model_name != old_model_name:
                    load_in_fp8 = False

        # Check if pre-quantized models are allowed
        # AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
        if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
            ("-unsloth-bnb-4bit", "-bnb-4bit")
        ):
            model_name = model_name.lower().removesuffix("-unsloth-bnb-4bit")
            model_name = model_name.lower().removesuffix("-bnb-4bit")
        # Change -BF16 to all False for 4bit, 8bit etc
        if model_name.lower().endswith("-bf16"):
            load_in_4bit = False
            load_in_8bit = False
            load_in_fp8 = False
            load_in_16bit = True

        # Check modelscope
        if USE_MODELSCOPE and not os.path.exists(model_name):
            from modelscope import snapshot_download

            model_name = snapshot_download(model_name)

        # First check if it's a normal model via AutoConfig
        from huggingface_hub.utils import (
            disable_progress_bars,
            enable_progress_bars,
            are_progress_bars_disabled,
        )

        was_disabled = are_progress_bars_disabled()
        disable_progress_bars()

        autoconfig_error = None
        peft_error = None
        model_config = None
        peft_config = None
        try:
            model_config = AutoConfig.from_pretrained(
                model_name,
                token = token,
                revision = revision,
                trust_remote_code = trust_remote_code,
            )
            is_model = True
        except ImportError:
            raise
        except Exception as error:
            autoconfig_error = str(error)
            if "architecture" in autoconfig_error:
                if "qwen3_5" in autoconfig_error:
                    raise ImportError(
                        f"Unsloth: Your transformers version of {transformers_version} does not support Qwen3.5.\n"
                        f"The minimum required version is 5.2.0.\n"
                        f'Try `pip install --upgrade "transformers>=5.2.0"`\n'
                        f"to obtain the latest transformers build, then restart this session."
                    )
                raise ValueError(
                    f"`{model_name}` is not supported yet in `transformers=={transformers_version}`.\n"
                    f"Please update transformers via `pip install --upgrade transformers` and try again."
                )
            is_model = False
        try:
            peft_config = PeftConfig.from_pretrained(
                model_name,
                token = token,
                revision = revision,
                trust_remote_code = trust_remote_code,
            )
            is_peft = True
        except ImportError:
            raise
        except Exception as error:
            peft_error = str(error)
            if "architecture" in peft_error:
                raise ValueError(
                    f"`{model_name}` is not supported yet in `transformers=={transformers_version}`.\n"
                    f"Please update transformers via `pip install --upgrade transformers` and try again."
                )
            is_peft = False
        # Old transformers versions check
        both_exist = (is_model and is_peft) and not SUPPORTS_LLAMA32
        # Error out if both LoRA and normal model config exists.
        if both_exist:
            raise RuntimeError(
                "Unsloth: Your repo has a LoRA adapter and a base model.\n"
                "You have 2 files `config.json` and `adapter_config.json`.\n"
                "We must only allow one config file.\n"
                "Please separate the LoRA and base models to 2 repos."
            )
        model_types = get_transformers_model_type(
            peft_config if peft_config is not None else model_config,
            trust_remote_code = trust_remote_code,
        )
        model_types_all = ",".join(model_types) + ","

        # Save model types and loading method
        lowered_model_name = model_name.lower()
        string = os.environ.get("UNSLOTH_MODEL_NAME", "") + model_types_all
        if load_in_4bit:
            string += "_load_in_4bit_"
        if load_in_8bit:
            string += "_load_in_8bit_"
        if load_in_16bit:
            string += "_load_in_16bit_"
        if load_in_fp8:
            string += "load_in_fp8"
        os.environ["UNSLOTH_MODEL_NAME"] = string

        # Check versions
        LATEST = "\nPlease use transformers via `pip install --no-deps git+https://github.com/huggingface/transformers.git`"
        NIGHTLY = '\nPlease use nightly transformers via pip install --upgrade "transformers>=4.49.0"`'
        # Pixtral
        if "pixtral" in model_types_all and transformers_version < Version("4.49.0"):
            raise RuntimeError(
                "Unsloth: Pixtral only works on transformers >= 4.49.0." + LATEST
            )
        # Qwen 2.5
        elif "qwen2_5" in model_types_all and transformers_version < Version("4.49.0"):
            raise RuntimeError(
                "Unsloth: Qwen 2.5 only works on transformers >= 4.49.0." + LATEST
            )
        # Gemma 3N must be before Gemma 3
        elif "gemma3n" in model_types_all:
            if transformers_version < Version("4.53.0"):
                raise RuntimeError(
                    "Unsloth: Gemma 3N only works on transformers >= 4.53.0" + LATEST
                )
            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = (
                "float16;torch.float16;torch.float16;"
                "if name.endswith('norm'): "
                "module._pre_set_compute_dtype = torch.float32\n"
                ";"
                "from unsloth_zoo.temporary_patches.gemma3n import patch_Gemma3nConv_Embed_forwards; patch_Gemma3nConv_Embed_forwards()"
            )
            # Set norms to float32 since anyways they get upcasted to float32
            # common in both gemma-3 and gemma-3n
            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
        # Gemma 3
        elif "gemma3" in model_types_all:
            if transformers_version < Version("4.50.0.dev0"):
                raise RuntimeError(
                    "Unsloth: Gemma 3 only works on transformers >= 4.50.0." + NIGHTLY
                )
            # Set norms to float32 since anyways they get upcasted to float32
            # common in both gemma-3 and gemma-3n
            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
            # ROCm/HIP: Gemma3 compiled forward produces NaN on RDNA GPUs
            # (gfx1100, gfx1101, gfx1102, gfx1150, gfx1151, etc.).
            # Disable torch.compile for model forward; loss compilation is fine.
            # See https://github.com/unslothai/unsloth/issues/3385
            from unsloth.kernels.utils import is_rdna

            if is_rdna():
                os.environ["UNSLOTH_COMPILE_DISABLE"] = "partial"
        # Cohere
        elif "cohere2" in model_types_all and transformers_version < Version(
            "4.50.0.dev0"
        ):
            raise RuntimeError(
                "Unsloth: Cohere's Command model only works on transformers >= 4.50.0."
                + NIGHTLY
            )
        # Sesame
        elif "csm" in model_types_all:
            os.environ["UNSLOTH_COMPILE_DISABLE"] = "partial"  # Inference is too slow
            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"  # Sesame fails
            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = (
                "all;torch.float32;torch.float16;"
                "if name.endswith(('_proj', 'fc1', 'fc2', 'codebook', 'head')): module.to(torch.float16)"
                ";"
            )
        # Granite 4
        elif "granitemoehybrid" in model_types_all:
            # Granite-4 rms norms are stored as 16 bit, but we upcast
            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
        # Olmo 2
        elif "olmo2" in model_types_all and transformers_version < Version(
            "4.50.0.dev0"
        ):
            raise RuntimeError(
                "Unsloth: OLMo-2 only works on transformers >= 4.50.0." + NIGHTLY
            )
        elif "falcon_h1" in model_types_all:
            # Falcon must use float32 Triton ie TRITON_F32_DEFAULT = 'ieee'
            # since Mamba kernels error out on using lower precision
            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = (
                "float16;torch.float32;torch.float16;"
                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"
                ";"
                "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
            )
        elif "nemotron_h" in model_types_all:
            # NemotronH (hybrid Mamba-2 + Transformer) uses same Mamba kernels as Falcon-H1
            # Mamba kernels need float32 Triton precision
            os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = (
                "float16;torch.float32;torch.float16;"
                "if name.endswith(('q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'head')): module.to(torch.float16)"
                ";"
                "os.environ['TRITON_F32_DEFAULT'] = 'ieee'"
            )
        elif "gpt_oss" in model_types_all:
            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
            if not load_in_4bit:
                # Only upcast MoE biases for MXFP4, not BnB
                # Set norms to float32 since anyways they get upcasted to float32
                os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = (
                    "all;None;None;"
                    "x = 'gate_up_proj_bias'\n"
                    "if hasattr(module, x): "
                    "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"
                    ""
                    "x = 'down_proj_bias'\n"
                    "if hasattr(module, x): "
                    "setattr(module, x, torch.nn.Parameter(getattr(module, x).to(torch.float32)) if isinstance(getattr(module, x), torch.nn.Parameter) else getattr(module, x).to(torch.float32))\n"
                    ""
                    ";"
                )
            else:
                # Set down projection compute dtype to be float32 for float16 machines
                # Set norms to float32 since anyways they get upcasted to float32
                os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"] = (
                    "torch.float16;torch.bfloat16;torch.float16;"
                    "if ('down_projs' in name) and hasattr(module, 'weight') and "
                    "torch.amax(dequantize_module_weight(module)) >= 0:"
                    "module._pre_set_compute_dtype = torch.float32\n"
                    ""
                    "if ('mlp.router' in name) and hasattr(module, 'weight'):"
                    "module._pre_set_compute_dtype = torch.float32\n"
                    ";"
                )
            # Set norms to float32 since anyways they get upcasted to float32
            os.environ["UNSLOTH_HIGH_PRECISION_LAYERNORM"] = "1"
        else:
            for check_model_name in DISABLE_COMPILE_MODEL_NAMES:
                if check_model_name in lowered_model_name:
                    os.environ["UNSLOTH_COMPILE_DISABLE"] = "partial"
                    os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"
                    if transformers_version < Version("4.50.0.dev0"):
                        raise RuntimeError(
                            f"Unsloth: {check_model_name} only works on transformers >= 4.50.0."
                            + NIGHTLY
                        )
                    break

        if auto_model is not None:
            # All other models need to disable static cache
            os.environ["UNSLOTH_DISABLE_STATIC_GENERATION"] = "1"

        # New transformers need to check manually.
        if SUPPORTS_LLAMA32:
            # Check if folder exists locally
            if os.path.isdir(model_name):
                exist_adapter_config = os.path.exists(
                    os.path.join(model_name, "adapter_config.json")
                )
                exist_config = os.path.exists(os.path.join(model_name, "config.json"))
                both_exist = exist_adapter_config and exist_config
            else:
                files = HfFileSystem(token = token).glob(f"{model_name}/*.json")
                files = list(os.path.split(x)[-1] for x in files)
                if (
                    sum(x == "adapter_config.json" or x == "config.json" for x in files)
                    >= 2
                ):
                    both_exist = True

        if not is_model and not is_peft:
            error = autoconfig_error if autoconfig_error is not None else peft_error
            # Old transformers version
            if "rope_scaling" in error.lower() and not SUPPORTS_LLAMA31:
                raise ImportError(
                    f"Unsloth: Your transformers version of {transformers_version} does not support new RoPE scaling methods.\n"
                    f"This includes Llama 3.1. The minimum required version is 4.43.2\n"
                    f'Try `pip install --upgrade "transformers>=4.43.2"`\n'
                    f"to obtain the latest transformers build, then restart this session."
                )
            # Create a combined error message showing both failures
            combined_error = (
                "Unsloth: Failed to load model. Both AutoConfig and PeftConfig loading failed.\n\n"
                f"AutoConfig error: {autoconfig_error}\n\n"
                f"PeftConfig error: {peft_error}\n\n"
            )
            raise RuntimeError(combined_error)

        # Get base model for PEFT:
        if is_peft:
            # Check base model again for PEFT
            model_name = peft_config.base_model_name_or_path
            if not use_exact_model_name:
                model_name = get_model_name(model_name, load_in_4bit)
            # Check if pre-quantized models are allowed
            # AMD Instinct GPUs need blocksize = 128 on bitsandbytes < 0.49.2 (our pre-quants use blocksize = 64)
            if not ALLOW_PREQUANTIZED_MODELS and model_name.lower().endswith(
                ("-unsloth-bnb-4bit", "-bnb-4bit")
            ):
                model_name = model_name.lower().removesuffix("-unsloth-bnb-4bit")
                model_name = model_name.lower().removesuffix("-bnb-4bit")
            # Change -BF16 to all False for 4bit, 8bit etc
            if model_name.lower().endswith("-bf16"):
                load_in_4bit = False
                load_in_8bit = False
                load_in_fp8 = False
                load_in_16bit = True

            model_config = AutoConfig.from_pretrained(
                model_name,
                token = token,
                trust_remote_code = trust_remote_code,
            )

        if not was_disabled:
            enable_progress_bars()

        do_logging = os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") == "1"
        if do_logging:
            redirector = contextlib.nullcontext()
        else:
            redirector = contextlib.redirect_stdout(open(os.devnull, "w"))

        model_types = ["siglip"] + model_types
        # Set forced float32 env flag
        os.environ["UNSLOTH_FORCE_FLOAT32"] = "0"
        do_forced_float32 = False
        for model_type_arch in model_types:
            if model_type_arch != "siglip":
                break
        global FORCE_FLOAT32
        for disable_name in FORCE_FLOAT32:
            # add comma to model_types_all matching in case of exact match for end
            if (
                disable_name.lower()
                == model_type_arch.lower().replace("-", "").replace("_", "")
                or disable_name.lower() in model_types_all
            ) and ((dtype == torch.float16) or not SUPPORTS_BFLOAT16):
                os.environ["UNSLOTH_FORCE_FLOAT32"] = "1"
                dtype = torch.bfloat16  # Change to bfloat16 loading
                break
        # Apply gradient checkpointing with smart heuristics
        use_gradient_checkpointing = apply_unsloth_gradient_checkpointing(
            use_gradient_checkpointing, max_seq_length, dtype
        )
        with redirector:
            patch_loss_functions(torch_compile = False)
            model_types, supports_sdpa = unsloth_compile_transformers(
                dtype = dtype,
                model_name = model_name,
                model_types = model_types,
                token = token,
                sdpa_dynamic_mask = True,
                sdpa_bool_masks = True,
                sdpa_gqa_replace = True,
                sdpa_dynamic_compile = True,
                compile_attention = True,
                disable_causal_masks = True,
                compile_torch_modules = True,
                compile_custom_modules = True,
                compile_function_calls = True,
                fuse_lm_head = True,
                gradient_checkpointing = True,
                manual_replacements = True,
                fast_lora_forwards = True,
                fast_residual_stream = False,
                accurate_accumulation = True,
                epilogue_fusion = True,
                max_autotune = False,
                shape_padding = True,
                cudagraphs = False,
                debug = False,
                fullgraph = fullgraph,
                import_from_cache = False,
                disable = False,
                return_logits = return_logits,
                trust_remote_code = trust_remote_code,
                unsloth_force_compile = unsloth_force_compile,
            )
        # Fix SDPA issues
        for model_type in DISABLE_SDPA_MODEL_NAMES:
            if model_type in model_types_all:
                supports_sdpa = False

        # Check if this is local model since the tokenizer gets overwritten
        if (
            os.path.exists(os.path.join(old_model_name, "tokenizer_config.json"))
            and os.path.exists(os.path.join(old_model_name, "tokenizer.json"))
            and os.path.exists(os.path.join(old_model_name, "special_tokens_map.json"))
        ):
            tokenizer_name = old_model_name
        else:
            tokenizer_name = kwargs.pop("tokenizer_name", None)

        # Check if VLM
        architectures = getattr(model_config, "architectures", None)
        if architectures is None:
            architectures = []
        is_vlm = any(x.endswith("ForConditionalGeneration") for x in architectures)
        is_vlm = is_vlm or hasattr(model_config, "vision_config")
        if auto_model is None:
            if is_vlm:
                # Check if the model's auto_map supports the VLM auto class.
                # Some VL models (e.g. Nemotron-VL) only register AutoModelForCausalLM
                # in their auto_map, not AutoModelForImageTextToText/AutoModelForVision2Seq.
                _auto_map = getattr(model_config, "auto_map", {}) or {}
                _vlm_class_name = AutoModelForVision2Seq.__name__
                if (
                    "AutoModelForCausalLM" in _auto_map
                    and _vlm_class_name not in _auto_map
                ):
                    auto_model = AutoModelForCausalLM
                else:
                    auto_model = AutoModelForVision2Seq
            else:
                auto_model = AutoModelForCausalLM

        load_in_4bit_kwargs = load_in_4bit
        load_in_8bit_kwargs = load_in_8bit
        if quantization_config is not None and not fast_inference:
            load_in_4bit_kwargs = False
            load_in_8bit_kwargs = False

        model, tokenizer = FastBaseModel.from_pretrained(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = _get_dtype(dtype),
            load_in_4bit = load_in_4bit_kwargs,
            load_in_8bit = load_in_8bit_kwargs,
            load_in_16bit = load_in_16bit,
            full_finetuning = full_finetuning,
            token = token,
            device_map = device_map,
            trust_remote_code = trust_remote_code,
            revision = revision if not is_peft else None,
            model_types = model_types,
            tokenizer_name = tokenizer_name,
            auto_model = auto_model,
            use_gradient_checkpointing = use_gradient_checkpointing,
            supports_sdpa = supports_sdpa,
            whisper_language = whisper_language,
            whisper_task = whisper_task,
            auto_config = model_config,
            offload_embedding = offload_embedding,
            float32_mixed_precision = float32_mixed_precision,
            # Pass vLLM/inference parameters
            fast_inference = fast_inference,
            gpu_memory_utilization = gpu_memory_utilization,
            float8_kv_cache = float8_kv_cache,
            random_state = random_state,
            max_lora_rank = max_lora_rank,
            disable_log_stats = disable_log_stats,
            load_in_fp8 = load_in_fp8,
            *args,
            **kwargs,
        )

        if resize_model_vocab is not None:
            model.resize_token_embeddings(resize_model_vocab)

        # In case the model supports tagging, add the unsloth tag.
        if hasattr(model, "add_model_tags"):
            model.add_model_tags(
                [
                    "unsloth",
                ]
            )
        if hasattr(tokenizer, "add_model_tags"):
            tokenizer.add_model_tags(
                [
                    "unsloth",
                ]
            )

        if load_in_4bit:
            # Fix up bitsandbytes config, but respect user-provided quantization_config
            if quantization_config is None:
                compute_dtype = dtype_from_config(model.config)
                quantization_config = {
                    # Sometimes compute_dtype is not a string!!
                    "bnb_4bit_compute_dtype": compute_dtype,
                    "bnb_4bit_quant_type": "nf4",
                    "bnb_4bit_use_double_quant": True,
                    "llm_int8_enable_fp32_cpu_offload": False,
                    "llm_int8_has_fp16_weight": False,
                    "llm_int8_skip_modules": None,
                    "llm_int8_threshold": 6.0,
                    "load_in_4bit": True,
                    "load_in_8bit": False,
                    "quant_method": "bitsandbytes",
                }
                model.config.update({"quantization_config": quantization_config})
            else:
                if hasattr(quantization_config, "to_dict"):
                    model.config.update(
                        {"quantization_config": quantization_config.to_dict()}
                    )
                elif isinstance(quantization_config, dict):
                    model.config.update({"quantization_config": quantization_config})

        if load_in_fp8 != False:
            _tag_model_with_fp8_torchao_config(model, fp8_mode)

        if is_peft:
            # From https://github.com/huggingface/peft/issues/184
            # Now add PEFT adapters
            model = PeftModel.from_pretrained(
                model,
                old_model_name,
                token = token,
                revision = revision,
                is_trainable = True,
                trust_remote_code = trust_remote_code,
            )
            # Patch it as well!
            model = FastBaseModel.post_patch_model(
                model, use_gradient_checkpointing, trust_remote_code = trust_remote_code
            )

        # Apply QAT if specified
        if qat_scheme is not None:
            print("Unsloth: Applying QAT to mitigate quantization degradation")
            model = FastModel._prepare_for_qat(model, qat_scheme)

        # Patch Tiled MLP
        # to turn on set UNSLOTH_TILED_MLP to "arctic", "target", or "target:{GB}""
        patch_tiled_mlp_choice = os.environ.get(
            "UNSLOTH_TILED_MLP", "arctic" if unsloth_tiled_mlp else "0"
        )
        if patch_tiled_mlp_choice != "0" or unsloth_tiled_mlp:
            patch_tiled_mlp(model, patch_options_str = patch_tiled_mlp_choice)

        model = _fix_rope_inv_freq(model)
        return model, tokenizer


class FastVisionModel(FastModel):
    """Alias of `FastModel`: this subclass defines no attributes or methods of its own."""

    pass


class FastTextModel(FastModel):
    """Alias of `FastModel`: this subclass defines no attributes or methods of its own."""

    pass


================================================
FILE: unsloth/models/loader_utils.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ..device_type import DEVICE_TYPE_TORCH
import importlib
import os
import torch
import re
import tempfile
from typing import Union
from .mapper import (
    INT_TO_FLOAT_MAPPER,
    FLOAT_TO_INT_MAPPER,
    MAP_TO_UNSLOTH_16bit,
    FLOAT_TO_FP8_BLOCK_MAPPER,
    FLOAT_TO_FP8_ROW_MAPPER,
)

# https://github.com/huggingface/transformers/pull/26037 allows 4 bit loading!
from transformers import __version__ as transformers_version
from unsloth.models._utils import TorchAOConfig
from unsloth_zoo.utils import Version
import gc

# Re-bind the imported version string as a `Version` object so it can be compared below.
transformers_version = Version(transformers_version)
# True when the installed transformers is at least 4.37, the minimum for native 4bit loading.
SUPPORTS_FOURBIT = transformers_version >= Version("4.37")

# Environment variable names tried, in order, when reading the rank / world size
# of this process from the environment.
LOCAL_RANK_KEYS = ("LOCAL_RANK", "RANK")
WORLD_SIZE_KEYS = ("WORLD_SIZE",)

# Lower-cased model name -> lower-cased replacement name. `get_model_name` applies
# this table to the name produced by the regular mappers.
BAD_MAPPINGS = {
    "unsloth/Qwen3-32B-unsloth-bnb-4bit".lower(): "unsloth/Qwen3-32B-bnb-4bit".lower(),  # 32B dynamic quant is way too big
    "unsloth/Qwen3-30B-A3B-unsloth-bnb-4bit".lower(): "unsloth/Qwen3-30B-A3B".lower(),  # HF loads MoEs too slowly
    "unsloth/Qwen3-30B-A3B-bnb-4bit".lower(): "unsloth/Qwen3-30B-A3B".lower(),  # We rather do it on the fly
    "unsloth/Qwen3-30B-A3B-Base-unsloth-bnb-4bit".lower(): "unsloth/Qwen3-30B-A3B-Base".lower(),  # HF loads MoEs too slowly
    "unsloth/Qwen3-30B-A3B-Base-bnb-4bit".lower(): "unsloth/Qwen3-30B-A3B-Base".lower(),  # We rather do it on the fly
}


def _get_torchao_fp8_config(fp8_mode):
    """
    Forward `fp8_mode` to `unsloth_zoo.vllm_utils._get_torchao_fp8_config`.

    The import is deferred to call time so that an optional, broken vLLM
    install cannot break a plain `import unsloth`.
    """
    from unsloth_zoo.vllm_utils import _get_torchao_fp8_config as build_fp8_config

    fp8_config = build_fp8_config(fp8_mode)
    return fp8_config


def _get_env_int(keys):
    """
    Return the first environment variable in `keys` that parses as an int.

    Variables that are unset, or whose value `int()` rejects with a
    `ValueError`, are skipped. Returns None when no key yields an integer.
    """
    environ = os.environ
    for name in keys:
        raw = environ.get(name)
        if raw is not None:
            try:
                parsed = int(raw)
            except ValueError:
                # Not an integer: fall through to the next candidate name.
                pass
            else:
                return parsed
    return None


def _infer_distributed_ranks():
    """
    Return a `(rank, world_size)` pair for the current process.

    When `torch.distributed` is available and initialised, its rank and world
    size are used. Otherwise (or if querying them raises) both values are read
    from the environment via `LOCAL_RANK_KEYS` / `WORLD_SIZE_KEYS`; either may
    be None when no usable variable is set.
    """
    dist = torch.distributed
    if dist.is_available() and dist.is_initialized():
        try:
            rank = dist.get_rank()
            world_size = dist.get_world_size()
        except Exception:
            # Fall back to the environment below.
            pass
        else:
            return rank, world_size

    env_rank = _get_env_int(LOCAL_RANK_KEYS)
    env_world_size = _get_env_int(WORLD_SIZE_KEYS)
    return env_rank, env_world_size


def is_distributed():
    """
    Report whether this process appears to be part of a distributed run.

    True when the inferred world size exceeds 1, or when the inferred rank is
    a positive number.
    """
    rank, world_size = _infer_distributed_ranks()
    # A missing / zero world size counts as a single process.
    if (world_size or 1) > 1:
        return True
    return rank is not None and rank > 0


def prepare_device_map():
    """
    Build a per-process `device_map` for distributed runs.

    Returns:
        `(None, False)` when the process does not look distributed (world size
        of at most 1 and no positive rank). Otherwise
        `({"": f"{DEVICE_TYPE_TORCH}:{local_rank}"}, True)`, after a
        best-effort attempt to make that device the current one.
    """
    rank, world_size = _infer_distributed_ranks()
    distributed = (world_size or 1) > 1 or (rank is not None and rank > 0)
    if not distributed:
        return None, False

    # `_infer_distributed_ranks` returns `torch.distributed.get_rank()` once a
    # process group is initialised, which is the *global* rank. On multi-node
    # jobs that value can exceed the number of devices on this node, so prefer
    # the launcher-provided per-node index and only fall back to `rank` when
    # `LOCAL_RANK` is not set.
    local_rank = _get_env_int(("LOCAL_RANK",))
    if local_rank is None:
        local_rank = 0 if rank is None else rank

    device_map = {"": f"{DEVICE_TYPE_TORCH}:{local_rank}"}
    try:
        if DEVICE_TYPE_TORCH == "cuda":
            torch.cuda.set_device(local_rank)
        elif DEVICE_TYPE_TORCH == "xpu" and hasattr(torch, "xpu"):
            torch.xpu.set_device(local_rank)
    except Exception:
        # Deliberately best effort: the device map is still returned even if
        # the current device could not be switched.
        pass
    return device_map, True


def __get_model_name(
    model_name,
    load_in_4bit = True,
    INT_TO_FLOAT_MAPPER = None,
    FLOAT_TO_INT_MAPPER = None,
    MAP_TO_UNSLOTH_16bit = None,
    load_in_fp8 = False,
    FLOAT_TO_FP8_BLOCK_MAPPER = None,
    FLOAT_TO_FP8_ROW_MAPPER = None,
):
    """
    Resolve `model_name` to the repository that should actually be loaded.

    Lookups use the lower-cased name against the supplied mapping tables.

    Args:
        model_name: Requested model; coerced with `str()`.
        load_in_4bit: Whether 4bit loading was requested.
        INT_TO_FLOAT_MAPPER: 4bit name -> full precision name.
        FLOAT_TO_INT_MAPPER: full precision name -> 4bit name.
        MAP_TO_UNSLOTH_16bit: name -> Unsloth 16bit name.
        load_in_fp8: `True`, `False` or `"block"`.
        FLOAT_TO_FP8_BLOCK_MAPPER: name -> block-scaled FP8 name.
        FLOAT_TO_FP8_ROW_MAPPER: name -> row-scaled FP8 name.

        Any mapper left as None is treated as an empty table.

    Returns:
        The mapped model name, or None when no table has an entry. In the FP8
        path with no entry, the original `model_name` is returned instead when
        an installed vllm is at least 0.12.0.
    """
    model_name = str(model_name)
    lower_model_name = model_name.lower()

    # The signature advertises None defaults, but every lookup below uses
    # `name in MAPPER`, which raises TypeError on None. Normalise up front.
    INT_TO_FLOAT_MAPPER = {} if INT_TO_FLOAT_MAPPER is None else INT_TO_FLOAT_MAPPER
    FLOAT_TO_INT_MAPPER = {} if FLOAT_TO_INT_MAPPER is None else FLOAT_TO_INT_MAPPER
    MAP_TO_UNSLOTH_16bit = {} if MAP_TO_UNSLOTH_16bit is None else MAP_TO_UNSLOTH_16bit
    FLOAT_TO_FP8_BLOCK_MAPPER = (
        {} if FLOAT_TO_FP8_BLOCK_MAPPER is None else FLOAT_TO_FP8_BLOCK_MAPPER
    )
    FLOAT_TO_FP8_ROW_MAPPER = (
        {} if FLOAT_TO_FP8_ROW_MAPPER is None else FLOAT_TO_FP8_ROW_MAPPER
    )

    assert load_in_fp8 in (True, False, "block")
    if load_in_fp8 != False:
        if load_in_fp8 == True and (os.environ.get("UNSLOTH_HAS_FBGEMM", "0") == "1"):
            if lower_model_name in FLOAT_TO_FP8_ROW_MAPPER:
                # Faster row scaling only works if FBGEMM works!
                return FLOAT_TO_FP8_ROW_MAPPER[lower_model_name]
            elif lower_model_name in FLOAT_TO_FP8_BLOCK_MAPPER:
                # Otherwise we use the slower blockwise type
                return FLOAT_TO_FP8_BLOCK_MAPPER[lower_model_name]
        else:
            if lower_model_name in FLOAT_TO_FP8_BLOCK_MAPPER:
                return FLOAT_TO_FP8_BLOCK_MAPPER[lower_model_name]
        # Mapper didn't find a pre-quantized model.
        # For vllm >= 0.12.0, we can quantize the model to FP8 on the fly,
        # so just return the original model name. Older vllm versions will
        # fall through to offline quantization via _offline_quantize_to_fp8.
        if importlib.util.find_spec("vllm") is not None:
            import vllm

            if Version(vllm.__version__) >= Version("0.12.0"):
                return model_name
        return None

    elif not SUPPORTS_FOURBIT and lower_model_name in INT_TO_FLOAT_MAPPER:
        # transformers is too old for native 4bit checkpoints: load the
        # full precision counterpart and tell the user how to upgrade.
        model_name = INT_TO_FLOAT_MAPPER[lower_model_name]
        print(
            f"Unsloth: Your transformers version of {transformers_version} does not support native "
            f"4bit loading.\nThe minimum required version is 4.37.\n"
            f'Try `pip install --upgrade "transformers>=4.37"`\n'
            f"to obtain the latest transformers build, then restart this session.\n"
            f"For now, we shall load `{model_name}` instead (still 4bit, just slower downloading)."
        )
        return model_name

    elif not load_in_4bit and lower_model_name in INT_TO_FLOAT_MAPPER:
        # A 4bit name was passed with `load_in_4bit = False`.
        return INT_TO_FLOAT_MAPPER[lower_model_name]

    elif not load_in_4bit and lower_model_name in MAP_TO_UNSLOTH_16bit:
        return MAP_TO_UNSLOTH_16bit[lower_model_name]

    elif load_in_4bit and SUPPORTS_FOURBIT and lower_model_name in FLOAT_TO_INT_MAPPER:
        # Support returning original full -bnb-4bit name if specified specifically
        # since we'll map it to the dynamic version instead
        if lower_model_name.endswith("-bnb-4bit"):
            return lower_model_name

        return FLOAT_TO_INT_MAPPER[lower_model_name]

    return None


def _get_new_mapper():
    """
    Fetch the mapper tables from the `main` branch of the Unsloth repository.

    The remote `mapper.py` is downloaded, truncated to start at
    `__INT_TO_FLOAT_MAPPER`, has its three table names prefixed with `NEW_`,
    and is then executed.

    Returns:
        `(NEW_INT_TO_FLOAT_MAPPER, NEW_FLOAT_TO_INT_MAPPER,
        NEW_MAP_TO_UNSLOTH_16bit)` on success, or three empty dicts when
        anything goes wrong (no `requests`, network error, bad status,
        unexpected file contents, ...).
    """
    try:
        import requests

        url = "https://raw.githubusercontent.com/unslothai/unsloth/main/unsloth/models/mapper.py"
        with requests.get(url, timeout = 3) as response:
            # Do not try to execute an HTML / plain-text error page.
            response.raise_for_status()
            source = response.text

        start = source.find("__INT_TO_FLOAT_MAPPER")
        if start < 0:
            # Marker missing: the file layout is not what we expect.
            return {}, {}, {}
        source = source[start:]
        source = (
            source.replace("INT_TO_FLOAT_MAPPER", "NEW_INT_TO_FLOAT_MAPPER")
            .replace("FLOAT_TO_INT_MAPPER", "NEW_FLOAT_TO_INT_MAPPER")
            .replace("MAP_TO_UNSLOTH_16bit", "NEW_MAP_TO_UNSLOTH_16bit")
        )

        # SECURITY NOTE(review): this executes code downloaded at runtime. It
        # is only as trustworthy as the HTTPS fetch of the upstream repository;
        # consider replacing it with a data-only format.
        #
        # Execute in a *copy* of the module globals: the downloaded code can
        # still resolve the same names, but whatever it assigns (including the
        # FP8 mapper tables, which are not renamed above) no longer overwrites
        # this module's own state.
        namespace = dict(globals())
        exec(source, namespace)
        return (
            namespace["NEW_INT_TO_FLOAT_MAPPER"],
            namespace["NEW_FLOAT_TO_INT_MAPPER"],
            namespace["NEW_MAP_TO_UNSLOTH_16bit"],
        )
    except Exception:
        # Best effort only. `Exception` (not a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate.
        return {}, {}, {}


def _resolve_with_mappers(
    model_name,
    load_in_4bit,
    load_in_fp8,
    int_to_float,
    float_to_int,
    map_to_unsloth_16bit,
):
    """
    Run `__get_model_name` with caller-chosen 4bit / 16bit mapping tables.

    The FP8 block and row tables are always the ones imported by this module.
    Returns whatever `__get_model_name` returns.
    """
    lookup_tables = dict(
        INT_TO_FLOAT_MAPPER = int_to_float,
        FLOAT_TO_INT_MAPPER = float_to_int,
        MAP_TO_UNSLOTH_16bit = map_to_unsloth_16bit,
        FLOAT_TO_FP8_BLOCK_MAPPER = FLOAT_TO_FP8_BLOCK_MAPPER,
        FLOAT_TO_FP8_ROW_MAPPER = FLOAT_TO_FP8_ROW_MAPPER,
    )
    resolved = __get_model_name(
        model_name = model_name,
        load_in_4bit = load_in_4bit,
        load_in_fp8 = load_in_fp8,
        **lookup_tables,
    )
    return resolved


def get_model_name(
    model_name,
    load_in_4bit = True,
    load_in_fp8 = False,
    token = None,
    trust_remote_code = False,
):
    """
    Resolve the model name that should be loaded for the requested precision.

    Args:
        model_name: Requested model name. Non-string values (for example a
            `pathlib.Path`) are accepted.
        load_in_4bit: Whether 4bit loading was requested.
        load_in_fp8: `True`, `False` or `"block"`.
        token: Accepted for interface compatibility; not used in this function.
        trust_remote_code: Accepted for interface compatibility; not used in
            this function.

    Returns:
        The mapped name (after applying `BAD_MAPPINGS`), or `model_name`
        itself, unchanged, when no mapping applies.

    Raises:
        NotImplementedError: when the bundled tables have no entry for
            `model_name` but the tables fetched by `_get_new_mapper` do.
    """
    assert load_in_fp8 in (True, False, "block")
    new_model_name = _resolve_with_mappers(
        model_name = model_name,
        load_in_4bit = load_in_4bit,
        load_in_fp8 = load_in_fp8,
        int_to_float = INT_TO_FLOAT_MAPPER,
        float_to_int = FLOAT_TO_INT_MAPPER,
        map_to_unsloth_16bit = MAP_TO_UNSLOTH_16bit,
    )
    # In the rare case, we convert bad model names to other names
    # For eg too large dynamic quants or MoEs
    if isinstance(new_model_name, str) and new_model_name.lower() in BAD_MAPPINGS:
        new_model_name = BAD_MAPPINGS[new_model_name.lower()]

    # The resolver coerces with `str()`, so do the same for the shape check
    # below instead of calling string methods on the raw argument.
    model_name_str = str(model_name)
    if (
        new_model_name is None
        and model_name_str.count("/") == 1
        and model_name_str[0].isalnum()
    ):
        # Try checking if a new Unsloth version allows it!
        NEW_INT_TO_FLOAT_MAPPER, NEW_FLOAT_TO_INT_MAPPER, NEW_MAP_TO_UNSLOTH_16bit = (
            _get_new_mapper()
        )
        upgraded_model_name = _resolve_with_mappers(
            model_name = model_name,
            load_in_4bit = load_in_4bit,
            load_in_fp8 = load_in_fp8,
            int_to_float = NEW_INT_TO_FLOAT_MAPPER,
            float_to_int = NEW_FLOAT_TO_INT_MAPPER,
            map_to_unsloth_16bit = NEW_MAP_TO_UNSLOTH_16bit,
        )
        if upgraded_model_name is not None:
            raise NotImplementedError(
                f"Unsloth: {model_name} is not supported in your current Unsloth version! Please update Unsloth via:\n\n"
                "pip uninstall unsloth unsloth_zoo -y\n"
                'pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"\n'
                'pip install --upgrade --no-cache-dir "git+https://github.com/unslothai/unsloth-zoo.git"\n'
            )

    if new_model_name is None:
        # Nothing matched: hand back the caller's original object untouched.
        new_model_name = model_name

    return new_model_name


def _offline_quantize_to_fp8(model_name: str, fp8_mode: str) -> str:
    """
    Quantizes the model to fp8 using torchao and saving the quantized model to a
    temporary location. Return the path to the quantized model.

    The output directory is `<tempdir>/<last path component of model_name>-fp8-<fp8_mode>`.
    If that directory already exists it is returned as-is and nothing is
    quantized again.

    Note: For vllm >= 0.12.0, we should dynamically quantize the model in vllm instead:

      llm = LLM(
        ...
        hf_overrides={"quantization_config_file": "torchao_config.json"},
      )
    """
    temp_dir = tempfile.gettempdir()
    # Strip trailing slashes first: otherwise "org/model/" has an empty last
    # component and every such model would share the directory "-fp8-<mode>".
    base_name = model_name.rstrip("/").split("/")[-1]
    new_model_name = base_name + "-fp8-" + fp8_mode
    new_model_name = os.path.join(temp_dir, new_model_name)
    print(
        f"Unsloth: Quantizing '{model_name}' to fp8, using model_name='{new_model_name}' instead"
    )

    if not os.path.isdir(new_model_name):
        from transformers import (
            AutoModelForCausalLM,
            AutoModelForImageTextToText,
            AutoTokenizer,
            AutoProcessor,
            TorchAoConfig,
            AutoConfig,
        )

        qconfig = _get_torchao_fp8_config(fp8_mode)
        qconfig = TorchAoConfig(qconfig)
        config = AutoConfig.from_pretrained(model_name)
        # `architectures` can be missing or None on a config; treat that as
        # "no architectures" instead of failing while iterating None.
        architectures = getattr(config, "architectures", None) or []
        is_vlm = any(
            x.endswith(("ForConditionalGeneration", "ForVisionText2Text"))
            for x in architectures
        )
        is_vlm = is_vlm or hasattr(config, "vision_config")
        auto_model = AutoModelForImageTextToText if is_vlm else AutoModelForCausalLM
        auto_processor = AutoProcessor if is_vlm else AutoTokenizer
        model = auto_model.from_pretrained(
            model_name,
            torch_dtype = "auto",
            device_map = "auto",
            quantization_config = qconfig,
        )
        tokenizer = auto_processor.from_pretrained(model_name)
        model.save_pretrained(new_model_name, safe_serialization = False)
        # Drop the model and clear caches before writing the tokenizer.
        del model
        for _ in range(2):
            torch.cuda.empty_cache()
            gc.collect()
        tokenizer.save_pretrained(new_model_name)
    return new_model_name


def _tag_model_with_fp8_torchao_config(model: torch.nn.Module, fp8_mode: str):
    """
    Tag a model with a `TorchAOConfig` so downstream callers will know what to do with it.

    On success `model.torchao_config` is set to a `TorchAOConfig` with no QAT
    scheme and a single `(base_config, None)` entry. This is best effort: if
    building the config fails, the model is left untouched and nothing is
    raised.
    """
    try:
        base_config = _get_torchao_fp8_config(fp8_mode)
        model.torchao_config = TorchAOConfig(
            qat_scheme = None,
            base_config_and_filter_fns = [(base_config, None)],
        )
    except Exception:
        # Tagging is optional. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit are not silently swallowed.
        pass


def _get_fp8_mode_and_check_settings(
    load_in_fp8: Union[bool, str],
    fast_inference: bool,
    full_finetuning: bool = False,
    load_in_4bit: bool = False,
    load_in_8bit: bool = False,
    load_in_16bit: bool = False,
) -> str:
    """
    Validate an enabled `load_in_fp8` request and return its fp8 mode.

    `load_in_fp8 = True` selects the default mode "row"; any other value is
    taken as the mode itself and must be "row" or "block".

    A `ValueError` is raised, in this order, when:

    1. the mode is not "row" or "block"
    2. full finetuning is requested
    3. `load_in_4bit`, `load_in_8bit` or `load_in_16bit` is also set
    4. the GPU is not CUDA with compute capability (9, 0) or above (H100+)
    5. torch is older than 2.9.0
    6. torchao is not installed, or is older than 0.15.0

    If fbgemm_gpu (with its `experimental` subpackage) is installed but older
    than 1.4.1, `UNSLOTH_HAS_FBGEMM` is set to "0" so Triton kernels are used.

    `fast_inference` is accepted but not consulted here.

    Returns the fp8 mode, one of "row" or "block".
    """
    assert load_in_fp8 is not False
    fp8_mode = "row" if load_in_fp8 is True else load_in_fp8

    # ---- user settings ----
    if fp8_mode not in ("row", "block"):
        raise ValueError(
            f"Unsloth: `load_in_fp8` can only be 'row' or 'block', got '{fp8_mode}'"
        )
    if full_finetuning:
        raise ValueError(
            "Unsloth: `load_in_fp8` is not compatible with full finetuning"
        )
    if any((load_in_4bit, load_in_8bit, load_in_16bit)):
        raise ValueError(
            "Unsloth: `load_in_fp8` is not compatible with `load_in_4bit`, `load_in_8bit` or `load_in_16bit`",
        )

    # ---- hardware: Hopper or newer ----
    cuda = torch.cuda
    hopper_or_newer = (
        cuda.is_available()
        and torch.version.cuda
        and cuda.get_device_capability() >= (9, 0)
    )
    if not hopper_or_newer:
        raise ValueError(
            "Unsloth: On the fly `load_in_fp8` requires H100 GPUs or after. Try `unsloth/Qwen3-8B` instead."
        )

    # ---- torch >= 2.9.0 ----
    if Version(torch.__version__) < Version("2.9.0"):
        raise ValueError(
            "Unsloth: On the fly `load_in_fp8` requires torch 2.9.0+. Try `unsloth/Qwen3-8B` instead."
        )

    # ---- torchao >= 0.15.0, which ships https://github.com/pytorch/ao/pull/3158 ----
    torchao_spec = importlib.util.find_spec("torchao")
    if torchao_spec is None:
        raise ValueError(
            "Unsloth: Please install torchao for on the fly float8 to work! Try `unsloth/Qwen3-8B` instead."
        )
    import torchao

    torchao_too_old_message = (
        "Unsloth: `load_in_fp8` requires torchao 0.15.0+ (or nightly).\n"
        f"You have torchao version={torchao.__version__}\n"
        "Use `pip install --upgrade --force-reinstall torchao`"
    )
    if Version(torchao.__version__) < Version("0.15.0"):
        raise ValueError(torchao_too_old_message)

    # ---- optional fbgemm_gpu_genai: an old build means Triton kernels instead ----
    if importlib.util.find_spec("fbgemm_gpu") is not None:
        if importlib.util.find_spec("fbgemm_gpu.experimental") is not None:
            import fbgemm_gpu.experimental.gen_ai

            if Version(fbgemm_gpu.__version__) < Version("1.4.1"):
                os.environ["UNSLOTH_HAS_FBGEMM"] = "0"
                from unsloth_zoo.log import logger

                logger.info(
                    f"Unsloth: fbgemm_gpu_genai=={fbgemm_gpu.__version__} is old for FP8 loading. "
                    f"Using Triton kernels instead."
                )
    return fp8_mode


================================================
FILE: unsloth/models/mapper.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by `from <this module> import *`: the model-name mapping tables.
__all__ = [
    "INT_TO_FLOAT_MAPPER",
    "FLOAT_TO_INT_MAPPER",
    "MAP_TO_UNSLOTH_16bit",
    "FLOAT_TO_FP8_BLOCK_MAPPER",
    "FLOAT_TO_FP8_ROW_MAPPER",
]

__INT_TO_FLOAT_MAPPER = \
{
    "unsloth/mistral-7b-bnb-4bit" : (
        "unsloth/mistral-7b",
        "mistralai/Mistral-7B-v0.1",
    ),
    "unsloth/llama-2-7b-bnb-4bit" : (
        "unsloth/llama-2-7b",
        "meta-llama/Llama-2-7b-hf",
    ),
    "unsloth/llama-2-13b-bnb-4bit" : (
        "unsloth/llama-2-13b",
        "meta-llama/Llama-2-13b-hf",
    ),
    "unsloth/codellama-34b-bnb-4bit" : (
        "codellama/CodeLlama-34b-hf",
    ),
    "unsloth/zephyr-sft-bnb-4bit" : (
        "unsloth/zephyr-sft",
        "HuggingFaceH4/mistral-7b-sft-beta",
    ),
    "unsloth/tinyllama-bnb-4bit" : (
        "unsloth/tinyllama",
        "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    ),
    "unsloth/tinyllama-chat-bnb-4bit" : (
        "unsloth/tinyllama-chat",
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    ),
    "unsloth/mistral-7b-instruct-v0.1-bnb-4bit" : (
        "unsloth/mistral-7b-instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.1",
    ),
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit" : (
        "unsloth/mistral-7b-instruct-v0.2",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ),
    "unsloth/llama-2-7b-chat-bnb-4bit" : (
        "unsloth/llama-2-7b-chat",
        "meta-llama/Llama-2-7b-chat-hf",
    ),
    "unsloth/llama-2-7b-chat-bnb-4bit" : (
        "unsloth/llama-2-7b-chat",
        "meta-llama/Llama-2-7b-chat-hf",
    ),
    "unsloth/Mixtral-8x7B-v0.1-unsloth-bnb-4bit" : (
        "unsloth/Mixtral-8x7B-v0.1",
        "mistralai/Mixtral-8x7B-v0.1",
        "unsloth/Mixtral-8x7B-v0.1-bnb-4bit",
    ),
    "unsloth/Mixtral-8x7B-Instruct-v0.1-unsloth-bnb-4bit" : (
        "unsloth/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "unsloth/Mixtral-8x7B-Instruct-v0.1-bnb-4bit",
    ),
    "unsloth/codellama-7b-bnb-4bit" : (
        "unsloth/codellama-7b",
        "codellama/CodeLlama-7b-hf",
    ),
    "unsloth/codellama-13b-bnb-4bit" : (
        "codellama/CodeLlama-13b-hf",
    ),
    "unsloth/yi-6b-bnb-4bit" : (
        "unsloth/yi-6b",
        "01-ai/Yi-6B",
    ),
    "unsloth/solar-10.7b-bnb-4bit" : (
        "upstage/SOLAR-10.7B-v1.0",
    ),
    "unsloth/gemma-7b-bnb-4bit" : (
        "unsloth/gemma-7b",
        "google/gemma-7b",
    ),
    "unsloth/gemma-2b-bnb-4bit" : (
        "unsloth/gemma-2b",
        "google/gemma-2b",
    ),
    "unsloth/gemma-7b-it-bnb-4bit" : (
        "unsloth/gemma-7b-it",
        "google/gemma-7b-it",
    ),
    "unsloth/gemma-2b-bnb-4bit" : (
        "unsloth/gemma-2b-it",
        "google/gemma-2b-it",
    ),
    "unsloth/mistral-7b-v0.2-bnb-4bit" : (
        "unsloth/mistral-7b-v0.2",
        "alpindale/Mistral-7B-v0.2-hf",
    ),
    "unsloth/gemma-1.1-2b-it-bnb-4bit" : (
        "unsloth/gemma-1.1-2b-it",
        "google/gemma-1.1-2b-it",
    ),
    "unsloth/gemma-1.1-7b-it-bnb-4bit" : (
        "unsloth/gemma-1.1-7b-it",
        "google/gemma-1.1-7b-it",
    ),
    "unsloth/Starling-LM-7B-beta" : (
        "unsloth/Starling-LM-7B-beta",
        "Nexusflow/Starling-LM-7B-beta",
    ),
    "unsloth/Hermes-2-Pro-Mistral-7B-bnb-4bit" : (
        "unsloth/Hermes-2-Pro-Mistral-7B",
        "NousResearch/Hermes-2-Pro-Mistral-7B",
    ),
    "unsloth/OpenHermes-2.5-Mistral-7B-bnb-4bit" : (
        "unsloth/OpenHermes-2.5-Mistral-7B",
        "teknium/OpenHermes-2.5-Mistral-7B",
    ),
    "unsloth/codegemma-2b-bnb-4bit" : (
        "unsloth/codegemma-2b",
        "google/codegemma-2b",
    ),
    "unsloth/codegemma-7b-bnb-4bit" : (
        "unsloth/codegemma-7b",
        "google/codegemma-7b",
    ),
    "unsloth/codegemma-7b-it-bnb-4bit" : (
        "unsloth/codegemma-7b-it",
        "google/codegemma-7b-it",
    ),
    "unsloth/llama-3-8b-bnb-4bit" : (
        "unsloth/llama-3-8b",
        "meta-llama/Meta-Llama-3-8B",
    ),
    "unsloth/llama-3-8b-Instruct-bnb-4bit" : (
        "unsloth/llama-3-8b-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
    ),
    "unsloth/llama-3-70b-bnb-4bit" : (
        "meta-llama/Meta-Llama-3-70B",
    ),
    "unsloth/llama-3-70b-Instruct-bnb-4bit" : (
        "meta-llama/Meta-Llama-3-70B-Instruct",
    ),
    "unsloth/Phi-3-mini-4k-instruct-bnb-4bit" : (
        "unsloth/Phi-3-mini-4k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ),
    "unsloth/mistral-7b-v0.3-bnb-4bit" : (
        "unsloth/mistral-7b-v0.3",
        "mistralai/Mistral-7B-v0.3",
    ),
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit" : (
        "unsloth/mistral-7b-instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.3",
    ),
    "unsloth/Phi-3-medium-4k-instruct-bnb-4bit" : (
        "unsloth/Phi-3-medium-4k-instruct",
        "microsoft/Phi-3-medium-4k-instruct",
    ),
    "unsloth/Qwen2-0.5B-bnb-4bit" : (
        "unsloth/Qwen2-0.5B",
        "Qwen/Qwen2-0.5B",
    ),
    "unsloth/Qwen2-0.5B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2-0.5B-Instruct",
        "Qwen/Qwen2-0.5B-Instruct",
    ),
    "unsloth/Qwen2-1.5B-bnb-4bit" : (
        "unsloth/Qwen2-1.5B",
        "Qwen/Qwen2-1.5B",
    ),
    "unsloth/Qwen2-1.5B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2-1.5B-Instruct",
        "Qwen/Qwen2-1.5B-Instruct",
    ),
    "unsloth/Qwen2-7B-bnb-4bit" : (
        "unsloth/Qwen2-7B",
        "Qwen/Qwen2-7B",
    ),
    "unsloth/Qwen2-7B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2-7B-Instruct",
        "Qwen/Qwen2-7B-Instruct",
    ),
    "unsloth/Qwen2-70B-bnb-4bit" : (
        "Qwen/Qwen2-70B",
    ),
    "unsloth/Qwen2-70B-Instruct-bnb-4bit" : (
        "Qwen/Qwen2-70B-Instruct",
    ),
    "mistralai/Codestral-22B-v0.1" : (
        "mistral-community/Codestral-22B-v0.1",
    ),
    "unsloth/gemma-2-9b-bnb-4bit" : (
        "unsloth/gemma-2-9b",
        "google/gemma-2-9b",
    ),
    "unsloth/gemma-2-27b-bnb-4bit" : (
        "unsloth/gemma-2-27b",
        "google/gemma-2-27b",
    ),
    "unsloth/gemma-2-9b-it-bnb-4bit" : (
        "unsloth/gemma-2-9b-it",
        "google/gemma-2-9b-it",
    ),
    "unsloth/gemma-2-27b-it-bnb-4bit" : (
        "unsloth/gemma-2-27b-it",
        "google/gemma-2-27b-it",
    ),
    "unsloth/Phi-3-mini-4k-instruct-v0-bnb-4bit" : ( # Old Phi pre July
        "unsloth/Phi-3-mini-4k-instruct-v0",
    ),
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit" : ( # New 12b Mistral models
        "unsloth/Mistral-Nemo-Instruct-2407",
        "mistralai/Mistral-Nemo-Instruct-2407",
    ),
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit" : ( # New 12b Mistral models
        "unsloth/Mistral-Nemo-Base-2407",
        "mistralai/Mistral-Nemo-Base-2407",
    ),
    "unsloth/Meta-Llama-3.1-8B-unsloth-bnb-4bit" : (
        "unsloth/Meta-Llama-3.1-8B",
        "meta-llama/Meta-Llama-3.1-8B",
        "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    ),
    "unsloth/Meta-Llama-3.1-8B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "RedHatAI/Llama-3.1-8B-Instruct-FP8",
            "unsloth/Llama-3.1-8B-Instruct-FP8-Block",
            "unsloth/Llama-3.1-8B-Instruct-FP8-Dynamic",
        ),
        "16" : (
            "unsloth/Meta-Llama-3.1-8B-Instruct",
            "meta-llama/Meta-Llama-3.1-8B-Instruct",
            "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Llama-3.1-8B-unsloth-bnb-4bit" : (
        "unsloth/Llama-3.1-8B",
        "meta-llama/Llama-3.1-8B",
        "unsloth/Llama-3.1-8B-bnb-4bit",
    ),
    "unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "RedHatAI/Llama-3.1-8B-Instruct-FP8",
            "unsloth/Llama-3.1-8B-Instruct-FP8-Block",
            "unsloth/Llama-3.1-8B-Instruct-FP8-Dynamic",
        ),
        "16" : (
            "unsloth/Llama-3.1-8B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "unsloth/Llama-3.1-8B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit" : (
        "unsloth/Meta-Llama-3.1-70B",
        "meta-llama/Meta-Llama-3.1-70B",
    ),
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit" : (
        "meta-llama/Meta-Llama-3.1-405B",
    ),
    "unsloth/Meta-Llama-3.1-405B-Instruct-bnb-4bit" : (
        "meta-llama/Meta-Llama-3.1-405B-Instruct",
    ),
    "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit" : (
        "unsloth/Meta-Llama-3.1-70B-Instruct",
        "meta-llama/Meta-Llama-3.1-70B-Instruct",
    ),
    "unsloth/Mistral-Large-Instruct-2407-bnb-4bit" : (
        "mistralai/Mistral-Large-Instruct-2407",
    ),
    "unsloth/gemma-2-2b-bnb-4bit" : (
        "unsloth/gemma-2-2b",
        "google/gemma-2-2b",
    ),
    "unsloth/gemma-2-2b-it-bnb-4bit" : (
        "unsloth/gemma-2-2b-it",
        "google/gemma-2-2b-it",
    ),
    "unsloth/Phi-3.5-mini-instruct-bnb-4bit" : (
        "unsloth/Phi-3.5-mini-instruct",
        "microsoft/Phi-3.5-mini-instruct",
    ),
    "unsloth/c4ai-command-r-08-2024-bnb-4bit" : (
        "CohereForAI/c4ai-command-r-08-2024",
    ),
    "unsloth/c4ai-command-r-plus-08-2024-bnb-4bit" : (
        "CohereForAI/c4ai-command-r-plus-08-2024",
    ),
    "unsloth/Llama-3.1-Storm-8B-bnb-4bit" : (
        "unsloth/Llama-3.1-Storm-8B",
        "akjindal53244/Llama-3.1-Storm-8B",
    ),
    "unsloth/Hermes-3-Llama-3.1-8B-bnb-4bit" : (
        "unsloth/Hermes-3-Llama-3.1-8B",
        "NousResearch/Hermes-3-Llama-3.1-8B",
    ),
    "unsloth/Hermes-3-Llama-3.1-70B-bnb-4bit" : (
        "unsloth/Hermes-3-Llama-3.1-70B",
        "NousResearch/Hermes-3-Llama-3.1-70B",
    ),
    "unsloth/Hermes-3-Llama-3.1-405B-bnb-4bit" : (
        "NousResearch/Hermes-3-Llama-3.1-405B",
    ),
    "unsloth/SmolLM-135M-bnb-4bit" : (
        "unsloth/SmolLM-135M",
        "HuggingFaceTB/SmolLM-135M",
    ),
    "unsloth/SmolLM-360M-bnb-4bit" : (
        "unsloth/SmolLM-360M",
        "HuggingFaceTB/SmolLM-360M",
    ),
    "unsloth/SmolLM-1.7B-bnb-4bit" : (
        "unsloth/SmolLM-1.7B",
        "HuggingFaceTB/SmolLM-1.7B",
    ),
    "unsloth/SmolLM-135M-Instruct-bnb-4bit" : (
        "unsloth/SmolLM-135M-Instruct",
        "HuggingFaceTB/SmolLM-135M-Instruct",
    ),
    "unsloth/SmolLM-360M-Instruct-bnb-4bit" : (
        "unsloth/SmolLM-360M-Instruct",
        "HuggingFaceTB/SmolLM-360M-Instruct",
    ),
    "unsloth/SmolLM-1.7B-Instruct-bnb-4bit" : (
        "unsloth/SmolLM-1.7B-Instruct",
        "HuggingFaceTB/SmolLM-1.7B-Instruct",
    ),
    "unsloth/Mistral-Small-Instruct-2409-bnb-4bit" : (
        "unsloth/Mistral-Small-Instruct-2409",
        "mistralai/Mistral-Small-Instruct-2409",
    ),
    "unsloth/Qwen2.5-0.5B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "unsloth/Qwen2.5-0.5B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-1.5B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-1.5B-Instruct",
        "Qwen/Qwen2.5-1.5B-Instruct",
        "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-3B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "unsloth/Qwen2.5-3B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-7B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-14B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "unsloth/Qwen2.5-14B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-32B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
    ),
    "unsloth/Qwen2.5-72B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
    ),
    "unsloth/Qwen2.5-0.5B-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-0.5B",
        "Qwen/Qwen2.5-0.5B",
        "unsloth/Qwen2.5-0.5B-bnb-4bit",
    ),
    "unsloth/Qwen2.5-1.5B-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-1.5B",
        "Qwen/Qwen2.5-1.5B",
        "unsloth/Qwen2.5-1.5B-bnb-4bit",
    ),
    "unsloth/Qwen2.5-3B-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-3B",
        "Qwen/Qwen2.5-3B",
        "unsloth/Qwen2.5-3B-bnb-4bit",
    ),
    "unsloth/Qwen2.5-7B-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-7B",
        "Qwen/Qwen2.5-7B",
        "unsloth/Qwen2.5-7B-bnb-4bit",
    ),
    "unsloth/Qwen2.5-14B-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-14B",
        "Qwen/Qwen2.5-14B",
        "unsloth/Qwen2.5-14B-bnb-4bit",
    ),
    "unsloth/Qwen2.5-32B-bnb-4bit" : (
        "unsloth/Qwen2.5-32B",
        "Qwen/Qwen2.5-32B",
    ),
    "unsloth/Qwen2.5-72B-bnb-4bit" : (
        "unsloth/Qwen2.5-72B",
        "Qwen/Qwen2.5-72B",
    ),
    "unsloth/Qwen2.5-Math-1.5B-bnb-4bit" : (
        "unsloth/Qwen2.5-Math-1.5B",
        "Qwen/Qwen2.5-Math-1.5B",
    ),
    "unsloth/Qwen2.5-Math-7B-bnb-4bit" : (
        "unsloth/Qwen2.5-Math-7B",
        "Qwen/Qwen2.5-Math-7B",
    ),
    "unsloth/Qwen2.5-Math-72B-bnb-4bit" : (
        "unsloth/Qwen2.5-Math-72B",
        "Qwen/Qwen2.5-Math-72B",
    ),
    "unsloth/Qwen2.5-Math-1.5B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Math-1.5B-Instruct",
        "Qwen/Qwen2.5-Math-1.5B-Instruct",
    ),
    "unsloth/Qwen2.5-Math-7B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Math-7B-Instruct",
        "Qwen/Qwen2.5-Math-7B-Instruct",
    ),
    "unsloth/Qwen2.5-Math-72B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Math-72B-Instruct",
        "Qwen/Qwen2.5-Math-72B-Instruct",
    ),
    "unsloth/Qwen2.5-Coder-0.5B-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-0.5B",
        "Qwen/Qwen2.5-Coder-0.5B",
    ),
    "unsloth/Qwen2.5-Coder-1.5B-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-1.5B",
        "Qwen/Qwen2.5-Coder-1.5B",
    ),
    "unsloth/Qwen2.5-Coder-3B-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-3B",
        "Qwen/Qwen2.5-Coder-3B",
    ),
    "unsloth/Qwen2.5-Coder-7B-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-7B",
        "Qwen/Qwen2.5-Coder-7B",
    ),
    "unsloth/Qwen2.5-Coder-14B-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-14B",
        "Qwen/Qwen2.5-Coder-14B",
    ),
    "unsloth/Qwen2.5-Coder-32B-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-32B",
        "Qwen/Qwen2.5-Coder-32B",
    ),
    "unsloth/Qwen2.5-Coder-0.5B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-0.5B-Instruct",
    ),
    "unsloth/Qwen2.5-Coder-1.5B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-1.5B-Instruct",
        "Qwen/Qwen2.5-Coder-1.5B-Instruct",
    ),
    "unsloth/Qwen2.5-Coder-3B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-3B-Instruct",
        "Qwen/Qwen2.5-Coder-3B-Instruct",
    ),
    "unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
    ),
    "unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-14B-Instruct",
        "Qwen/Qwen2.5-Coder-14B-Instruct",
    ),
    "unsloth/Qwen2.5-Coder-32B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ),
    "unsloth/Llama-3.2-1B-unsloth-bnb-4bit" : (
        "unsloth/Llama-3.2-1B",
        "meta-llama/Llama-3.2-1B",
        "unsloth/Llama-3.2-1B-bnb-4bit",
    ),
    "unsloth/Llama-3.2-3B-unsloth-bnb-4bit" : (
        "unsloth/Llama-3.2-3B",
        "meta-llama/Llama-3.2-3B",
        "unsloth/Llama-3.2-3B-bnb-4bit",
    ),
    "unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit" : {
        "8": (
            "RedHatAI/Llama-3.2-1B-Instruct-FP8",
            "unsloth/Llama-3.2-1B-Instruct-FP8-Block",
            "unsloth/Llama-3.2-1B-Instruct-FP8-Dynamic",
        ),
        "16" : (
            "unsloth/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.2-1B-Instruct",
            "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit" : {
        "8": (
            "RedHatAI/Llama-3.2-3B-Instruct-FP8",
            "unsloth/Llama-3.2-3B-Instruct-FP8-Block",
            "unsloth/Llama-3.2-3B-Instruct-FP8-Dynamic",
        ),
        "16" : (
            "unsloth/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-3B-Instruct",
            "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Llama-3.1-Nemotron-70B-Instruct-bnb-4bit" : (
        "unsloth/Llama-3.1-Nemotron-70B-Instruct",
        "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
    ),
    "unsloth/Qwen2-VL-2B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2-VL-2B-Instruct",
        "Qwen/Qwen2-VL-2B-Instruct",
        "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2-VL-7B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2-VL-7B-Instruct",
        "Qwen/Qwen2-VL-7B-Instruct",
        "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2-VL-72B-Instruct-bnb-4bit" : (
        "unsloth/Qwen2-VL-72B-Instruct",
        "Qwen/Qwen2-VL-72B-Instruct",
    ),
    "unsloth/Qwen2-VL-2B-bnb-4bit" : (
        "unsloth/Qwen2-VL-2B",
        "Qwen/Qwen2-VL-2B",
    ),
    "unsloth/Qwen2-VL-7B-bnb-4bit" : (
        "unsloth/Qwen2-VL-7B",
        "Qwen/Qwen2-VL-7B",
    ),
    "unsloth/Qwen2-VL-72B-bnb-4bit" : (
        "unsloth/Qwen2-VL-72B",
        "Qwen/Qwen2-VL-72B",
    ),
    "unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Llama-3.2-11B-Vision-Instruct",
        "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
    ),
    "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit" : (
        "unsloth/Llama-3.2-90B-Vision-Instruct",
        "meta-llama/Llama-3.2-90B-Vision-Instruct",
    ),
    "unsloth/Llama-3.2-11B-Vision-unsloth-bnb-4bit" : (
        "unsloth/Llama-3.2-11B-Vision",
        "meta-llama/Llama-3.2-11B-Vision",
        "unsloth/Llama-3.2-11B-Vision-bnb-4bit",
    ),
    "unsloth/Llama-3.2-90B-Vision-bnb-4bit" : (
        "unsloth/Llama-3.2-90B-Vision",
        "meta-llama/Llama-3.2-90B-Vision",
    ),
    "unsloth/Pixtral-12B-2409-unsloth-bnb-4bit" : (
        "unsloth/Pixtral-12B-2409",
        "mistralai/Pixtral-12B-2409",
        "unsloth/Pixtral-12B-2409-bnb-4bit",
    ),
    "unsloth/Pixtral-12B-2409-Base-bnb-4bit" : (
        "unsloth/Pixtral-12B-Base-2409",
        "mistralai/Pixtral-12B-Base-2409",
    ),
    "unsloth/llava-1.5-7b-hf-bnb-4bit" : (
        "unsloth/llava-1.5-7b-hf",
        "llava-hf/llava-1.5-7b-hf",
    ),
    "unsloth/llava-v1.6-mistral-7b-hf-bnb-4bit" : (
        "unsloth/llava-v1.6-mistral-7b-hf",
        "llava-hf/llava-v1.6-mistral-7b-hf",
    ),
    "unsloth/Llama-3.1-Tulu-3-8B-bnb-4bit" : (
        "unsloth/Llama-3.1-Tulu-3-8B",
        "allenai/Llama-3.1-Tulu-3-8B",
    ),
    "unsloth/Llama-3.1-Tulu-3-70B-bnb-4bit" : (
        "unsloth/Llama-3.1-Tulu-3-70B",
        "allenai/Llama-3.1-Tulu-3-70B",
    ),
    "unsloth/QwQ-32B-Preview-bnb-4bit" : (
        "unsloth/QwQ-32B-Preview",
        "Qwen/QwQ-32B-Preview",
    ),
    "unsloth/Llama-3.3-70B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "RedHatAI/Llama-3.3-70B-Instruct-FP8",
            "unsloth/Llama-3.3-70B-Instruct-FP8-Block",
            "unsloth/Llama-3.3-70B-Instruct-FP8-Dynamic",
        ),
        "16" : (
            "unsloth/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-3.3-70B-Instruct",
            "unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/phi-4-unsloth-bnb-4bit" : (
        "unsloth/phi-4",
        "microsoft/phi-4",
        "unsloth/phi-4-bnb-4bit",
    ),
    "unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit" : (
        "unsloth/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    ),
    "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit" : (
        "unsloth/DeepSeek-R1-Distill-Qwen-14B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
        "unsloth/DeepSeek-R1-Distill-Qwen-14B-bnb-4bit",
    ),
    "unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit" : (
        "unsloth/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "unsloth/DeepSeek-R1-Distill-Qwen-7B-bnb-4bit",
    ),
    "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit" : (
        "unsloth/DeepSeek-R1-Distill-Qwen-1.5B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-bnb-4bit",
    ),
    "unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit" : (
        "unsloth/DeepSeek-R1-Distill-Llama-8B",
        "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
        "unsloth/DeepSeek-R1-Distill-Llama-8B-bnb-4bit",
    ),
    "unsloth/DeepSeek-R1-Distill-Llama-70B-bnb-4bit" : (
        "unsloth/DeepSeek-R1-Distill-Llama-70B",
        "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    ),
    "unsloth/Mistral-Small-24B-Base-2501-unsloth-bnb-4bit" : (
        "unsloth/Mistral-Small-24B-Base-2501",
        "mistralai/Mistral-Small-24B-Base-2501",
        "unsloth/Mistral-Small-24B-Base-2501-bnb-4bit",
    ),
    "unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit" : (
        "unsloth/Mistral-Small-24B-Instruct-2501",
        "mistralai/Mistral-Small-24B-Instruct-2501",
        "unsloth/Mistral-Small-24B-Instruct-2501-bnb-4bit",
    ),
    "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-VL-3B-Instruct",
        "Qwen/Qwen2.5-VL-3B-Instruct",
        "unsloth/Qwen2.5-VL-3B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-VL-32B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "unsloth/Qwen2.5-VL-32B-Instruct-bnb-4bit",
    ),
    "unsloth/Qwen2.5-VL-72B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/Qwen2.5-VL-72B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "unsloth/Qwen2.5-VL-72B-Instruct-bnb-4bit",
    ),
    "unsloth/DeepScaleR-1.5B-Preview-unsloth-bnb-4bit" : (
        "unsloth/DeepHermes-3-Llama-3-8B-Preview",
        "agentica-org/DeepScaleR-1.5B-Preview",
        "unsloth/DeepScaleR-1.5B-Preview-bnb-4bit",
    ),
    "unsloth/OpenThinker-7B-unsloth-bnb-4bit" : (
        "unsloth/OpenThinker-7B",
        "open-thoughts/OpenThinker-7B",
        "unsloth/OpenThinker-7B-bnb-4bit",
    ),
    "unsloth/granite-3.2-2b-instruct-unsloth-bnb-4bit" : (
        "unsloth/granite-3.2-2b-instruct",
        "ibm-granite/granite-3.2-2b-instruct",
        "unsloth/granite-3.2-2b-instruct-bnb-4bit",
    ),
    "unsloth/granite-3.2-8b-instruct-unsloth-bnb-4bit" : (
        "unsloth/granite-3.2-8b-instruct",
        "ibm-granite/granite-3.2-8b-instruct",
        "unsloth/granite-3.2-8b-instruct-bnb-4bit",
    ),
    "unsloth/QwQ-32B-unsloth-bnb-4bit" : (
        "unsloth/QwQ-32B",
        "Qwen/QwQ-32B",
        "unsloth/QwQ-32B-bnb-4bit",
    ),
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-1b-it",
        "google/gemma-3-1b-it",
        "unsloth/gemma-3-1b-it-bnb-4bit",
    ),
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-4b-it",
        "google/gemma-3-4b-it",
        "unsloth/gemma-3-4b-it-bnb-4bit",
    ),
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-12b-it",
        "google/gemma-3-12b-it",
        "unsloth/gemma-3-12b-it-bnb-4bit",
    ),
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-27b-it",
        "google/gemma-3-27b-it",
        "unsloth/gemma-3-27b-it-bnb-4bit",
    ),
    "unsloth/gemma-3-1b-pt-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-1b-pt",
        "google/gemma-3-1b-pt",
        "unsloth/gemma-3-1b-pt-bnb-4bit",
    ),
    "unsloth/gemma-3-4b-pt-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-4b-pt",
        "google/gemma-3-4b-pt",
        "unsloth/gemma-3-4b-pt-bnb-4bit",
    ),
    "unsloth/gemma-3-12b-pt-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-12b-pt",
        "google/gemma-3-12b-pt",
        "unsloth/gemma-3-12b-pt-bnb-4bit",
    ),
    "unsloth/gemma-3-27b-pt-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-27b-pt",
        "google/gemma-3-27b-pt",
        "unsloth/gemma-3-27b-pt-bnb-4bit",
    ),
    "unsloth/reka-flash-3-unsloth-bnb-4bit" : (
        "unsloth/reka-flash-3",
        "RekaAI/reka-flash-3",
        "unsloth/reka-flash-3-bnb-4bit",
    ),
    "unsloth/c4ai-command-a-03-2025-unsloth-bnb-4bit" : (
        "unsloth/c4ai-command-a-03-2025",
        "CohereForAI/c4ai-command-a-03-2025",
        "unsloth/c4ai-command-a-03-2025-bnb-4bit",
    ),
    "unsloth/aya-vision-32b-unsloth-bnb-4bit" : (
        "unsloth/aya-vision-32b",
        "CohereForAI/aya-vision-32b",
        "unsloth/aya-vision-32b-bnb-4bit",
    ),
    "unsloth/aya-vision-8b-unsloth-bnb-4bit" : (
        "unsloth/aya-vision-8b",
        "CohereForAI/aya-vision-8b",
        "unsloth/aya-vision-8b-bnb-4bit",
    ),
    "unsloth/granite-vision-3.2-2b-unsloth-bnb-4bit" : (
        "unsloth/granite-vision-3.2-2b",
        "ibm-granite/granite-vision-3.2-2b",
        "unsloth/granite-vision-3.2-2b-bnb-4bit",
    ),
    "unsloth/OLMo-2-0325-32B-Instruct-unsloth-bnb-4bit" : (
        "unsloth/OLMo-2-0325-32B-Instruct",
        "allenai/OLMo-2-0325-32B-Instruct",
        "unsloth/OLMo-2-0325-32B-Instruct-bnb-4bit",
    ),
    "unsloth/Mistral-Small-3.1-24B-Instruct-2503-unsloth-bnb-4bit" : (
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503",
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503-bnb-4bit",
    ),
    "unsloth/Mistral-Small-3.1-24B-Base-2503-unsloth-bnb-4bit" : (
        "unsloth/Mistral-Small-3.1-24B-Base-2503",
        "mistralai/Mistral-Small-3.1-24B-Base-2503",
        "unsloth/Mistral-Small-3.1-24B-Base-2503-bnb-4bit",
    ),
    "unsloth/Qwen3-0.6B-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-0.6B-FP8",
            "unsloth/Qwen3-0.6B-FP8",
            "unsloth/Qwen3-0.6B-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-0.6B",
            "Qwen/Qwen3-0.6B",
            "unsloth/Qwen3-0.6B-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-1.7B-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-1.7B-FP8",
            "unsloth/Qwen3-1.7B-FP8",
            "unsloth/Qwen3-1.7B-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-1.7B",
            "Qwen/Qwen3-1.7B",
            "unsloth/Qwen3-1.7B-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-4B-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-4B-FP8",
            "unsloth/Qwen3-4B-FP8",
            "unsloth/Qwen3-4B-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-4B",
            "Qwen/Qwen3-4B",
            "unsloth/Qwen3-4B-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-8B-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-8B-FP8",
            "unsloth/Qwen3-8B-FP8",
            "unsloth/Qwen3-8B-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-8B",
            "Qwen/Qwen3-8B",
            "unsloth/Qwen3-8B-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-14B-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-14B-FP8",
            "unsloth/Qwen3-14B-FP8",
            "unsloth/Qwen3-14B-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-14B",
            "Qwen/Qwen3-14B",
            "unsloth/Qwen3-14B-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-32B-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-32B-FP8",
            "unsloth/Qwen3-32B-FP8",
            "unsloth/Qwen3-32B-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-32B",
            "Qwen/Qwen3-32B",
            "unsloth/Qwen3-32B-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-30B-A3B-unsloth-bnb-4bit" : (
        "unsloth/Qwen3-30B-A3B",
        "Qwen/Qwen3-30B-A3B",
        "unsloth/Qwen3-30B-A3B-bnb-4bit",
    ),
    "unsloth/Qwen3-0.6B-Base-unsloth-bnb-4bit" : (
        "unsloth/Qwen3-0.6B-Base",
        "Qwen/Qwen3-0.6B-Base",
        "unsloth/Qwen3-0.6B-Base-bnb-4bit",
    ),
    "unsloth/Qwen3-1.7B-Base-unsloth-bnb-4bit" : (
        "unsloth/Qwen3-1.7B-Base",
        "Qwen/Qwen3-1.7B-Base",
        "unsloth/Qwen3-1.7B-Base-bnb-4bit",
    ),
    "unsloth/Qwen3-4B-Base-unsloth-bnb-4bit" : (
        "unsloth/Qwen3-4B-Base",
        "Qwen/Qwen3-4B-Base",
        "unsloth/Qwen3-4B-Base-bnb-4bit",
    ),
    "unsloth/Qwen3-8B-Base-unsloth-bnb-4bit" : (
        "unsloth/Qwen3-8B-Base",
        "Qwen/Qwen3-8B-Base",
        "unsloth/Qwen3-8B-Base-bnb-4bit",
    ),
    "unsloth/Qwen3-14B-Base-unsloth-bnb-4bit" : (
        "unsloth/Qwen3-14B-Base",
        "Qwen/Qwen3-14B-Base",
        "unsloth/Qwen3-14B-Base-bnb-4bit",
    ),
    "unsloth/Qwen3-30B-A3B-Base-bnb-4bit" : (
        "unsloth/Qwen3-30B-A3B-Base",
        "Qwen/Qwen3-30B-A3B-Base",
    ),
    "unsloth/phi-4-reasoning-unsloth-bnb-4bit" : (
        "unsloth/phi-4-reasoning",
        "microsoft/Phi-4-reasoning",
        "unsloth/phi-4-reasoning-bnb-4bit",
    ),
    "unsloth/phi-4-reasoning-plus-unsloth-bnb-4bit" : (
        "unsloth/phi-4-reasoning-plus",
        "microsoft/Phi-4-reasoning-plus",
        "unsloth/phi-4-reasoning-plus-bnb-4bit",
    ),
    "unsloth/phi-4-mini-reasoning-unsloth-bnb-4bit" : (
        "unsloth/phi-4-mini-reasoning",
        "microsoft/Phi-4-mini-reasoning",
        "unsloth/phi-4-mini-reasoning-bnb-4bit",
    ),
    "unsloth/Phi-4-mini-instruct-unsloth-bnb-4bit" : (
        "unsloth/Phi-4-mini-instruct",
        "microsoft/Phi-4-mini-instruct",
        "unsloth/Phi-4-mini-instruct-bnb-4bit",
    ),
    "unsloth/orpheus-3b-0.1-pretrained-unsloth-bnb-4bit" : (
        "unsloth/orpheus-3b-0.1-pretrained",
        "canopylabs/orpheus-3b-0.1-pretrained",
        "unsloth/orpheus-3b-0.1-pretrained-bnb-4bit",
    ),
    "unsloth/orpheus-3b-0.1-ft-unsloth-bnb-4bit" : (
        "unsloth/orpheus-3b-0.1-ft",
        "canopylabs/orpheus-3b-0.1-ft",
        "unsloth/orpheus-3b-0.1-ft-bnb-4bit",
    ),
    "unsloth/csm-1b" : (
        "unsloth/csm-1b",
        "sesame/csm-1b",
    ),
    "unsloth/whisper-large-v3" : (
        "unsloth/whisper-large-v3",
        "openai/whisper-large-v3",
    ),
    "unsloth/whisper-large-v3-turbo" : (
        "unsloth/whisper-large-v3-turbo",
        "openai/whisper-large-v3-turbo",
    ),
    "unsloth/whisper-small" : (
        "unsloth/whisper-small",
        "openai/whisper-small",
    ),
    "unsloth/CrisperWhisper" : (
        "unsloth/CrisperWhisper",
        "nyrahealth/CrisperWhisper",
    ),
    "unsloth/Llasa-1B" : (
        "unsloth/Llasa-1B",
        "HKUSTAudio/Llasa-1B",
    ),
    "unsloth/Spark-TTS-0.5B" : (
        "unsloth/Spark-TTS-0.5B",
        "SparkAudio/Spark-TTS-0.5B",
    ),
    "unsloth/Llama-OuteTTS-1.0-1B" : (
        "unsloth/Llama-OuteTTS-1.0-1B",
        "OuteAI/Llama-OuteTTS-1.0-1B",
    ),
    "unsloth/medgemma-4b-it-unsloth-bnb-4bit" : (
        "unsloth/medgemma-4b-it",
        "google/medgemma-4b-it",
        "unsloth/medgemma-4b-it-bnb-4bit",
    ),
    "unsloth/medgemma-27b-text-it-unsloth-bnb-4bit" : (
        "unsloth/medgemma-27b-text-it",
        "google/medgemma-27b-text-it",
        "unsloth/medgemma-27b-text-it-bnb-4bit",
    ),
    "unsloth/Devstral-Small-2505-unsloth-bnb-4bit" : (
        "unsloth/Devstral-Small-2505",
        "mistralai/Devstral-Small-2505",
        "unsloth/Devstral-Small-2505-bnb-4bit",
    ),
    "unsloth/DeepSeek-R1-0528-Qwen3-8B-unsloth-bnb-4bit" : (
        "unsloth/DeepSeek-R1-0528-Qwen3-8B",
        "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
        "unsloth/DeepSeek-R1-0528-Qwen3-8B-bnb-4bit",
    ),
    "unsloth/Magistral-Small-2506-unsloth-bnb-4bit" : (
        "unsloth/Magistral-Small-2506",
        "mistralai/Magistral-Small-2506",
        "unsloth/Magistral-Small-2506-bnb-4bit",
    ),
    "unsloth/Mistral-Small-3.2-24B-Instruct-2506-unsloth-bnb-4bit" : {
        "8" : (
            "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "unsloth/Mistral-Small-3.2-24B-Instruct-2506-FP8",
            "unsloth/Mistral-Small-3.2-24B-Instruct-2506-FP8",
        ),
        "16" : (
            "unsloth/Mistral-Small-3.2-24B-Instruct-2506",
            "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
            "unsloth/Mistral-Small-3.2-24B-Instruct-2506-bnb-4bit",
        ),
    },
    "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3n-E4B-it",
        "google/gemma-3n-E4B-it",
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
    ),
    "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3n-E2B-it",
        "google/gemma-3n-E2B-it",
        "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    ),
    "unsloth/gemma-3n-E4B-unsloth-bnb-4bit" : (
        "unsloth/gemma-3n-E4B",
        "google/gemma-3n-E4B",
        "unsloth/gemma-3n-E4B-unsloth-bnb-4bit",
    ),
    "unsloth/gemma-3n-E2B-unsloth-bnb-4bit" : (
        "unsloth/gemma-3n-E2B",
        "google/gemma-3n-E2B",
        "unsloth/gemma-3n-E2B-unsloth-bnb-4bit",
    ),
    "unsloth/Devstral-Small-2507-unsloth-bnb-4bit" : (
        "unsloth/Devstral-Small-2507",
        "mistralai/Devstral-Small-2507",
        "unsloth/Devstral-Small-2507-bnb-4bit",
    ),
    "unsloth/Qwen3-30B-A3B-Thinking-2507" : (
        "unsloth/Qwen3-30B-A3B-Thinking-2507",
        "Qwen/Qwen3-30B-A3B-Thinking-2507",
    ),
    "unsloth/Qwen3-30B-A3B-Instruct-2507" : (
        "unsloth/Qwen3-30B-A3B-Instruct-2507",
        "Qwen/Qwen3-30B-A3B-Instruct-2507",
    ),
    "unsloth/Qwen3-Coder-30B-A3B-Instruct" : (
        "unsloth/Qwen3-Coder-30B-A3B-Instruct",
        "Qwen/Qwen3-Coder-30B-A3B-Instruct",
    ),
    "unsloth/gpt-oss-20b-unsloth-bnb-4bit" : (
        "unsloth/gpt-oss-20b",
        "openai/gpt-oss-20b",
        "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
    ),
    "unsloth/gpt-oss-120b-unsloth-bnb-4bit" : (
        "unsloth/gpt-oss-120b",
        "openai/gpt-oss-120b",
        "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    ),
    "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-4B-Instruct-2507-FP8",
            "unsloth/Qwen3-4B-Instruct-2507-FP8",
            "unsloth/Qwen3-4B-Instruct-2507-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-4B-Instruct-2507",
            "Qwen/Qwen3-4B-Instruct-2507",
            "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-4B-Thinking-2507-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-4B-Thinking-2507-FP8",
            "unsloth/Qwen3-4B-Thinking-2507-FP8",
            "unsloth/Qwen3-4B-Thinking-2507-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-4B-Thinking-2507",
            "Qwen/Qwen3-4B-Thinking-2507",
            "unsloth/Qwen3-4B-Thinking-2507-bnb-4bit",
        ),
    },
    "unsloth/gemma-3-270m-it-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-270m-it",
        "google/gemma-3-270m-it",
        "unsloth/gemma-3-270m-it-bnb-4bit",
    ),
    "unsloth/gemma-3-270m-unsloth-bnb-4bit" : (
        "unsloth/gemma-3-270m",
        "google/gemma-3-270m",
        "unsloth/gemma-3-270m-bnb-4bit",
    ),
    "unsloth/Magistral-Small-2507-unsloth-bnb-4bit" : (
        "unsloth/Magistral-Small-2507",
        "mistralai/Magistral-Small-2507",
        "unsloth/Magistral-Small-2507-bnb-4bit",
    ),
    "unsloth/Magistral-Small-2509-unsloth-bnb-4bit" : {
        "8" : (
            "mistralai/Magistral-Small-2509",
            "unsloth/Magistral-Small-2509-FP8-Dynamic",
            "unsloth/Magistral-Small-2509-FP8-Dynamic",
        ),
        "16" : (
            "unsloth/Magistral-Small-2509",
            "mistralai/Magistral-Small-2509",
            "unsloth/Magistral-Small-2509-bnb-4bit",
        ),
    },
    "unsloth/Apertus-70B-Instruct-2509-unsloth-bnb-4bit" : (
        "unsloth/Apertus-70B-Instruct-2509",
        "swiss-ai/Apertus-70B-2509",
        "unsloth/Apertus-70B-Instruct-2509-unsloth-bnb-4bit",
    ),
    "unsloth/Apertus-8B-Instruct-2509-unsloth-bnb-4bit" : (
        "unsloth/Apertus-8B-Instruct-2509",
        "swiss-ai/Apertus-8B-2509",
        "unsloth/Apertus-8B-Instruct-2509-unsloth-bnb-4bit",
    ),
    "unsloth/granite-4.0-micro-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-micro",
        "ibm-granite/granite-4.0-micro",
        "unsloth/granite-4.0-micro-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-micro-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-h-micro",
        "ibm-granite/granite-4.0-h-micro",
        "unsloth/granite-4.0-h-micro-bnb-4bit",
    ),
    "unsloth/granite-4.0-micro-base-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-micro-base",
        "ibm-granite/granite-4.0-micro-base",
        "unsloth/granite-4.0-micro-base-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-micro-base-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-h-micro-base",
        "ibm-granite/granite-4.0-h-micro-base",
        "unsloth/granite-4.0-h-micro-base-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-tiny" : (
        "unsloth/granite-4.0-h-tiny",
        "ibm-granite/granite-4.0-h-tiny",
    ),
    "unsloth/granite-4.0-h-small" : (
        "unsloth/granite-4.0-h-small",
        "ibm-granite/granite-4.0-h-small",
    ),
    "unsloth/granite-4.0-h-tiny-base" : (
        "unsloth/granite-4.0-h-tiny-base",
        "ibm-granite/granite-4.0-h-tiny-base",
    ),
    "unsloth/granite-4.0-h-small-base" : (
        "unsloth/granite-4.0-h-small-base",
        "ibm-granite/granite-4.0-h-small-base",
    ),
    "unsloth/Qwen3-VL-4B-Thinking-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-4B-Thinking-FP8",
            "unsloth/Qwen3-VL-4B-Thinking-FP8",
            "unsloth/Qwen3-VL-4B-Thinking-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-4B-Thinking",
            "Qwen/Qwen3-VL-4B-Thinking",
            "unsloth/Qwen3-VL-4B-Thinking-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-8B-Thinking-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-8B-Thinking-FP8",
            "unsloth/Qwen3-VL-8B-Thinking-FP8",
            "unsloth/Qwen3-VL-8B-Thinking-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-8B-Thinking",
            "Qwen/Qwen3-VL-8B-Thinking",
            "unsloth/Qwen3-VL-8B-Thinking-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-4B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-4B-Instruct-FP8",
            "unsloth/Qwen3-VL-4B-Instruct-FP8",
            "unsloth/Qwen3-VL-4B-Instruct-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-4B-Instruct",
            "Qwen/Qwen3-VL-4B-Instruct",
            "unsloth/Qwen3-VL-4B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-8B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-8B-Instruct-FP8",
            "unsloth/Qwen3-VL-8B-Instruct-FP8",
            "unsloth/Qwen3-VL-8B-Instruct-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-8B-Instruct",
            "Qwen/Qwen3-VL-8B-Instruct",
            "unsloth/Qwen3-VL-8B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-2B-Thinking-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-2B-Thinking-FP8",
            "unsloth/Qwen3-VL-2B-Thinking-FP8",
            "unsloth/Qwen3-VL-2B-Thinking-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-2B-Thinking",
            "Qwen/Qwen3-VL-2B-Thinking",
            "unsloth/Qwen3-VL-2B-Thinking-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-32B-Thinking-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-32B-Thinking-FP8",
            "unsloth/Qwen3-VL-32B-Thinking-FP8",
            "unsloth/Qwen3-VL-32B-Thinking-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-32B-Thinking",
            "Qwen/Qwen3-VL-32B-Thinking",
            "unsloth/Qwen3-VL-32B-Thinking-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-2B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-2B-Instruct-FP8",
            "unsloth/Qwen3-VL-2B-Instruct-FP8",
            "unsloth/Qwen3-VL-2B-Instruct-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-2B-Instruct",
            "Qwen/Qwen3-VL-2B-Instruct",
            "unsloth/Qwen3-VL-2B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/Qwen3-VL-32B-Instruct-unsloth-bnb-4bit" : {
        "8" : (
            "Qwen/Qwen3-VL-32B-Instruct-FP8",
            "unsloth/Qwen3-VL-32B-Instruct-FP8",
            "unsloth/Qwen3-VL-32B-Instruct-FP8",
        ),
        "16" : (
            "unsloth/Qwen3-VL-32B-Instruct",
            "Qwen/Qwen3-VL-32B-Instruct",
            "unsloth/Qwen3-VL-32B-Instruct-bnb-4bit",
        ),
    },
    "unsloth/granite-4.0-350m-base-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-350m-base",
        "ibm-granite/granite-4.0-350m-base",
        "unsloth/granite-4.0-350m-base-bnb-4bit",
    ),
    "unsloth/granite-4.0-350m-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-350m",
        "ibm-granite/granite-4.0-350m",
        "unsloth/granite-4.0-350m-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-350m-base-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-h-350m-base",
        "ibm-granite/granite-4.0-h-350m-base",
        "unsloth/granite-4.0-h-350m-base-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-350m-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-h-350m",
        "ibm-granite/granite-4.0-h-350m",
        "unsloth/granite-4.0-h-350m-bnb-4bit",
    ),
    "unsloth/granite-4.0-1b-base-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-1b-base",
        "ibm-granite/granite-4.0-1b-base",
        "unsloth/granite-4.0-1b-base-bnb-4bit",
    ),
    "unsloth/granite-4.0-1b-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-1b",
        "ibm-granite/granite-4.0-1b",
        "unsloth/granite-4.0-1b-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-1b-base-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-h-1b-base",
        "ibm-granite/granite-4.0-h-1b-base",
        "unsloth/granite-4.0-h-1b-base-bnb-4bit",
    ),
    "unsloth/granite-4.0-h-1b-unsloth-bnb-4bit" : (
        "unsloth/granite-4.0-h-1b",
        "ibm-granite/granite-4.0-h-1b",
        "unsloth/granite-4.0-h-1b-bnb-4bit",
    ),
    "unsloth/gpt-oss-safeguard-20b" : (
        "unsloth/gpt-oss-safeguard-20b",
        "openai/gpt-oss-safeguard-20b",
    ),
    "unsloth/gpt-oss-safeguard-120b" : (
        "unsloth/gpt-oss-safeguard-120b",
        "openai/gpt-oss-safeguard-120b",
    ),
    "unsloth/functiongemma-270m-it-unsloth-bnb-4bit" : (
        "unsloth/functiongemma-270m-it",
        "google/functiongemma-270m-it",
        "unsloth/functiongemma-270m-it-unsloth-bnb-4bit",
    ),
    # Ministral 3 models
    "unsloth/Ministral-3-3B-Instruct-2512-unsloth-bnb-4bit" : {
        "8" : (
            "mistralai/Ministral-3-3B-Instruct-2512",
            "unsloth/Ministral-3-3B-Instruct-2512-FP8",
            "unsloth/Ministral-3-3B-Instruct-2512-FP8",
        ),
        "16" : (
            "unsloth/Ministral-3-3B-Instruct-2512",
            "mistralai/Ministral-3-3B-Instruct-2512",
            "unsloth/Ministral-3-3B-Instruct-2512-bnb-4bit",
        ),
    },
    "unsloth/Ministral-3-3B-Base-2512-unsloth-bnb-4bit" : (
        "unsloth/Ministral-3-3B-Base-2512",
        "mistralai/Ministral-3-3B-Base-2512",
        "unsloth/Ministral-3-3B-Base-2512-bnb-4bit",
    ),
    "unsloth/Ministral-3-3B-Reasoning-2512-unsloth-bnb-4bit" : (
        "unsloth/Ministral-3-3B-Reasoning-2512",
        "mistralai/Ministral-3-3B-Reasoning-2512",
        "unsloth/Ministral-3-3B-Reasoning-2512-bnb-4bit",
    ),
    "unsloth/Ministral-3-8B-Instruct-2512-unsloth-bnb-4bit" : {
        "8" : (
            "mistralai/Ministral-3-8B-Instruct-2512",
            "unsloth/Ministral-3-8B-Instruct-2512-FP8",
            "unsloth/Ministral-3-8B-Instruct-2512-FP8",
        ),
        "16" : (
            "unsloth/Ministral-3-8B-Instruct-2512",
            "mistralai/Ministral-3-8B-Instruct-2512",
            "unsloth/Ministral-3-8B-Instruct-2512-bnb-4bit",
        ),
    },
    "unsloth/Ministral-3-8B-Base-2512-unsloth-bnb-4bit" : (
        "unsloth/Ministral-3-8B-Base-2512",
        "mistralai/Ministral-3-8B-Base-2512",
        "unsloth/Ministral-3-8B-Base-2512-bnb-4bit",
    ),
    "unsloth/Ministral-3-8B-Reasoning-2512-unsloth-bnb-4bit" : (
        "unsloth/Ministral-3-8B-Reasoning-2512",
        "mistralai/Ministral-3-8B-Reasoning-2512",
        "unsloth/Ministral-3-8B-Reasoning-2512-bnb-4bit",
    ),
    "unsloth/Ministral-3-14B-Instruct-2512-unsloth-bnb-4bit" : {
        "8" : (
            "mistralai/Ministral-3-14B-Instruct-2512",
            "unsloth/Ministral-3-14B-Instruct-2512-FP8",
            "unsloth/Ministral-3-14B-Instruct-2512-FP8",
        ),
        "16" : (
            "unsloth/Ministral-3-14B-Instruct-2512",
            "mistralai/Ministral-3-14B-Instruct-2512",
            "unsloth/Ministral-3-14B-Instruct-2512-bnb-4bit",
        ),
    },
    "unsloth/Ministral-3-14B-Base-2512-unsloth-bnb-4bit" : (
        "unsloth/Ministral-3-14B-Base-2512",
        "mistralai/Ministral-3-14B-Base-2512",
        "unsloth/Ministral-3-14B-Base-2512-bnb-4bit",
    ),
    "unsloth/Ministral-3-14B-Reasoning-2512-unsloth-bnb-4bit" : (
        "unsloth/Ministral-3-14B-Reasoning-2512",
        "mistralai/Ministral-3-14B-Reasoning-2512",
        "unsloth/Ministral-3-14B-Reasoning-2512-bnb-4bit",
    ),
    "unsloth/Kimi-K2-Instruct-BF16" : (
        "unsloth/Kimi-K2-Instruct",
    ),
}

# Lookup tables populated by the loop over __INT_TO_FLOAT_MAPPER below.
# Each starts empty and is filled in place while this module is imported.

# Quantized repo name -> first name listed for its 16-bit variant.
INT_TO_FLOAT_MAPPER = {}
# Any listed 16-bit / FP8 name -> the quantized repo name it belongs to.
FLOAT_TO_INT_MAPPER = {}
# Alternative name -> the "unsloth/..." name recorded for it.
MAP_TO_UNSLOTH_16bit = {}
# Lower-cased name -> second entry ("block") of the model's "8" tuple.
FLOAT_TO_FP8_BLOCK_MAPPER = {}
# Lower-cased name -> third entry ("row") of the model's "8" tuple.
FLOAT_TO_FP8_ROW_MAPPER = {}


def _add_with_lower(mapper, key, value):
    if key is None:
        return
    mapper[key] = value
    mapper[key.lower()] = value


def _add_lower_only(mapper, key, value):
    if key is None:
        return
    mapper[key.lower()] = value

# Expand the hand-written table into the lookup dictionaries defined above.
# An entry is either a plain tuple of 16-bit names, or a dict with a "16"
# tuple and an optional 3-item "8" tuple (official, block, row).
for key, values in __INT_TO_FLOAT_MAPPER.items():
    # Stay None for entries without an "8" section; the helpers skip None keys.
    block = row = None
    if isinstance(values, dict):
        assert "16" in values
        float16_values = values["16"]
        # Float8 and other quantized types
        if "8" in values:
            float8_values = values["8"]
            assert len(float8_values) == 3
            official, block, row = float8_values

            # Lower-cased lookups from every known name to the FP8 variants.
            _add_lower_only(FLOAT_TO_FP8_BLOCK_MAPPER, key, block)
            _add_lower_only(FLOAT_TO_FP8_ROW_MAPPER, key, row)
            _add_lower_only(FLOAT_TO_FP8_BLOCK_MAPPER, official + "-dynamic", block)
            _add_lower_only(FLOAT_TO_FP8_ROW_MAPPER, official, row)
            _add_lower_only(FLOAT_TO_FP8_ROW_MAPPER, official + "-dynamic", row)
            for k in float8_values + float16_values:
                _add_lower_only(FLOAT_TO_FP8_BLOCK_MAPPER, k, block)
                _add_lower_only(FLOAT_TO_FP8_ROW_MAPPER, k, row)

            # `block` is float8_values[1]; FP8 names point at it only when it
            # is an "unsloth..." name.
            if block is not None and block.startswith("unsloth"):
                for value in float8_values:
                    if value is None:
                        continue
                    _add_with_lower(MAP_TO_UNSLOTH_16bit, value, block)

            # Every FP8 name also resolves back to the quantized key.
            for value in float8_values:
                if value is None:
                    continue
                FLOAT_TO_INT_MAPPER[value] = key
                FLOAT_TO_INT_MAPPER[value.lower()] = key.lower()
        # From here on only the 16-bit tuple is processed.
        values = float16_values

    INT_TO_FLOAT_MAPPER[key] = values[0]
    for value in values:
        FLOAT_TO_INT_MAPPER[value] = key

    # Map to Unsloth version for 16bit versions. Tuples of length 2 and 3
    # (the latter being dynamic Unsloth quantization) are handled alike: every
    # name after the first, plus the FP8 names, points at values[0].
    if len(values) in (2, 3) and values[0].startswith("unsloth"):
        for value in values[1:]:
            _add_with_lower(MAP_TO_UNSLOTH_16bit, value, values[0])
        _add_with_lower(MAP_TO_UNSLOTH_16bit, block, values[0])
        _add_with_lower(MAP_TO_UNSLOTH_16bit, row, values[0])

    # Lower-cased mirror of the INT <-> FLOAT entries written above.
    lowered_key = key.lower()
    INT_TO_FLOAT_MAPPER[lowered_key] = values[0].lower()
    for value in values:
        FLOAT_TO_INT_MAPPER[value.lower()] = lowered_key


================================================
FILE: unsloth/models/mistral.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
import os
from ._utils import __version__
from unsloth_zoo.utils import _get_dtype
from unsloth_zoo.hf_utils import dtype_from_config
from ..utils.packing import (
    get_packed_info_from_kwargs,
    mask_packed_sequence_boundaries,
)
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    SDPA,
    select_attention_backend,
)
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
)
from transformers.models.mistral.modeling_mistral import (
    MistralAttention,
    MistralDecoderLayer,
    MistralModel,
    MistralForCausalLM,
)

# For Pytorch 2.1.1
try:
    from transformers.models.mistral.modeling_mistral import (
        MistralSdpaAttention,
        MistralFlashAttention2,
    )
except ImportError:
    # The backend-specific attention classes could not be imported from this
    # transformers build; alias them to MistralAttention so code that patches
    # them by name keeps working. Only ImportError is caught so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    MistralSdpaAttention = MistralAttention
    MistralFlashAttention2 = MistralAttention
from unsloth_zoo.utils import Version, _get_dtype


def MistralAttention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """Replacement forward pass installed on the Mistral attention classes.

    Args:
        hidden_states: 3-D input; unpacked as (batch, query length, hidden).
        causal_mask: Forwarded untouched to the attention dispatcher.
        attention_mask: When given, forces the SDPA backend and is forwarded
            to the attention dispatcher.
        position_ids: Positions used for RoPE; falls back to
            ``kwargs["position_ids"]`` when ``None``.
        past_key_value: Optional ``(K, V)`` pair that is concatenated in front
            of the freshly computed keys / values along dim 2.
        output_attentions: Accepted for signature compatibility; not read here.
        use_cache: When true, the (concatenated) ``(K, V)`` pair is returned.
        padding_mask: Accepted for signature compatibility; not read here.
        position_embeddings: Accepted for signature compatibility; not read here.
        **kwargs: Inspected by ``get_packed_info_from_kwargs`` for packed
            sequence metadata.

    Returns:
        ``(attn_output, None, past_key_value)`` - attention weights are never
        materialised, and ``past_key_value`` is ``None`` unless ``use_cache``.
    """
    # Clear inference
    if hasattr(self, "paged_attention"):
        del self.paged_attention_K
        del self.paged_attention_V
        del self.paged_attention
        del self.temp_QA
        del self.temp_KV
        del self.RH_Q
        del self.attention

    bsz, q_len, _ = hidden_states.size()

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    assert n_kv_heads * n_groups == n_heads

    # Project, then reshape to (bsz, heads, q_len, head_dim).
    Q, K, V = self.apply_qkv(self, hidden_states)
    Q = Q.view(bsz, q_len, n_heads, head_dim).transpose(1, 2)
    K = K.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    V = V.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, Q.device)

    kv_seq_len = K.shape[-2]
    if past_key_value is not None:
        kv_seq_len += past_key_value[0].shape[-2]

    # Extend RoPE dynamically to fit in VRAM
    self.rotary_emb.extend_rope_embedding(V, seq_len = kv_seq_len)
    cos, sin = self.rotary_emb.get_cached(kv_seq_len, Q.device.index)

    rope_position_ids = (
        position_ids if position_ids is not None else kwargs.get("position_ids")
    )
    # Useful for LongRoPE
    Q, K = fast_rope_embedding(Q, K, cos, sin, rope_position_ids)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)
    past_key_value = (K, V) if use_cache else None

    # Attention module
    # A missing, "null" or non-positive sliding_window means "no window",
    # matching how MistralForCausalLM_fast_forward interprets the same field.
    sw_cfg = getattr(self.config, "sliding_window", None)
    sliding_disabled = sw_cfg is None or sw_cfg == "null" or sw_cfg <= 0
    sw = kv_seq_len if sliding_disabled else sw_cfg
    window_size = (-1, -1) if (kv_seq_len <= sw) else (sw, sw)

    # Note: `past_key_value` here is the value computed just above, so the
    # variable-length path is only considered when no cache is being returned.
    use_varlen = (
        seq_info is not None and past_key_value is None and window_size == (-1, -1)
    )
    backend = (
        SDPA if attention_mask is not None else select_attention_backend(use_varlen)
    )
    attention_config = AttentionConfig(
        backend = backend,
        n_kv_heads = n_kv_heads,
        n_groups = n_groups,
        flash_dense_kwargs = {"causal": True, "window_size": window_size},
        flash_varlen_kwargs = {
            "dropout_p": 0.0,
            "causal": True,
            "softmax_scale": getattr(self, "softmax_scale", None),
        },
    )
    context = AttentionContext(
        bsz = bsz,
        q_len = q_len,
        kv_seq_len = kv_seq_len,
        n_heads = n_heads,
        head_dim = head_dim,
        requires_grad = hidden_states.requires_grad,
        seq_info = seq_info,
        attention_mask = attention_mask,
        causal_mask = causal_mask,
    )

    A = run_attention(config = attention_config, context = context, Q = Q, K = K, V = V)
    attn_output = A.reshape(bsz, q_len, n_heads * head_dim)
    attn_output = self.apply_o(self, attn_output)
    attn_weights = None
    return attn_output, attn_weights, past_key_value


def MistralForCausalLM_fast_forward(
    self,
    input_ids: torch.LongTensor = None,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_values: Optional[List[torch.FloatTensor]] = None,
    inputs_embeds: Optional[torch.FloatTensor] = None,
    labels: Optional[torch.LongTensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    num_logits_to_keep: Optional[int] = 0,
    logits_to_keep: Optional[int] = 0,
    *args,
    **kwargs,
) -> Union[Tuple, CausalLMOutputWithPast]:
    """Replacement forward pass installed on ``MistralForCausalLM``.

    Builds the causal / sliding-window mask when none is supplied and there is
    no KV cache, runs the decoder (or the cached inference path when
    ``past_key_values`` is given), then produces logits and/or a loss.

    Args:
        input_ids: Token ids; its shape and device are read when the mask has
            to be built here.
        causal_mask: Pre-built mask; when ``None`` and there is no cache, one
            is created (xformers) or folded into ``attention_mask`` (otherwise).
        attention_mask: Either a 2-D 0/1 padding mask or an additive mask.
        position_ids: Forwarded to the decoder.
        past_key_values: When not ``None`` the inference fast path is used.
        inputs_embeds: Forwarded to the decoder on the non-cached path.
        labels: When given, a loss is computed.
        use_cache, output_attentions, output_hidden_states, return_dict:
            Forwarded to the decoder; ``None`` falls back to ``self.config``
            for the last three.
        num_logits_to_keep: When non-zero, only that many trailing positions
            are passed through ``lm_head``.
        logits_to_keep: Only consulted (via ``max`` with
            ``num_logits_to_keep``) when ``UNSLOTH_RETURN_HIDDEN_STATES=1``.
        **kwargs: Forwarded to the decoder; ``num_items_in_batch`` /
            ``n_items`` and ``packed_seq_lengths`` are also read here.

    Returns:
        A ``CausalLMOutputWithPast``, or a tuple when ``return_dict`` is false.
        On the fused-loss path the logits slot holds ``EMPTY_LOGITS`` because
        full logits are never materialised there.
    """
    if causal_mask is None and past_key_values is None:
        bsz, q_len = input_ids.shape
        sliding_window = getattr(self.config, "sliding_window", None)

        if HAS_XFORMERS:
            # Always create causal mask for xformers
            if (
                sliding_window is None
                or sliding_window == "null"
                or sliding_window <= 0
            ):
                causal_mask = xformers.attn_bias.LowerTriangularMask()
            elif q_len <= sliding_window:
                causal_mask = xformers.attn_bias.LowerTriangularMask()
            else:
                causal_mask = xformers.attn_bias.BlockDiagonalCausalMask.from_seqlens(
                    [q_len] * bsz
                ).make_local_attention(window_size = sliding_window)

            # If attention_mask exists, it will be handled in the attention forward

        else:
            # Not using xformers - need to create attention masks
            if (
                sliding_window is None
                or sliding_window == "null"
                or sliding_window <= 0
                or q_len <= sliding_window
            ):
                # Fully causal mask
                causal_mask_values = torch.triu(
                    torch.full((q_len, q_len), -torch.inf, device = input_ids.device),
                    diagonal = 1,
                )
            else:
                # Sliding window attention
                q_indices = torch.arange(q_len, device = input_ids.device).view(-1, 1)
                k_indices = torch.arange(q_len, device = input_ids.device).view(1, -1)

                causal_bool_mask = k_indices <= q_indices
                window_bool_mask = (q_indices - k_indices) < sliding_window

                causal_mask_values = torch.where(
                    causal_bool_mask & window_bool_mask, 0.0, -torch.inf
                )

            # Combine with existing attention_mask if present
            if attention_mask is None:
                attention_mask = causal_mask_values[None, None, :, :].expand(
                    bsz, 1, q_len, q_len
                )
            else:
                if attention_mask.dim() == 2:
                    # Convert 0/1 padding mask to additive format: 1->0 (keep), 0->-inf (mask)
                    padding_mask = torch.where(
                        attention_mask[:, None, None, :].bool(),
                        0.0,
                        -torch.inf,
                    )
                    attention_mask = causal_mask_values[None, None, :, :] + padding_mask
                else:
                    attention_mask = (
                        attention_mask + causal_mask_values[None, None, :, :]
                    )

            attention_mask = attention_mask.to(
                dtype = _get_dtype(dtype_from_config(self.config))
            )

    output_attentions = (
        output_attentions
        if output_attentions is not None
        else self.config.output_attentions
    )
    output_hidden_states = (
        output_hidden_states
        if output_hidden_states is not None
        else self.config.output_hidden_states
    )
    return_dict = (
        return_dict if return_dict is not None else self.config.use_return_dict
    )

    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
    self.model._has_no_labels = labels is None

    if past_key_values is not None:
        outputs = LlamaModel_fast_forward_inference(
            self,
            input_ids,
            past_key_values,
            position_ids = position_ids,
            attention_mask = attention_mask,
        )
    else:
        outputs = self.model(
            input_ids = input_ids,
            causal_mask = causal_mask,
            attention_mask = attention_mask,
            position_ids = position_ids,
            past_key_values = past_key_values,
            inputs_embeds = inputs_embeds,
            use_cache = use_cache,
            output_attentions = output_attentions,
            output_hidden_states = output_hidden_states,
            return_dict = return_dict,
            **kwargs,
        )

    hidden_states = outputs[0]

    bsz, q_len, hd = hidden_states.shape
    lm_head = self.lm_head.weight
    lm_head_device = lm_head.device

    # Move items to same device as lm_head
    hidden_states = hidden_states.to(lm_head_device)
    if labels is not None:
        labels = labels.to(lm_head_device)

    # If we are in GRPO mode, return raw hidden states
    if os.environ.get("UNSLOTH_RETURN_HIDDEN_STATES", "0") == "1":
        num_logits_to_keep = max(num_logits_to_keep, logits_to_keep)
        if num_logits_to_keep != 0:
            hidden_states = hidden_states[:, -num_logits_to_keep:, :]
        return CausalLMOutputWithPast(
            loss = None,
            logits = hidden_states,
            past_key_values = outputs.past_key_values,
            hidden_states = outputs.hidden_states,
            attentions = outputs.attentions,
        )

    if bsz == 1 and q_len == 1:
        # Single-token decode: a matrix-vector product is enough.
        logits = torch.mv(lm_head, hidden_states.ravel().to(lm_head.dtype))
        logits = logits.unsqueeze(0).unsqueeze(0)
    elif num_logits_to_keep != 0:
        logits = self.lm_head(
            hidden_states[:, -num_logits_to_keep:, :].to(lm_head.dtype)
        )
    else:
        RETURN_LOGITS = os.environ.get("UNSLOTH_RETURN_LOGITS", "0") == "1"

        if not RETURN_LOGITS and labels is not None:
            # Fused path: unsloth_fused_ce_loss computes the loss straight from
            # the hidden states, so full logits are never materialised.
            n_items = kwargs.get("num_items_in_batch", None)
            if n_items is None:
                n_items = kwargs.get("n_items", None)
            logit_softcapping = getattr(self.config, "final_logit_softcapping", 0)

            loss = unsloth_fused_ce_loss(
                trainer = None,
                hidden_states = hidden_states,
                lm_head_weight = lm_head,
                lm_head_bias = None,
                labels = labels,
                mask = None,
                n_items = n_items,
                scaling = getattr(self, "accelerator_scaler", None),
                target_gb = None,
                torch_compile = True,
                logit_softcapping = logit_softcapping,
            )
            if not return_dict:
                # `logits` does not exist on this path; use the same
                # EMPTY_LOGITS placeholder as the dict output below.
                output = (EMPTY_LOGITS,) + outputs[1:]
                return (loss,) + output if loss is not None else output

            output = CausalLMOutputWithPast(
                loss = loss,
                logits = EMPTY_LOGITS,
                past_key_values = outputs.past_key_values,
                hidden_states = outputs.hidden_states,
                attentions = outputs.attentions,
            )
            return output
        logits = self.lm_head(hidden_states.to(lm_head.dtype))
    logits = logits.to(_get_dtype(dtype_from_config(self.config)))

    loss = None
    if labels is not None:
        shift_logits = logits
        # Shift labels left by one and ignore the final position, instead of
        # slicing the logits.
        shift_labels = torch.empty_like(labels)
        shift_labels[..., :-1] = labels[..., 1:]
        shift_labels[..., -1] = -100
        mask_packed_sequence_boundaries(
            shift_labels,
            kwargs.get("packed_seq_lengths"),
        )
        n_items = kwargs.get("num_items_in_batch", None)
        if n_items is None:
            n_items = kwargs.get("n_items", None)
        loss = fast_cross_entropy_loss(
            logits = shift_logits,
            labels = shift_labels,
            n_items = n_items,
        )

    if not return_dict:
        output = (logits,) + outputs[1:]
        return (loss,) + output if loss is not None else output

    return CausalLMOutputWithPast(
        loss = loss,
        logits = logits,
        past_key_values = outputs.past_key_values,
        hidden_states = outputs.hidden_states,
        attentions = outputs.attentions,
    )


# Transformers had to update for Mistral Nemo 12b since Attention is (5120, 4096) now.
def patch_mistral_nemo_attention(function):
    """Rewrite attention ``__init__`` source text so ``head_dim`` is decoupled
    from ``hidden_size // num_attention_heads``.

    ``function`` is source code as a string. Three literal substitutions are
    applied in order: the head_dim * heads consistency check becomes ``False``,
    ``head_dim`` is read from ``config.head_dim``, and ``o_proj`` takes
    ``num_attention_heads * head_dim`` input features. Text that contains none
    of the patterns is returned unchanged.
    """
    substitutions = (
        (
            "(self.head_dim * self.config.num_attention_heads) != self.config.hidden_size",
            "False",
        ),
        (
            "self.head_dim = self.config.hidden_size // self.config.num_attention_heads",
            "self.head_dim = config.head_dim",
        ),
        (
            "self.o_proj = nn.Linear(self.config.hidden_size, self.config.hidden_size, bias=False)",
            "self.o_proj = nn.Linear(self.config.num_attention_heads * self.head_dim, self.config.hidden_size, bias=False)",
        ),
    )
    for old_text, new_text in substitutions:
        function = function.replace(old_text, new_text)
    return function


class FastMistralModel(FastLlamaModel):
    """Mistral flavour of ``FastLlamaModel``: installs the fast forward
    functions on the transformers Mistral classes and loads models through
    ``FastLlamaModel.from_pretrained``."""

    @staticmethod
    def pre_patch():
        """Monkey-patch the transformers Mistral classes in place."""
        init_name, function = patch_linear_scaling(
            model_name = "mistral",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = MistralAttention,
        )
        # Just for Mistral Nemo models!
        if init_name is not None and function is not None:
            # Define the rewritten __init__ in this module's globals, then
            # look it up by name and install it.
            exec(patch_mistral_nemo_attention(function), globals())
            MistralAttention.__init__ = eval(init_name)

        # All attention variants share one forward implementation. The Sdpa /
        # Flash names may simply be aliases of MistralAttention.
        for attention_class in (
            MistralAttention,
            MistralSdpaAttention,
            MistralFlashAttention2,
        ):
            attention_class.forward = MistralAttention_fast_forward
        MistralDecoderLayer.forward = LlamaDecoderLayer_fast_forward
        MistralModel.forward = LlamaModel_fast_forward
        MistralForCausalLM.forward = MistralForCausalLM_fast_forward
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(MistralForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.mistral.modeling_mistral as modeling_mistral

        modeling_mistral.MistralRotaryEmbedding = LlamaRotaryEmbedding

    @staticmethod
    def from_pretrained(
        model_name = "unsloth/mistral-7b-bnb-4bit",
        max_seq_length = None,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,  # Mistral does not support RoPE scaling
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        **kwargs,
    ):
        """Load a model via ``FastLlamaModel.from_pretrained``.

        Every argument is forwarded unchanged except ``model_patcher``: the
        value passed in is ignored and ``FastMistralModel`` is always used.
        """
        loaded = FastLlamaModel.from_pretrained(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = FastMistralModel,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
            **kwargs,
        )
        return loaded


================================================
FILE: unsloth/models/qwen2.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
)
from transformers.models.qwen2.modeling_qwen2 import (
    Qwen2Attention,
    Qwen2DecoderLayer,
    Qwen2Model,
    Qwen2ForCausalLM,
)

# For Pytorch 2.1.1
try:
    from transformers.models.qwen2.modeling_qwen2 import (
        Qwen2SdpaAttention,
        Qwen2FlashAttention2,
    )
except ImportError:
    # The backend-specific attention classes could not be imported from this
    # transformers build; alias them to Qwen2Attention so code that patches
    # them by name keeps working. Only ImportError is caught so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    Qwen2SdpaAttention = Qwen2Attention
    Qwen2FlashAttention2 = Qwen2Attention


class FastQwen2Model(FastLlamaModel):
    """Qwen2 flavour of ``FastLlamaModel``: installs the Llama fast forward
    functions on the transformers Qwen2 classes and loads models through
    ``FastLlamaModel.from_pretrained``."""

    @staticmethod
    def pre_patch():
        """Monkey-patch the transformers Qwen2 classes in place."""
        init_name, function = patch_linear_scaling(
            model_name = "qwen2",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = Qwen2Attention,
        )
        if init_name is not None:
            # Define the generated __init__ in this module's globals, then
            # look it up by name and install it.
            exec(function, globals())
            Qwen2Attention.__init__ = eval(init_name)

        # All attention variants reuse the Llama forward implementation. The
        # Sdpa / Flash names may simply be aliases of Qwen2Attention.
        for attention_class in (
            Qwen2Attention,
            Qwen2SdpaAttention,
            Qwen2FlashAttention2,
        ):
            attention_class.forward = LlamaAttention_fast_forward
        Qwen2DecoderLayer.forward = LlamaDecoderLayer_fast_forward
        Qwen2Model.forward = LlamaModel_fast_forward
        Qwen2ForCausalLM.forward = CausalLM_fast_forward(
            LlamaModel_fast_forward_inference
        )
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(Qwen2ForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.qwen2.modeling_qwen2 as modeling_qwen2

        modeling_qwen2.Qwen2RotaryEmbedding = LlamaRotaryEmbedding

    @staticmethod
    def from_pretrained(
        model_name = "Qwen/Qwen2-7B",
        max_seq_length = 4096,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,  # Qwen2 does not support RoPE scaling
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        **kwargs,
    ):
        """Load a model via ``FastLlamaModel.from_pretrained``.

        Every argument is forwarded unchanged except ``model_patcher``: the
        value passed in is ignored and ``FastQwen2Model`` is always used.
        """
        loaded = FastLlamaModel.from_pretrained(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = FastQwen2Model,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
            **kwargs,
        )
        return loaded


================================================
FILE: unsloth/models/qwen3.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
import os
from ._utils import __version__
from unsloth_zoo.utils import Version, _get_dtype
from ..utils.packing import get_packed_info_from_kwargs
from ..utils.attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    run_attention,
    SDPA,
    select_attention_backend,
)
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
    _LlamaModel_fast_forward_inference,
)

try:
    from transformers.models.qwen3.modeling_qwen3 import (
        Qwen3Attention,
        Qwen3DecoderLayer,
        Qwen3Model,
        Qwen3ForCausalLM,
    )
except ImportError:
    # Only import failures are handled here (previously a bare `except:`), so
    # KeyboardInterrupt / SystemExit and unrelated errors are not swallowed.
    # If the installed transformers is older than the minimum below, replace
    # the raw failure with an actionable upgrade message.
    transformers_version = Version(transformers_version)
    if not transformers_version >= Version(
        "4.50.3"
    ):  # TODO: Update when transformers is updated
        raise ImportError(
            f"Unsloth: Your transformers version of {transformers_version} does not support Qwen3 and Qwen3Moe.\n"
            f"The minimum required version is 4.50.3.\n"
            f'Try `pip install --upgrade "transformers>=4.50.3"`\n'
            f"to obtain the latest transformers build, then restart this session."
        )
from transformers.modeling_attn_mask_utils import (
    _prepare_4d_causal_attention_mask_for_sdpa,
)

# For Pytorch 2.1.1
try:
    from transformers.models.qwen3.modeling_qwen3 import (
        Qwen3SdpaAttention,
        Qwen3FlashAttention2,
    )
except ImportError:
    # The backend-specific attention classes could not be imported from this
    # transformers build; alias them to Qwen3Attention so code that patches
    # them by name keeps working. Only ImportError is caught so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    Qwen3SdpaAttention = Qwen3Attention
    Qwen3FlashAttention2 = Qwen3Attention


def Qwen3Attention_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: bool = False,
    use_cache: bool = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
    """
    Training / prefill attention forward for Qwen3 (Qwen2-style attention plus QK-norm).

    Args:
        hidden_states: input activations, unpacked as (bsz, q_len, hidden).
        causal_mask: forwarded untouched to the attention dispatcher.
        attention_mask: when given, forces the SDPA backend.
        position_ids: positions handed to the RoPE kernel; falls back to
            `kwargs["position_ids"]` when None.
        past_key_value: optional (K, V) pair that is prepended to the new K/V.
        output_attentions: accepted for interface compatibility; attention
            weights are never returned.
        use_cache: when truthy, the concatenated (K, V) pair is returned.
        padding_mask: accepted for interface compatibility; unused here.
        position_embeddings: optional precomputed (cos, sin) tables.
        **kwargs: may carry packed-sequence metadata read by
            `get_packed_info_from_kwargs`.

    Returns:
        (attn_output, None, present_key_value)
    """
    # Drop the decode-time scratch buffers created by the inference fast path.
    if hasattr(self, "paged_attention"):
        for stale_attr in (
            "paged_attention_K",
            "paged_attention_V",
            "paged_attention",
            "temp_QA",
            "temp_KV",
            "RH_Q",
            "attention",
        ):
            delattr(self, stale_attr)

    bsz, q_len, _ = hidden_states.size()

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    assert n_kv_heads * n_groups == n_heads

    Q, K, V = self.apply_qkv(self, hidden_states)
    # Keep Q and K in (bsz, q_len, heads, head_dim) layout for now: the
    # transpose happens only after normalisation.
    Q = Q.view(bsz, q_len, n_heads, head_dim)
    K = K.view(bsz, q_len, n_kv_heads, head_dim)
    V = V.view(bsz, q_len, n_kv_heads, head_dim).transpose(1, 2)
    seq_info = get_packed_info_from_kwargs(kwargs, hidden_states.device)

    # Qwen3 has QKNorm. This seems to be the only difference from Qwen2.
    # Note that using fast_layernorm_compiled causes issues as the dimensions don't match up.
    # A compiled version of the new norm was tried but its numbers did not match Transformers.
    # TODO: Check on the differences here.
    Q = fast_rms_layernorm(self.q_norm, Q).transpose(1, 2)
    K = fast_rms_layernorm(self.k_norm, K).transpose(1, 2)

    # Total key length = new tokens + whatever is already cached.
    past_len = 0 if past_key_value is None else past_key_value[0].shape[-2]
    kv_seq_len = K.shape[-2] + past_len

    # Extend RoPE dynamically to fit in VRAM
    have_usable_tables = (
        position_embeddings and kv_seq_len <= position_embeddings[0].shape[0]
    )
    if not have_usable_tables:
        rotary_emb = self.rotary_emb
        rotary_emb.extend_rope_embedding(V, seq_len = kv_seq_len)
        cos, sin = rotary_emb.get_cached(kv_seq_len, Q.device.index)
    else:
        cos, sin = position_embeddings

    # Useful for LongRoPE
    rope_position_ids = position_ids
    if rope_position_ids is None:
        rope_position_ids = kwargs.get("position_ids")
    Q, K = fast_rope_embedding(Q, K, cos, sin, rope_position_ids)

    if past_key_value is not None:
        K = torch.cat([past_key_value[0], K], dim = 2)
        V = torch.cat([past_key_value[1], V], dim = 2)
    present_key_value = (K, V) if use_cache else None

    # Attention module
    # Variable-length (packed) kernels are only considered when no cache is returned.
    use_varlen = seq_info is not None and present_key_value is None
    if attention_mask is not None:
        backend = SDPA
    else:
        backend = select_attention_backend(use_varlen)

    varlen_kwargs = {
        "dropout_p": 0.0,
        "causal": True,
        "softmax_scale": getattr(self, "softmax_scale", None),
    }
    attention_config = AttentionConfig(
        backend = backend,
        n_kv_heads = n_kv_heads,
        n_groups = n_groups,
        flash_dense_kwargs = {"causal": True},
        flash_varlen_kwargs = varlen_kwargs,
    )
    context = AttentionContext(
        bsz = bsz,
        q_len = q_len,
        kv_seq_len = kv_seq_len,
        n_heads = n_heads,
        head_dim = head_dim,
        requires_grad = hidden_states.requires_grad,
        seq_info = seq_info,
        attention_mask = attention_mask,
        causal_mask = causal_mask,
    )

    A = run_attention(config = attention_config, context = context, Q = Q, K = K, V = V)

    attn_output = self.apply_o(self, A.reshape(bsz, q_len, n_heads * head_dim))
    # Attention weights are never materialised on this path.
    return attn_output, None, present_key_value


# Module-level alias for `torch.matmul`, used by the bsz == 1 decode path below.
torch_matmul = torch.matmul


def Qwen3Attention_fast_forward_inference(
    self,
    hidden_states: torch.Tensor,
    past_key_value: Optional[Tuple[torch.Tensor]],
    position_ids,
    do_prefill = False,
    attention_mask = None,
    **kwargs,
):
    """
    https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L406
    Fast inference using KV cache.
    QK^T can be computed in 4 chunks

    [Q, q] @ [K, k].T where q, k are the new tokens.
    [QK^T, Qk^T]
    [qK^T, qk^T]

    Since the attention mask wipes Qk^T, we just get
    [QK^T,    0]
    [qK^T, qk^T]

    Since softmax is row-wise, we get
    softmax([QK^T,    0])
    softmax([qK^T, qk^T])

    We then multiply by   [V]
                          [v]
    softmax([QK^T,    0]) [softmax(QK^T)V] *
    softmax([qK^T, qk^T]) [softmax([qK^T, qk^T]) @ [V, v]]

    But notice * [softmax(QK^T)V] is just the last attention.
    We just need to compute the last final row.

    This means we can pass in a row of Q, but we need to
    remember K and V, which are called the KV cache.

    Args:
        hidden_states: activations for the token being decoded; unpacked as
            (bsz, _, hd) and reshaped below with a sequence length of 1.
        past_key_value: (K1, V1) pair; `K1.shape[-2]` is taken as the number
            of positions already cached.
        position_ids: indices used to gather rows from the cached cos/sin tables.
        do_prefill: when True, (re)allocate this layer's KV and scratch buffers
            on `self` and copy K1/V1 into them.
        attention_mask: optional 2D or 4D mask. It only affects the result on
            the bsz > 1 path; the bsz == 1 path never reads it.
        **kwargs: accepted and ignored.

    Returns:
        (A, (Kn, Vn)): the output-projected attention result and views of the
        KV buffer covering the first `kv_seq_len` positions.
    """
    Xn = hidden_states
    # `hd` is unpacked here but not used below.
    bsz, _, hd = hidden_states.size()
    K1, V1 = past_key_value
    dtype = Xn.dtype

    n_heads = self.config.num_attention_heads
    n_groups = self.num_key_value_groups
    n_kv_heads = self.config.num_key_value_heads
    head_dim = self.head_dim
    # assert(n_kv_heads * n_groups == n_heads)

    hidden_size = self.config.hidden_size
    attention_size = n_heads * head_dim
    seq_len = K1.shape[-2]
    # One new token is appended per call.
    kv_seq_len = seq_len + 1

    # Prefill phase
    # if not hasattr(self, "paged_attention"):
    device = hidden_states.device
    if do_prefill:
        # One buffer holds both K (index 0 of dim 1) and V (index 1 of dim 1);
        # the leading dim is the token position, with KV_CACHE_INCREMENT spare slots.
        self.paged_attention = torch.empty(
            (KV_CACHE_INCREMENT + seq_len + 1, 2, bsz, n_kv_heads, head_dim),
            dtype = dtype,
            device = device,
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        # Move the position axis of the incoming cache to the front to match the buffer layout.
        self.paged_attention_K[:seq_len] = K1.permute(2, 0, 1, 3)
        self.paged_attention_V[:seq_len] = V1.permute(2, 0, 1, 3)
        # Preallocated outputs for the q/k/v projections of the single new token.
        self.temp_QA = torch.empty(
            (2, bsz, 1, attention_size), dtype = dtype, device = device
        )
        self.temp_KV = torch.empty(
            (2, bsz, 1, n_kv_heads * head_dim), dtype = dtype, device = device
        )
        # Scratch buffer for the rotate-half term of RoPE.
        self.RH_Q = torch.empty((bsz, n_heads, 1, head_dim), dtype = dtype, device = device)

        # Mistral Nemo 12b has weird dimensions
        if attention_size != hidden_size:
            self.temp_O = torch.empty((bsz, 1, hidden_size), dtype = dtype, device = device)
        else:
            # Sizes match, so the o_proj output reuses the second temp_QA slot.
            self.temp_O = self.temp_QA[1][:, :, :hidden_size]

        # Buffer for the attention scores of the bsz == 1 path.
        self.attention = torch.empty(
            (bsz, n_heads, 1, KV_CACHE_INCREMENT + seq_len), dtype = dtype, device = device
        )
        self.scalar = 1.0 / math_sqrt(self.head_dim)
        self.half_head_dim = head_dim // 2
    elif kv_seq_len >= self.paged_attention.shape[0]:
        # Buffer is full: grow it by KV_CACHE_INCREMENT positions and refresh the K/V views.
        self.paged_attention.resize_(
            (
                self.paged_attention.shape[0] + KV_CACHE_INCREMENT,
                2,
                bsz,
                n_kv_heads,
                head_dim,
            )
        )
        self.paged_attention_K = self.paged_attention[:, 0]
        self.paged_attention_V = self.paged_attention[:, 1]
        self.attention.resize_(
            (bsz, n_heads, 1, self.attention.shape[-1] + KV_CACHE_INCREMENT)
        )

    # Project the new token, writing into the preallocated buffers.
    Qn = fast_linear_forward(self.q_proj, Xn, out = self.temp_QA[0])
    Kn = fast_linear_forward(self.k_proj, Xn, out = self.temp_KV[0])
    Vn = fast_linear_forward(self.v_proj, Xn, out = self.temp_KV[1])
    Qn = Qn.view(
        bsz, 1, n_heads, head_dim
    )  # .transpose(1, 2) # we will transpose after normalisation
    Kn = Kn.view(
        bsz, 1, n_kv_heads, head_dim
    )  # .transpose(1, 2) # we will transpose after normalisation
    Vn = Vn.view(bsz, 1, n_kv_heads, head_dim).transpose(1, 2)

    # QK-norm is applied before the heads axis is moved in front of the sequence axis.
    Qn = fast_rms_layernorm_inference(self.q_norm, Qn)
    Kn = fast_rms_layernorm_inference(self.k_norm, Kn)

    Qn = Qn.transpose(1, 2)
    Kn = Kn.transpose(1, 2)

    # cos, sin = self.rotary_emb(Vn, seq_len = kv_seq_len)
    # Qn, Kn = inplace_rope_embedding(Qn, Kn, cos, sin, position_ids)

    # Need to do it prior 2 steps before hitting full on short KV cache
    # or else error
    self.rotary_emb.extend_rope_embedding(Vn, seq_len + 2)
    cos, sin = self.rotary_emb.get_cached(kv_seq_len, Qn.device.index)
    cos = cos[position_ids].unsqueeze(1)
    sin = sin[position_ids].unsqueeze(1)
    h = self.half_head_dim

    # In-place RoPE: build rotate_half(Q) = [-Q[h:], Q[:h]] in RH_Q, then
    # Q = Q * cos + rotate_half(Q) * sin.
    RH_Q = self.RH_Q
    RH_Q[:, :, :, :h] = Qn[:, :, :, h:]
    RH_Q[:, :, :, h:] = Qn[:, :, :, :h]
    RH_Q[:, :, :, :h].neg_()  # torch.neg(RH_Q[:,:,:,:h], out = RH_Q[:,:,:,:h])
    Qn *= cos
    Qn.addcmul_(RH_Q, sin)

    # Same for K, reusing the first n_kv_heads heads of the RH_Q scratch buffer.
    RH_K = RH_Q[
        :, :n_kv_heads, :, :
    ]  # torch.empty((n_kv_heads, 1, head_dim), dtype = dtype, device = "cuda:0")
    RH_K[:, :, :, :h] = Kn[:, :, :, h:]
    RH_K[:, :, :, h:] = Kn[:, :, :, :h]
    RH_K[:, :, :, :h].neg_()  # torch.neg(RH_K[:,:,:,:h], out = RH_K[:,:,:,:h])
    Kn *= cos
    Kn.addcmul_(RH_K, sin)

    # New KV cache
    # Kn = torch.cat([K1, Kn], dim = 2)
    # Vn = torch.cat([V1, Vn], dim = 2)
    # Write the new token at position `seq_len`, then take views over all
    # kv_seq_len positions with the position axis moved back to dim 2.
    self.paged_attention_K[seq_len] = Kn.permute(2, 0, 1, 3)
    self.paged_attention_V[seq_len] = Vn.permute(2, 0, 1, 3)
    Kn = self.paged_attention_K[:kv_seq_len].permute(1, 2, 0, 3)
    Vn = self.paged_attention_V[:kv_seq_len].permute(1, 2, 0, 3)

    # Handle sliding windows
    sliding_window = getattr(self.config, "sliding_window", None)
    if sliding_window is not None and kv_seq_len > sliding_window:
        # Attend only to the most recent `sliding_window` positions; the mask is trimmed to match.
        start = kv_seq_len - sliding_window
        Knn = Kn[:, :, start:, :]  # .contiguous()
        Vnn = Vn[:, :, start:, :]  # .contiguous()
        if attention_mask is not None:
            attention_mask = attention_mask[..., start:]
    else:
        Knn, Vnn = Kn, Vn

    # when qlen==vlen and attn_mask is None, we should use causal attention
    Q_len = Qn.shape[-2]
    K_len = Knn.shape[-2]
    if attention_mask is not None and attention_mask.dim() == 2:
        # 2D mask: add broadcast axes for heads and query positions, and make it boolean.
        attention_mask = attention_mask[:, None, None, :].to(torch.bool)
    elif (
        attention_mask is not None
        and attention_mask.dim() == 4
        and attention_mask.dtype != torch.bool
    ):
        # 4D non-boolean mask: True where the value is exactly 0
        # (presumably an additive mask where 0 means "attend" — TODO confirm).
        attention_mask = attention_mask.eq(0)
    if attention_mask is None and Q_len == K_len:
        is_causal = True
    else:
        is_causal = False
    use_sdpa_gqa = SDPA_HAS_GQA
    if (
        use_sdpa_gqa
        and isinstance(attention_mask, torch.Tensor)
        and attention_mask.dim() >= 3
        and attention_mask.shape[0] > 1
    ):
        # Avoid SDPA GQA drift for batched masked decode.
        use_sdpa_gqa = False

    # Grouped query attention
    # K/V are expanded to n_heads whenever bsz == 1 (manual matmul path), or when
    # SDPA's native GQA is not used and there is more than one group.
    _, _, cached_len, _ = Knn.shape
    if bsz == 1 or ((not use_sdpa_gqa) and n_groups != 1):
        Knn = Knn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Vnn = Vnn[:, :, None, :, :].expand(
            bsz, n_kv_heads, n_groups, cached_len, head_dim
        )
        Knn = Knn.reshape(bsz, n_heads, cached_len, head_dim)
        Vnn = Vnn.reshape(bsz, n_heads, cached_len, head_dim)

    # Attention
    if bsz == 1:
        # Manual attention into preallocated buffers; `attention_mask` is not used here.
        Qn *= self.scalar  # See https://github.com/ggerganov/llama.cpp/issues/7805#issuecomment-2153349963
        # It seems like doing (Q * scalar) @ K is better than (Q @ K) * scalar to stop overflows
        A = torch_matmul(
            Qn, Knn.transpose(2, 3), out = self.attention[:, :, :, :cached_len]
        )
        A[:] = torch_nn_functional_softmax(
            A, dim = -1, dtype = torch.float32
        )  # .to(A.dtype)
        # The result overwrites Qn's storage.
        A = torch_matmul(A, Vnn, out = Qn)
    else:
        if use_sdpa_gqa:
            A = scaled_dot_product_attention(
                Qn,
                Knn,
                Vnn,
                attn_mask = attention_mask,
                is_causal = is_causal,
                enable_gqa = True,
            )
        else:
            A = scaled_dot_product_attention(
                Qn, Knn, Vnn, attn_mask = attention_mask, is_causal = is_causal
            )
    A = A.transpose(1, 2)
    A = A.reshape(bsz, 1, attention_size)
    A = fast_linear_forward(self.o_proj, A, out = self.temp_O)
    return A, (Kn, Vn)


class FastQwen3Model(FastLlamaModel):
    """Patcher and loader that routes dense Qwen3 models through Unsloth's fast paths."""

    @staticmethod
    def pre_patch():
        """Replace the Hugging Face Qwen3 forward passes with Unsloth's implementations."""
        patched_init_name, patched_init_source = patch_linear_scaling(
            model_name = "Qwen3",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = Qwen3Attention,
        )
        if patched_init_name is not None:
            # Define the generated __init__ in this module's globals, then install it.
            exec(patched_init_source, globals())
            Qwen3Attention.__init__ = eval(patched_init_name)

        # All attention variants share the same fast forward.
        for attention_class in (
            Qwen3Attention,
            Qwen3SdpaAttention,
            Qwen3FlashAttention2,
        ):
            attention_class.forward = Qwen3Attention_fast_forward

        Qwen3DecoderLayer.forward = LlamaDecoderLayer_fast_forward
        Qwen3Model.forward = LlamaModel_fast_forward
        decode_forward = _LlamaModel_fast_forward_inference(
            Qwen3Attention_fast_forward_inference
        )
        Qwen3ForCausalLM.forward = CausalLM_fast_forward(decode_forward)
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(Qwen3ForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.qwen3.modeling_qwen3 as hf_modeling_qwen3

        hf_modeling_qwen3.Qwen3RotaryEmbedding = LlamaRotaryEmbedding

    @staticmethod
    def from_pretrained(  # TODO: Change after release
        model_name = "Qwen/Qwen3-7B",
        max_seq_length = 4096,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        **kwargs,
    ):
        """
        Load a Qwen3 checkpoint through `FastLlamaModel.from_pretrained`.

        Every argument is forwarded unchanged except `model_patcher`, which is
        ignored: `FastQwen3Model` is always passed as the patcher.
        """
        loader = FastLlamaModel.from_pretrained
        return loader(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = FastQwen3Model,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
            **kwargs,
        )


================================================
FILE: unsloth/models/qwen3_moe.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .llama import *
import os
from ._utils import __version__
from .llama import (
    LlamaRotaryEmbedding,
    LlamaLinearScalingRotaryEmbedding,
)
from .qwen3 import (
    Qwen3Attention_fast_forward,
    FastQwen3Model,
)
from transformers.models.qwen3_moe.modeling_qwen3_moe import (
    Qwen3MoeAttention,
    Qwen3MoeSparseMoeBlock,
    Qwen3MoeMLP,
    Qwen3MoeDecoderLayer,
    Qwen3MoeModel,
    Qwen3MoeForCausalLM,
)

# For Pytorch 2.1.1
# TODO: Transformers moved to `attention_interface`. So we might not need these anymore
# try:
#     from transformers.models.qwen3_moe.modeling_qwen3_moe import (
#         Qwen3SdpaAttention,
#         Qwen3FlashAttention2,
#     )
# except:
#     Qwen3SdpaAttention   = Qwen3Attention
#     Qwen3FlashAttention2 = Qwen3Attention
# pass
from unsloth_zoo.utils import Version, _get_dtype


# Module-level alias for `torch.nn.functional.softmax`, used by the MoE router below.
torch_nn_functional_softmax = torch.nn.functional.softmax


def Qwen3MoeSparseMoeBlock_fast_forward(self, X, temp_gate = None, temp_up = None):
    """
    Route each token to its top-k experts and sum the weighted expert outputs.

    Adapted from https://github.com/huggingface/transformers/pull/36878/files#diff-0855b77fc27ad9449158a1c74953f909b011c00de7125f7c8e68d0ff209c092aR356-R370

    Args:
        self: the sparse MoE block. This function reads `gate_proj`, `top_k`,
            `num_experts` and `experts` from it.
        X: hidden states, unpacked as (bsz, seq_len, hd).
        temp_gate: optional preallocated output buffer handed to
            `fast_linear_forward` for the router logits.
        temp_up: unused; kept so existing callers keep working.

    Returns:
        (final_X, router_logits): `final_X` has shape (bsz, seq_len, hd) and the
        dtype of `X`; `router_logits` is the raw router output for the
        flattened tokens.
    """
    bsz, seq_len, hd = X.shape
    X = X.view(-1, hd)
    input_dtype = X.dtype

    # NOTE(review): verify that the patched block really exposes its router
    # linear layer under the name `gate_proj`.
    router_logits = fast_linear_forward(
        self.gate_proj, X, out = temp_gate
    )  # pretty much the only change from transformers implementation.

    # Softmax in float32, keep the top-k experts per token and renormalise
    # their weights so they sum to 1.
    routing_weights = torch_nn_functional_softmax(
        router_logits, dim = -1, dtype = torch.float32
    )
    routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim = -1)
    routing_weights /= routing_weights.sum(dim = -1, keepdim = True)
    # we cast back to the input dtype
    routing_weights = routing_weights.to(input_dtype)

    # Accumulate expert contributions in float32.
    final_X = torch.zeros((bsz * seq_len, hd), dtype = torch.float32, device = X.device)

    # One hot encode the selected experts to create an expert mask
    # this will be used to easily index which expert is going to be solicited
    expert_mask = torch.nn.functional.one_hot(
        selected_experts, num_classes = self.num_experts
    ).permute(2, 1, 0)

    # Loop over all available experts in the model and perform the computation on each expert
    for expert_idx in range(self.num_experts):
        expert_layer = self.experts[expert_idx]
        # `idx` is the top-k slot, `top_x` the token row routed to this expert.
        idx, top_x = torch.where(expert_mask[expert_idx])

        # Index the correct hidden states and compute the expert hidden state for
        # the current expert. We need to make sure to multiply the output hidden
        # states by `routing_weights` on the corresponding tokens (top-1 and top-2)
        current_state = X[None, top_x].reshape(-1, hd)
        current_X = (
            expert_layer(current_state) * routing_weights[top_x, idx, None]
        )  # Qwen3MoeMLP.forward = fast_swiglu_inference takes care of making this faster. Analogous to Dense models' MLP

        # However `index_add_` only support torch tensors for indexing so we'll use
        # the `top_x` tensor here.
        # `index_add_` requires the source to have the same dtype as the
        # accumulator, so cast to the accumulator's dtype rather than to the
        # input dtype (which fails for fp16 / bf16 inputs).
        final_X.index_add_(0, top_x, current_X.to(final_X.dtype))

    # Hand the result back in the caller's dtype.
    final_X = final_X.to(input_dtype).reshape(bsz, seq_len, hd)
    return final_X, router_logits


def Qwen3MoeDecoderLayer_fast_forward(
    self,
    hidden_states: torch.Tensor,
    causal_mask: Optional[BlockDiagonalCausalMask] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_value: Optional[Tuple[torch.Tensor]] = None,
    output_attentions: Optional[bool] = False,
    output_router_logits: Optional[bool] = False,
    use_cache: Optional[bool] = False,
    padding_mask: Optional[torch.LongTensor] = None,
    position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    *args,
    **kwargs,
):
    """
    Decoder layer forward for Qwen3-MoE: pre-norm attention, then pre-norm sparse MoE.

    Two variants are used. When `use_cache` is truthy and the layer carries a
    `_flag_for_generation` attribute, the inference layernorm and in-place
    residual adds are used and the MoE block is called through
    `Qwen3MoeSparseMoeBlock_fast_forward`. Otherwise the regular layernorm,
    out-of-place residual adds and `self.mlp(...)` are used.

    Returns:
        A tuple starting with the hidden states, followed (in this order) by
        the attention weights if `output_attentions`, the router logits if
        `output_router_logits`, and the present key/value if `use_cache`.
    """
    # Arguments handed to self-attention identically in both variants.
    attention_kwargs = dict(
        causal_mask = causal_mask,
        attention_mask = attention_mask,
        position_ids = position_ids,
        past_key_value = past_key_value,
        output_attentions = output_attentions,
        use_cache = use_cache,
        padding_mask = padding_mask,
        position_embeddings = position_embeddings,
    )

    if use_cache and hasattr(self, "_flag_for_generation"):
        # Generation path: residuals are added in place onto the sub-layer output.
        normed = fast_rms_layernorm_inference(self.input_layernorm, hidden_states)
        attn_out, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = normed,
            _flag_for_generation = self._flag_for_generation,
            **attention_kwargs,
        )
        attn_out += hidden_states
        hidden_states = attn_out

        # MoE Router MLP
        normed = fast_rms_layernorm_inference(
            self.post_attention_layernorm, hidden_states
        )
        moe_out, router_logits = Qwen3MoeSparseMoeBlock_fast_forward(self.mlp, normed)
        moe_out += hidden_states
        hidden_states = moe_out
    else:
        normed = fast_rms_layernorm(self.input_layernorm, hidden_states)
        attn_out, self_attn_weights, present_key_value = self.self_attn(
            hidden_states = normed,
            **attention_kwargs,
        )
        hidden_states = hidden_states + attn_out

        # MoE Router MLP
        normed = fast_rms_layernorm(self.post_attention_layernorm, hidden_states)
        moe_out, router_logits = self.mlp(normed)
        hidden_states = hidden_states + moe_out

    # Assemble the optional outputs in the order callers of this layer receive them.
    collected = [hidden_states]
    if output_attentions:
        collected.append(self_attn_weights)
    if output_router_logits:
        collected.append(router_logits)
    if use_cache:
        collected.append(present_key_value)
    return tuple(collected)


class FastQwen3MoeModel(FastQwen3Model):
    """Patcher and loader that routes Qwen3-MoE models through Unsloth's fast paths."""

    @staticmethod
    def pre_patch():
        """Replace the Hugging Face Qwen3-MoE forward passes with Unsloth's implementations."""
        patched_init_name, patched_init_source = patch_linear_scaling(
            model_name = "Qwen3Moe",
            rope_module = LlamaRotaryEmbedding,
            scaled_rope_module = LlamaLinearScalingRotaryEmbedding,
            attention_module = Qwen3MoeAttention,
        )
        if patched_init_name is not None:
            # Define the generated __init__ in this module's globals, then install it.
            exec(patched_init_source, globals())
            Qwen3MoeAttention.__init__ = eval(patched_init_name)

        # Attention is shared with the dense Qwen3 implementation.
        Qwen3MoeAttention.forward = Qwen3Attention_fast_forward
        Qwen3MoeSparseMoeBlock.forward = Qwen3MoeSparseMoeBlock_fast_forward
        # This is analogous to Dense models' MLP
        Qwen3MoeMLP.forward = fast_swiglu_inference
        Qwen3MoeDecoderLayer.forward = Qwen3MoeDecoderLayer_fast_forward
        Qwen3MoeModel.forward = LlamaModel_fast_forward
        # NOTE(review): the dense Qwen3 patcher wires a Qwen3-specific attention
        # decode function into its CausalLM forward, whereas this one uses the
        # generic `LlamaModel_fast_forward_inference` — confirm this is intended.
        Qwen3MoeForCausalLM.forward = CausalLM_fast_forward(
            LlamaModel_fast_forward_inference
        )
        PeftModelForCausalLM.forward = PeftModel_fast_forward
        fix_prepare_inputs_for_generation(Qwen3MoeForCausalLM)

        # Solves https://github.com/unslothai/unsloth/issues/168
        # Static KV Cache was introduced in 4.38.0, causing training to be much slower.
        # Inference can now be CUDAGraphed, but we shall retain the old rotary embeddings.
        # https://github.com/huggingface/transformers/pull/27931
        # https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llama/modeling_llama.py
        import transformers.models.qwen3_moe.modeling_qwen3_moe as hf_modeling_qwen3_moe

        hf_modeling_qwen3_moe.Qwen3MoeRotaryEmbedding = LlamaRotaryEmbedding

    @staticmethod
    def from_pretrained(  # TODO: Change after release
        model_name = "Qwen/Qwen3-7B",
        max_seq_length = 4096,
        dtype = None,
        load_in_4bit = True,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        model_patcher = None,
        tokenizer_name = None,
        trust_remote_code = False,
        **kwargs,
    ):
        """
        Load a Qwen3-MoE checkpoint through `FastLlamaModel.from_pretrained`.

        Every argument is forwarded unchanged except `model_patcher`, which is
        ignored: `FastQwen3MoeModel` is always passed as the patcher.
        """
        loader = FastLlamaModel.from_pretrained
        return loader(
            model_name = model_name,
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
            token = token,
            device_map = device_map,
            rope_scaling = rope_scaling,
            fix_tokenizer = fix_tokenizer,
            model_patcher = FastQwen3MoeModel,
            tokenizer_name = tokenizer_name,
            trust_remote_code = trust_remote_code,
            **kwargs,
        )


================================================
FILE: unsloth/models/rl.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = [
    "PatchFastRL",
    "vLLMSamplingParams",
]

import torch
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
import inspect
import os
import re
from unsloth_zoo.compiler import create_new_function
from unsloth_zoo.log import logger
from unsloth_zoo.logging_utils import PatchRLStatistics
from unsloth_zoo.rl_replacements import RL_REPLACEMENTS
from ..device_type import DEVICE_TYPE
from .rl_replacements import (
    RL_EXTRA_ARGS,
    RL_FUNCTIONS,
    RL_PRE_ITEMS,
    RL_CONFIG_CHANGES,
    RL_METRICS_CHANGES,
    RL_ADDITIONAL_FUNCTIONS,
)

# Module-level option set; presumably forwarded as `options` to `torch.compile`
# by code outside this view — TODO confirm where it is consumed.
torch_compile_options = {
    "epilogue_fusion": True,
    "max_autotune": False,  # Disable Triton mm kernels
    "shape_padding": True,
    "trace.enabled": False,
    "triton.cudagraphs": False,
}

# vLLM compatibility shim (TRL expects GuidedDecodingParams even if vLLM doesn't provide it)
# Best effort: if vLLM is missing or anything below fails, the shim is skipped silently.
try:
    import vllm.sampling_params as _unsloth_vllm_sp

    if not hasattr(_unsloth_vllm_sp, "GuidedDecodingParams"):

        # Minimal stand-in that only records the keyword arguments it was given.
        class GuidedDecodingParams:
            def __init__(self, **kwargs):
                self.kwargs = kwargs

        # Attach the stand-in to the vLLM module under the expected name.
        _unsloth_vllm_sp.GuidedDecodingParams = GuidedDecodingParams
except Exception:
    pass

from trl import __version__ as trl_version_raw
from importlib.metadata import version as importlib_version
from unsloth_zoo.utils import Version

# Parse the TRL version: first from `trl.__version__`, then from the installed
# package metadata, and finally fall back to "0.0.0" if neither can be parsed.
try:
    trl_version = Version(trl_version_raw)
except Exception:
    try:
        trl_version = Version(importlib_version("trl"))
    except Exception:
        trl_version = Version("0.0.0")

# Get PyTorch version for feature detection
# Everything from the first "+", "a" or "b" onwards is cut off before parsing
# (e.g. a "+cu121" local tag or an alpha / beta marker).
try:
    torch_version = Version(torch.__version__.split("+")[0].split("a")[0].split("b")[0])
except Exception:
    torch_version = Version("0.0.0")

# Get transformers version for feature detection
# Falls back to "0.0.0" if transformers cannot be imported or its version cannot be parsed.
try:
    from transformers import __version__ as _transformers_version_raw

    transformers_version = Version(_transformers_version_raw)
except Exception:
    transformers_version = Version("0.0.0")


def vLLMSamplingParams(**kwargs):
    """
    Build a vLLM `SamplingParams` from `kwargs` and remember those kwargs on it.

    The keyword arguments are stored on the returned object as `_set_kwargs`.
    """
    # Imported at call time rather than at module import time.
    from vllm import SamplingParams

    params = SamplingParams(**kwargs)
    params._set_kwargs = kwargs
    return params


def PatchRL(FastLanguageModel):
    """
    Monkey-patch TRL trainers and `transformers.Trainer` for Unsloth RL training.

    Two things are patched:

    * every attribute of `trl.trainer` whose name ends with `_trainer` and that
      has an `unwrap_model_for_generation` attribute gets that attribute
      replaced by a wrapper which switches the model to inference mode for the
      duration of generation and back to training mode afterwards;
    * `Trainer.prediction_step` is replaced by `unsloth_prediction_step`, which
      sets `UNSLOTH_RETURN_LOGITS` while the model is evaluated.

    Args:
        FastLanguageModel: object providing `for_inference(model)` and
            `for_training(model)`.
    """
    try:
        from trl.models.utils import unwrap_model_for_generation
    except ImportError:
        try:
            from trl.models import unwrap_model_for_generation
        except ImportError:
            # Local fallback -- TRL removed or moved this symbol
            from contextlib import contextmanager as _cm

            @_cm
            def unwrap_model_for_generation(
                model, accelerator, gather_deepspeed3_params = True
            ):
                unwrapped_model = accelerator.unwrap_model(model)
                is_gc = getattr(unwrapped_model, "is_gradient_checkpointing", False)
                if is_gc:
                    unwrapped_model.gradient_checkpointing_disable()
                try:
                    if (
                        getattr(accelerator, "state", None) is not None
                        and getattr(accelerator.state, "deepspeed_plugin", None)
                        is not None
                        and accelerator.state.deepspeed_plugin.zero_stage == 3
                    ):
                        if not gather_deepspeed3_params:
                            yield accelerator.unwrap_model(model)
                        else:
                            import deepspeed

                            with deepspeed.zero.GatheredParameters(model.parameters()):
                                yield accelerator.unwrap_model(model)
                    else:
                        yield unwrapped_model
                finally:
                    # Re-enable gradient checkpointing even if the body of the
                    # `with` block raised.
                    if is_gc:
                        unwrapped_model.gradient_checkpointing_enable()

    from contextlib import contextmanager

    @contextmanager
    def unsloth_unwrap_model_for_generation(model, *args, **kwargs):
        with unwrap_model_for_generation(model, *args, **kwargs) as unwrapped_model:
            # Put the model in inference mode.
            FastLanguageModel.for_inference(model)

            # We must use .clone for Unsloth since we force inference_mode
            # Rather we should have used no_grad
            original_generate = unwrapped_model.generate

            def generate_with_clone(*args, **kwargs):
                out = original_generate(*args, **kwargs)
                if isinstance(out, torch.Tensor):
                    return out.clone()
                return out

            unwrapped_model.generate = generate_with_clone

            try:
                yield unwrapped_model
            finally:
                # Restore generate and return
                unwrapped_model.generate = original_generate
                FastLanguageModel.for_training(model)

    from transformers import Trainer
    from transformers.trainer_pt_utils import nested_detach

    @torch.no_grad()
    def unsloth_prediction_step(
        self,
        model,
        inputs,
        prediction_loss_only,
        ignore_keys = None,
    ):
        """
        Perform an evaluation step on `model` using `inputs`.
        Subclass and override to inject custom behavior.
        Args:
            model (`nn.Module`):
                The model to evaluate.
            inputs (`Dict[str, Union[torch.Tensor, Any]]`):
                The inputs and targets of the model.
                The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
                argument `labels`. Check your model's documentation for all accepted arguments.
            prediction_loss_only (`bool`):
                Whether or not to return the loss only.
            ignore_keys (`List[str]`, *optional*):
                A list of keys in the output of your model (if it is a dictionary) that should be ignored when
                gathering predictions. When `None`, the model config's `keys_to_ignore_at_inference` is used
                if available, otherwise an empty list.
        Return:
            Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss,
            logits and labels (each being optional).
        """
        has_labels = (
            False
            if len(self.label_names) == 0
            else all(inputs.get(k) is not None for k in self.label_names)
        )
        # For CLIP-like models capable of returning loss values.
        # If `return_loss` is not specified or being `None` in `inputs`, we check if the default value of `return_loss`
        # is `True` in `model.forward`.
        return_loss = inputs.get("return_loss", None)
        if return_loss is None:
            return_loss = self.can_return_loss
        loss_without_labels = (
            True if len(self.label_names) == 0 and return_loss else False
        )

        inputs = self._prepare_inputs(inputs)
        if ignore_keys is None:
            if hasattr(self.model, "config"):
                ignore_keys = getattr(
                    self.model.config, "keys_to_ignore_at_inference", []
                )
            else:
                ignore_keys = []

        # labels may be popped when computing the loss (label smoothing for instance) so we grab them first.
        if has_labels or loss_without_labels:
            labels = nested_detach(tuple(inputs.get(name) for name in self.label_names))
            if len(labels) == 1:
                labels = labels[0]
        else:
            labels = None

        # Ask the patched forward pass to return logits for the duration of the
        # evaluation; the flag is always reset, even if evaluation raises.
        os.environ["UNSLOTH_RETURN_LOGITS"] = "1"
        try:
            with torch.no_grad():
                if has_labels or loss_without_labels:
                    with self.compute_loss_context_manager():
                        loss, outputs = self.compute_loss(
                            model, inputs, return_outputs = True
                        )
                    loss = loss.mean().detach()

                    if isinstance(outputs, dict):
                        logits = tuple(
                            v
                            for k, v in outputs.items()
                            if k not in ignore_keys + ["loss"]
                        )
                    else:
                        logits = outputs[1:]
                else:
                    loss = None
                    with self.compute_loss_context_manager():
                        # No labels: tokenize the raw prompts and run a plain forward pass.
                        tokenized_output = self.processing_class(
                            inputs["prompt"],
                            padding = True,
                            truncation = True,
                            return_tensors = "pt",
                        ).to(model.device)
                        outputs = model(**tokenized_output)
                    if isinstance(outputs, dict):
                        logits = tuple(
                            v for k, v in outputs.items() if k not in ignore_keys
                        )
                    else:
                        logits = outputs
                    # TODO: this needs to be fixed and made cleaner later.
                    if self.args.past_index >= 0:
                        self._past = outputs[self.args.past_index - 1]
        finally:
            os.environ["UNSLOTH_RETURN_LOGITS"] = "0"
        if prediction_loss_only:
            return (loss, None, None)

        logits = nested_detach(logits)
        if len(logits) == 1:
            logits = logits[0]

        return (loss, logits, labels)

    import trl.trainer

    trainers = dir(trl.trainer)
    trainers = [x for x in trainers if x.endswith("_trainer")]
    unwrap = "unwrap_model_for_generation"
    for trainer in trainers:
        # Best effort: trainers that cannot be loaded or patched are skipped.
        try:
            current_trainer = getattr(trl.trainer, trainer)
        except Exception:
            continue
        if hasattr(current_trainer, unwrap):
            try:
                setattr(current_trainer, unwrap, unsloth_unwrap_model_for_generation)
            except Exception:
                continue
    Trainer.prediction_step = unsloth_prediction_step


# Bind helper implementations from `RL_REPLACEMENTS` to module-level names.
# Note that `autotune_batch_and_chunks` is stored under the key
# "grpo_autotune_batch_and_chunks"; the other names match their keys.
grpo_selective_log_softmax = RL_REPLACEMENTS["grpo_selective_log_softmax"]
selective_log_softmax = RL_REPLACEMENTS["selective_log_softmax"]
calculate_pad_tokens_in_prompt = RL_REPLACEMENTS["calculate_pad_tokens_in_prompt"]
create_completion_attention_mask = RL_REPLACEMENTS["create_completion_attention_mask"]
left_pack_padding = RL_REPLACEMENTS["left_pack_padding"]
align_logprobs_with_mask = RL_REPLACEMENTS["align_logprobs_with_mask"]
autotune_batch_and_chunks = RL_REPLACEMENTS["grpo_autotune_batch_and_chunks"]
sanitize_logprob = RL_REPLACEMENTS["sanitize_logprob"]

RLTrainer_replacement = '''
import os
import math
import logging
from typing import *
from dataclasses import dataclass, field
from packaging.version import Version
import torch
import numpy as np
from contextlib import nullcontext
from torch.nn import functional as F
import inspect
from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
from transformers.training_args import ParallelMode
from unsloth_zoo.device_type import DEVICE_TYPE, device_synchronize

# Wrap trainer with padding to right and enable training mode
# Also patches W&B since multiple runs must use wandb.finish()
import functools
from types import MethodType
try:
    from unsloth_zoo.gradient_checkpointing import reset_unsloth_gradient_checkpointing_buffers
except:
    def reset_unsloth_gradient_checkpointing_buffers(): pass
def prepare_for_training_mode(f):
    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        # Enable training mode
        _was_training = None
        # Get gradient checkpointing setting from training arguments
        use_gc = getattr(self.args, 'gradient_checkpointing', True)
        if hasattr(self, 'model') and hasattr(self.model, "training"):
            _was_training = self.model.training
        if hasattr(self, 'model') and hasattr(self.model, "for_training"):
            self.model.for_training(use_gradient_checkpointing=use_gc)
        output = f(self, *args, **kwargs)
        # Restore previous mode when possible
        if hasattr(self, 'model') and hasattr(self.model, "for_inference"):
            if _was_training is False:
                self.model.for_inference()
            elif _was_training is True and hasattr(self.model, "for_training"):
                self.model.for_training(use_gradient_checkpointing=use_gc)
        # Reset gradient checkpointing buffers to free memory while staying ready for next run
        try:
            reset_unsloth_gradient_checkpointing_buffers()
        except:
            pass
        # Patch W&B to enable logging on future runs, otherwise it'll overwrite the first run
        try:
            import wandb
            wandb.finish()
        except:
            pass
        return output
    return wrapper
pass

torch_compile_options = {{
    "epilogue_fusion"   : True,
    "max_autotune"      : False,
    "shape_padding"     : True,
    "trace.enabled"     : False,
    "triton.cudagraphs" : False,
}}

{grpo_selective_log_softmax_code}
{selective_log_softmax_code}
{calculate_pad_tokens_in_prompt_code}
{create_completion_attention_mask_code}
{left_pack_padding_code}
{align_logprobs_with_mask_code}
{autotune_batch_and_chunks_code}
{sanitize_logprob_code}

{RL_pre}

@dataclass
class Unsloth{RLConfig_name}({RLConfig_name}):
    """
    {__RLConfig_doc__}
    """
    vllm_sampling_params: Optional[Any] = field(
        default = None,
        metadata = {{'help': 'vLLM SamplingParams'}},
    )
    unsloth_num_chunks : Optional[int] = field(
        default = -1,
        metadata = {{'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}},
    )
    unsloth_logit_chunk_multiplier : Optional[int] = field(
            default = None,
            metadata = {{'help': 'Multiplier for chunked logit computations.'}},
        )
    unsloth_grpo_mini_batch : Optional[int] = field(
        default = None,
        metadata = {{'help': 'Mini batch size for GRPO hidden state accumulation. Default is None unless user defines it.'}},
    )
    {max_seq_length_pre}
    def __init__({RLConfig_arguments},
        vllm_sampling_params = None,
        unsloth_num_chunks = -1,
        unsloth_logit_chunk_multiplier = None,
        unsloth_grpo_mini_batch = None,
        {max_seq_length_call}
        **kwargs,
    ):
{RLConfig_extra_args}
        super().__init__({RLConfig_call_args}{RLConfig_kwargs})
        self.vllm_sampling_params = vllm_sampling_params
        self.unsloth_num_chunks = unsloth_num_chunks
        if unsloth_grpo_mini_batch is not None:
            if self.generation_batch_size >= unsloth_grpo_mini_batch:
                self.unsloth_grpo_mini_batch = unsloth_grpo_mini_batch
            else:
                raise ValueError(
                    f"Unsloth GRPO mini batch size needs to be less than or equal to the effective generation batch size, "
                    f"which is self.per_device_train_batch_size * gradient_accumulation_steps."
                )
        self.unsloth_logit_chunk_multiplier = unsloth_logit_chunk_multiplier
        {max_seq_length_post}
{RLConfig_post}
pass

{RLTrainer_extras}

class Unsloth{RLTrainer_name}(_Unsloth{RLTrainer_name}):
    """
    {__RLTrainer_doc__}
    """
    def __init__({RLTrainer_arguments},
        **kwargs
    ):
        if args is None: args = Unsloth{RLConfig_name}()
{RLTrainer_extra_args}
        # [TODO] Fix up DataParallel multiplying batch sizes
        # [TODO] DDP works, but DP seems to not work? [TODO]
        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
            if getattr(args, "_n_gpu", 1) != 1:
                args._n_gpu = 1
        if "model" in locals() and hasattr(model, "for_training"):
            model.for_training(use_gradient_checkpointing=getattr(args, 'gradient_checkpointing', True))
        super().__init__({RLTrainer_call_args}{RLTrainer_kwargs})
        if "model" in locals() and hasattr(model, "for_inference"):
            model.for_inference()
{RLTrainer_post}
pass
'''


def _wrap_grpo_generate_and_score(trainer_cls):
    if not hasattr(trainer_cls, "_generate_and_score_completions"):
        return
    original = trainer_cls._generate_and_score_completions
    if getattr(original, "_unsloth_restore_training_wrapped", False):
        return

    def wrapped(self, *args, **kwargs):
        was_training = getattr(getattr(self, "model", None), "training", None)
        try:
            return original(self, *args, **kwargs)
        finally:
            if (
                was_training is False
                and hasattr(self, "model")
                and hasattr(self.model, "for_inference")
            ):
                try:
                    self.model.for_inference()
                except Exception:
                    pass

    wrapped._unsloth_restore_training_wrapped = True
    trainer_cls._generate_and_score_completions = wrapped


def _patch_trl_rl_trainers(trainer_file = "grpo_trainer"):
    # Patch for vLLM and Unsloth PEFT
    import trl
    import trl.trainer

    try:
        trainer = eval(f"trl.trainer.{trainer_file}")
    except Exception as error:
        logger.info(f"Unsloth: Could not import trl.trainer.{trainer_file}: {error}")
        return

    # Get SFTTrainer and SFTConfig names
    name = [
        x
        for x in dir(trainer)
        if x.endswith("Trainer")
        and x != "Trainer"
        and not x.startswith("_")
        and trainer_file.split("_")[0] in x.lower()
    ]
    config = [
        x
        for x in dir(trainer)
        if x.endswith("Config")
        and x != "Config"
        and not x.startswith("_")
        and trainer_file.split("_")[0] in x.lower()
    ]
    if len(name) != 1:
        logger.info(
            f"Unsloth: Could not find Trainer class in trl.trainer.{trainer_file}. Found: {name}"
        )
        return
    if len(config) != 1:
        # TRL 0.26+: Config may be in a separate *_config.py module
        config_module_name = trainer_file.replace("_trainer", "_config")
        try:
            config_mod = eval(f"trl.trainer.{config_module_name}")
            config = [
                x
                for x in dir(config_mod)
                if x.endswith("Config")
                and x != "Config"
                and not x.startswith("_")
                and trainer_file.split("_")[0] in x.lower()
            ]
        except Exception:
            pass
    if len(config) != 1 and len(name) == 1:
        # Thin wrapper fallback: walk the Trainer's MRO to find Config
        # in the real implementation module (e.g., trl.experimental.bco)
        try:
            _temp_cls = eval(f"trl.trainer.{trainer_file}.{name[0]}")
            for _parent in _temp_cls.__mro__[1:]:
                if _parent is object:
                    continue
                _parent_mod = inspect.getmodule(_parent)
                if (
                    _parent_mod is None
                    or _parent_mod.__name__ == f"trl.trainer.{trainer_file}"
                ):
                    continue
                config = [
                    x
                    for x in dir(_parent_mod)
                    if x.endswith("Config")
                    and x != "Config"
                    and not x.startswith("_")
                    and trainer_file.split("_")[0] in x.lower()
                ]
                if len(config) == 1:
                    break
        except Exception:
            pass
    if len(config) != 1:
        logger.info(
            f"Unsloth: Could not find Config class in trl.trainer.{trainer_file}. Found: {config}"
        )
        return

    # Get SFTTrainer, SFTConfig
    RLTrainer_name = name[0]
    RLConfig_name = config[0]
    try:
        RLTrainer = eval(f"trl.trainer.{trainer_file}.{RLTrainer_name}")
    except Exception as e:
        logger.info(
            f"Unsloth: Could not load {RLTrainer_name} from trl.trainer.{trainer_file}: {e}"
        )
        return
    _config_resolved_module = None
    try:
        RLConfig = eval(f"trl.trainer.{trainer_file}.{RLConfig_name}")
    except Exception:
        # TRL 0.26+: Config may be in a separate *_config.py module
        try:
            config_module_name = trainer_file.replace("_trainer", "_config")
            RLConfig = eval(f"trl.trainer.{config_module_name}.{RLConfig_name}")
        except Exception:
            # Thin wrapper fallback: load Config from parent trainer's module
            _config_loaded = False
            try:
                _temp_cls = eval(f"trl.trainer.{trainer_file}.{name[0]}")
                for _parent in _temp_cls.__mro__[1:]:
                    if _parent is object:
                        continue
                    _parent_mod = inspect.getmodule(_parent)
                    if (
                        _parent_mod is None
                        or _parent_mod.__name__ == f"trl.trainer.{trainer_file}"
                    ):
                        continue
                    if hasattr(_parent_mod, RLConfig_name):
                        RLConfig = getattr(_parent_mod, RLConfig_name)
                        _config_resolved_module = _parent_mod
                        _config_loaded = True
                        break
            except Exception:
                pass
            if not _config_loaded:
                logger.info(f"Unsloth: Could not load {RLConfig_name}")
                return

    # Check name
    if RLTrainer.__name__.startswith("Unsloth"):
        print(f"Unsloth: {RLTrainer.__name__} is already patched.")
        return
    if RLConfig.__name__.startswith("Unsloth"):
        print(f"Unsloth: {RLConfig.__name__} is already patched.")
        return

    # TRL 0.26+: Resolve thin wrappers to their experimental parent class.
    # Thin wrappers are deprecation shims in trl.trainer that just forward
    # *args/**kwargs to the real implementation in trl.experimental.
    # Only resolve if a parent class actually lives in a trl.experimental module.
    _trainer_resolved_module = None
    try:
        _trainer_src = inspect.getsource(RLTrainer)
        _trainer_module = inspect.getmodule(RLTrainer)
        _trainer_module_src = (
            inspect.getsource(_trainer_module) if _trainer_module else ""
        )
        if (
            "trl.experimental" in _trainer_src
            or "trl.experimental" in _trainer_module_src
        ):
            for _parent in RLTrainer.__mro__[1:]:
                if _parent is object:
                    continue
                _parent_mod = inspect.getmodule(_parent)
                if _parent_mod is None:
                    continue
                # Only resolve to a parent that lives in trl.experimental
                if "trl.experimental" in _parent_mod.__name__:
                    RLTrainer = _parent
                    _trainer_resolved_module = _parent_mod
                    break
    except Exception:
        pass

    try:
        _config_src = inspect.getsource(RLConfig)
        _config_module = inspect.getmodule(RLConfig)
        _config_module_src = inspect.getsource(_config_module) if _config_module else ""
        if (
            "trl.experimental" in _config_src
            or "trl.experimental" in _config_module_src
        ):
            for _parent in RLConfig.__mro__[1:]:
                if _parent is object:
                    continue
                _parent_mod = inspect.getmodule(_parent)
                if _parent_mod is None:
                    continue
                # Only resolve to a parent that lives in trl.experimental
                if "trl.experimental" in _parent_mod.__name__:
                    RLConfig = _parent
                    break
    except Exception:
        pass

    # Get old source
    old_RLTrainer_source = inspect.getsource(RLTrainer)
    old_RLConfig_source = inspect.getsource(RLConfig)

    if _trainer_resolved_module is not None:
        all_imports = dir(_trainer_resolved_module)
    elif _config_resolved_module is not None:
        all_imports = dir(_config_resolved_module)
    else:
        all_imports = dir(trainer)
    # Fix _deprecate_arguments not getting imported so stop __ but not _
    imports = [x for x in all_imports if not x.startswith("__")]

    # Get default arguments
    EMPTY = inspect.Parameter.empty
    processed = []
    for RLobject in [RLTrainer, RLConfig]:
        parameters = inspect.signature(RLobject.__init__).parameters
        types = (
            bool,
            type(None),
            int,
            float,
            str,
        )
        arguments = ["self"]
        call_args = []
        for k, v in parameters.items():
            if k == "self":
                continue
            v = v.default
            if v == "\n":
                v = re.escape("\n")
            if v is EMPTY:
                arguments.append(k)
            elif type(v) is str:
                arguments.append(f"{k} = '{v}'")
            elif type(v) in types:
                arguments.append(f"{k} = {v}")
            else:
                continue
            call_args.append(f"{k} = {k}")
        arguments = f"\n{' '*8}" + f",\n{' '*8}".join(arguments)
        call_args = f"\n{' '*12}" + f",\n{' '*12}".join(call_args)
        processed.append(
            (
                arguments,
                call_args,
            )
        )

    # Process RLTrainer first
    arguments, call_args = processed[0]
    RLTrainer_post = ""

    # Add tokenizer if not seen
    if "tokenizer" not in parameters and "processing_class" in parameters:
        arguments += f",\n{' '*8}tokenizer = None"
        call_args = call_args.replace(
            "processing_class = processing_class",
            "processing_class = tokenizer if tokenizer is not None else processing_class",
        )

    # Edit bf16, fp16 by checking model's dtype/torch_dtype directly
    extra_args = ""
    if "args" in call_args and "model" in call_args:
        mixed_precision = (
            "use_bf16 = getattr(args, 'bf16', False)\n"
            "if type(use_bf16) is not bool: use_bf16 = False\n"
            "use_fp16 = getattr(args, 'fp16', False)\n"
            "if type(use_fp16) is not bool: use_fp16 = False\n"
            "force_float32 = False\n"
            "full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'\n"
            "if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):\n"
            "    print('Unsloth: Switching to float32 training since model cannot work with float16')\n"
            "    force_float32 = True\n"
            "mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')\n"
            "dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)\n"
            "if dtype is None: dtype = model.get_input_embeddings().weight.dtype\n"
            "from unsloth_zoo.utils import _get_dtype\n"
            "dtype = _get_dtype(dtype)\n"
            "float16 = dtype == torch.float16\n"
            "if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')\n"
            "if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`')\n"
            "if force_float32:\n"
            "    # Forced float32 training\n"
            "    args.fp16 = False\n"
            "    args.bf16 = False\n"
            "    os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'\n"
            "    if hasattr(args, 'mixed_precision'): args.mixed_precision = 'no'\n"
            "    # args.mixed_precision is a new argument which needs to be set now\n"
            "elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32':\n"
            "    # Mixed precision training\n"
            "    args.fp16 = float16\n"
            "    args.bf16 = not float16\n"
            "    os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16'\n"
            "    if hasattr(args, 'mixed_precision'): args.mixed_precision = 'fp16' if float16 else 'bf16'\n"
            "    # args.mixed_precision is a new argument which needs to be set now\n"
            "elif mixed_precision_dtype == 'bfloat16':\n"
            "    # Both False since bfloat16 full finetuning doesn't do any autocasting.\n"
            "    args.fp16 = False\n"
            "    args.bf16 = False\n"
            "    os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'\n"
            "    if hasattr(args, 'mixed_precision'): args.mixed_precision = 'no'\n"
            "    # args.mixed_precision is a new argument which needs to be set now\n"
            "\n"
        )
        extra_args += mixed_precision

    # Check if per_device_eval_batch_size (default 8) bigger than bsz
    # Also use FP16 / BF16 evaluation
    if "args" in call_args:
        # Check eval_dataset first
        if "eval_dataset" in call_args:
            check_eval_dataset = (
                "if getattr(args, 'eval_dataset', None) is not None and "
                "getattr(args, 'eval_strategy', 'no') == 'no':\n"
                "    args.eval_strategy = 'steps'\n"
                "    if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1\n"
            )
            extra_args += check_eval_dataset

        # Check if gradient accumulation bug fix is applied
        check_ga = (
            "ga_steps = getattr(args, 'gradient_accumulation_steps', None)\n"
            "if ga_steps is not None and ga_steps > 1:\n"
            "    from transformers import __version__ as transformers_version\n"
            "    if Version(transformers_version) <= Version('4.45.2'):\n"
            "        print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\\n'\n"
            "              '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`')\n"
        )
        extra_args += check_ga

        eval_changes = (
            "if getattr(args, 'eval_strategy', 'no') != 'no':\n"
            "    eval_bsz = getattr(args, 'per_device_eval_batch_size', 8)\n"
            "    if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size\n"
            "    if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps\n"
            "fp16_full_eval = getattr(args, 'fp16_full_eval', False)\n"
            "if type(fp16_full_eval) is not bool: fp16_full_eval = False\n"
            "bf16_full_eval = getattr(args, 'bf16_full_eval', False)\n"
            "if type(bf16_full_eval) is not bool: bf16_full_eval = False\n"
            "if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True\n"
            "if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False\n"
            "if force_float32:\n"
            "    args.bf16_full_eval = False\n"
            "    args.fp16_full_eval = False\n"
            "elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16':\n"
            "    args.bf16_full_eval = True\n"
            "    args.fp16_full_eval = False\n"
            "elif not bf16_full_eval and not fp16_full_eval:\n"
            "    args.bf16_full_eval = args.bf16\n"
            "    args.fp16_full_eval = args.fp16\n"
        )
        extra_args += eval_changes

    # Force logits to be produced if preprocess_logits_for_metrics or compute_metrics is used
    if "model" in call_args:
        logits_check = (
            "_output_logits = False\n"
            "if locals().get('compute_metrics', None) is not None: _output_logits = True\n"
            "if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True\n"
            "if _output_logits:\n"
            "    os.environ['UNSLOTH_RETURN_LOGITS'] = '1'\n"
        )
        extra_args += logits_check
        warnings_issued_check = (
            "if model is not None:\n"
            "    _warnings_issued = getattr(model, 'warnings_issued', None)\n"
            "    if _warnings_issued is None:\n"
            "        model.warnings_issued = {}\n"
            "    elif not isinstance(_warnings_issued, dict):\n"
            "        try:\n"
            "            model.warnings_issued = dict(_warnings_issued)\n"
            "        except Exception:\n"
            "            model.warnings_issued = {}\n"
        )
        extra_args += warnings_issued_check

    # Check max_seq_length
    if "model" in call_args:
        length_check = (
            "if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'):\n"
            "    pass\n"
            "else:\n"
            "    model_max_seq_length = getattr(model, 'max_seq_length', None)\n"
            "    args_max_seq_length  = getattr(args,  'max_seq_length', None)\n"
            "    if args_max_seq_length is None and model_max_seq_length is not None:\n"
            "        max_seq_length = model.max_seq_length\n"
            "        if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length\n"
            "    elif args_max_seq_length is not None and model_max_seq_length is not None:\n"
            "        if args_max_seq_length > model_max_seq_length:\n"
            "            print('Unsloth: You set `max_seq_length` as ' + str(args_max_seq_length) + ' but '\n"
            "                   'the maximum the model supports is ' + str(model_max_seq_length) + '. We shall reduce it.')\n"
            "            args.max_seq_length = model_max_seq_length\n"
        )
        extra_args += length_check

        # At this point max_seq_length might be set, but trl is moving to max_length
        if trainer_file == "sft_trainer":
            max_length_check = (
                "if 'max_length' not in locals() and not hasattr(args, 'max_length'):\n"
                "    pass\n"
                "else:\n"
                "    if hasattr(args, 'max_seq_length') and args.max_seq_length is not None and args.max_seq_length > 0:\n"
                "        if hasattr(args, 'max_length'):\n"
                "            args.max_length = args.max_seq_length\n"
                "            max_length = args.max_length\n"
                "    else:\n"
                "        model_max_length = getattr(model, 'max_seq_length', None)\n"
                "        if model_max_length is None: model_max_length = getattr(model, 'max_length', None)\n"
                "        if model_max_length is not None:\n"
                "            args.max_length = model_max_length\n"
                "            max_length = args.max_length\n"
                "        elif hasattr(args, 'max_length') and args.max_length is not None:\n"
                "            max_length = args.max_length\n"
                "            # if we are here, then we are in a weird case where max_length is set but max_seq_length is not set\n"
                "            setattr(model, 'max_seq_length', max_length)\n"
                "        else:\n"
                "            print('Unsloth: We did not find `max_seq_length` or `max_length` in the model or args. We will set it to 1024.')\n"
                "            args.max_length = 1024\n"
            )
            extra_args += max_length_check

    # Enable for training and move padding side of tokenizer to right
    if "model" in call_args:
        training_check = (
            "if model is not None and hasattr(model, 'for_training'):\n"
            "    model.for_training(use_gradient_checkpointing=getattr(args, 'gradient_checkpointing', True))\n"
            "if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right'\n"
            "if 'processing_class' in locals():\n"
            "    if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right'\n"
            "    if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): "
            "processing_class.tokenizer.padding_side = 'right'\n"
        )
        extra_args += training_check

    # Check data collator if it's correct!
    if "data_collator" in call_args and "train_dataset" in call_args:
        data_collator_check = (
            "__tokenizer = processing_class if 'processing_class' in locals() else tokenizer\n"
            "from unsloth_zoo.vision_utils import UnslothVisionDataCollator\n"
            "if not isinstance(data_collator, UnslothVisionDataCollator):\n"
            "    if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:\n"
            "        data_collator = TransformersDataCollatorForLanguageModeling(\n"
            "            __tokenizer,\n"
            "            mlm = False,\n"
            "            mlm_probability = 0.0,\n"
            "            pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),\n"
            "        )\n"
            "    elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:\n"
            "        data_collator = DataCollatorForSeq2Seq(\n"
            "            __tokenizer,\n"
            "            pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),\n"
            "        )\n"
            "else:\n"
            "    if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False\n"
            "    if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''\n"
            "    if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True}\n"
        )
        extra_args += data_collator_check

        # Also check if .pad exists -> if not, and is VLM, then change it!
        pad_check = (
            "if not isinstance(data_collator, UnslothVisionDataCollator):\n"
            "    if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):\n"
            "        if isinstance(data_collator, DataCollatorForSeq2Seq):\n"
            "            data_collator = DataCollatorForSeq2Seq(\n"
            "                __tokenizer.tokenizer,\n"
            "                pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),\n"
            "            )\n"
            "        else:\n"
            "            data_collator = TransformersDataCollatorForLanguageModeling(\n"
            "                __tokenizer.tokenizer,\n"
            "                mlm = False,\n"
            "                mlm_probability = 0.0,\n"
            "                pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),\n"
            "            )\n"
        )
        extra_args += pad_check

    # Check NEFTune
    if "model" in call_args:
        neftune_check = (
            "if hasattr(self, 'neftune_hook_handle'):\n"
            "    self.neftune_hook_handle.remove()\n"
            "    if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle\n"
            "if getattr(args, 'neftune_noise_alpha', None) is not None:\n"
            "    model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha\n"
            "pass\n"
        )
        RLTrainer_post += neftune_check

    # Add accelerator scaler to model
    if "model" in call_args:
        accelerator_check = (
            "if hasattr(self, 'accelerator'):\n"
            "    scaler = self.accelerator.scaler\n"
            "    current_model = model\n"
            "    while hasattr(current_model, 'model'):\n"
            "        current_model.accelerator_scaler = scaler\n"
            "        current_model = current_model.model\n"
            "    current_model.accelerator_scaler = scaler\n"
            "pass\n"
        )
        RLTrainer_post += accelerator_check

    # Add enabling and disabling training modes
    if "model" in call_args:
        training_check = (
            "if hasattr(self, 'train'):\n"
            "    self.train = MethodType(prepare_for_training_mode(self.__class__.train), self)\n"
            "pass\n"
        )
        RLTrainer_post += training_check

    # Sync chat_template from processing_class to vLLM's tokenizer
    # This fixes base models that have custom chat templates applied after loading
    if "model" in call_args:
        vllm_chat_template_sync = (
            "if hasattr(self, 'llm') and self.llm is not None and hasattr(self.llm, 'get_tokenizer'):\n"
            "    _vllm_tok = self.llm.get_tokenizer()\n"
            "    _pc = getattr(self, 'processing_class', None) or getattr(self, 'tokenizer', None)\n"
            "    if _vllm_tok is not None and _pc is not None and getattr(_pc, 'chat_template', None) is not None and getattr(_vllm_tok, 'chat_template', None) is None:\n"
            "        _vllm_tok.chat_template = _pc.chat_template\n"
            "pass\n"
        )
        RLTrainer_post += vllm_chat_template_sync

    # Edit optional metrics
    other_metrics_processor = ""
    if trainer_file in RL_METRICS_CHANGES:
        process_extra_args = RL_METRICS_CHANGES[trainer_file]
        for process_extra_arg in process_extra_args:
            other_metrics_processor += process_extra_arg(
                old_RLTrainer_source, old_RLConfig_source
            )

    # Add statistics as well!
    extra_args += (
        "other_metrics = []\n"
        f"{other_metrics_processor}\n"
        "from unsloth_zoo.logging_utils import PatchRLStatistics\n"
        f"PatchRLStatistics('{trainer_file}', other_metrics)\n"
    )

    # Patch optional args
    if trainer_file in RL_EXTRA_ARGS:
        process_extra_args = RL_EXTRA_ARGS[trainer_file]
        for process_extra_arg in process_extra_args:
            extra_args += process_extra_arg(call_args, extra_args)

    # Create RLTrainer args
    extra_args = extra_args.split("\n")
    extra_args = "\n".join(" " * 8 + x for x in extra_args)
    RLTrainer_post = RLTrainer_post.split("\n")
    RLTrainer_post = "\n".join(" " * 8 + x for x in RLTrainer_post)
    RLTrainer_arguments = arguments
    RLTrainer_extra_args = extra_args
    RLTrainer_call_args = call_args

    # Fix RLConfig next
    arguments, call_args = processed[1]
    extra_args = ""

    # Edit GA / bsz and weight_decay
    replacements = {
        "output_dir": None,
        "logging_nan_inf_filter": False,
        "per_device_train_batch_size": 4,
        "gradient_accumulation_steps": 2,
        "weight_decay": 0.01,
        "seed": 3407,
        "optim": "adamw_8bit",
        "learning_rate": 5e-05,
        "per_device_eval_batch_size": 4,
        "eval_accumulation_steps": 2,
        "torch_empty_cache_steps": 250,
        "logging_steps": 1,
        "max_seq_length": None,
        "num_generations": 8,
        # "steps_per_generation"          : 1, # Otherwise defaults to ga_steps which is wrong
        # "generation_batch_size"         : None, # Useless. If steps_per_generation set, generation_batch_size clashes
        "top_k": None,
        "vllm_mode": "colocate",
        "generation_kwargs": {},
        "bf16": False,
        "fp16": False,
        "report_to": "none",
        "include_tokens_per_second": False,
        "include_num_input_tokens_seen": False,
        "auto_find_batch_size": False,  # Auto /2 batch size - too many people complained so removing
        "dataloader_pin_memory": True,
        "padding_free": None,  # None = user didn't set it, allows auto-enable detection
        # Might fail so disable for now
        # "dataloader_persistent_workers" : True, # Keeps dataloader in RAM
        # "dataloader_prefetch_factor"    : 2,
        # "dataloader_num_workers"        : 2, # Default is 0 means 1
    }
    # warmup_ratio deprecated in transformers >= 5.0; warmup_steps accepts float
    if transformers_version >= Version("5.0.0"):
        replacements["warmup_steps"] = 0.1
    else:
        replacements["warmup_ratio"] = 0.1

    for k, v in replacements.items():
        x = f"{k}( = [^,\n]{{1,}})?,\n"
        y = f"'{v}'" if type(v) is str else f"{v}"
        y = f"{k} = {y},\n"
        arguments = re.sub(x, y, arguments)

    # Set the GRPO beta default to 0.001: TRL's default used to be 0.04 and is now 0.0!
    # https://github.com/huggingface/trl/pull/3516
    # https://verl.readthedocs.io/en/latest/examples/config.html
    if trainer_file == "grpo_trainer":
        replacements = {
            "loss_type": "bnpo",  # Default GRPO paper
            "beta": 0.001,  # Recommended as seen in verl
            "auto_find_batch_size": False,  # Cannot work on GRPO
            # [TODO] See https://fengyao.notion.site/off-policy-rl
            # https://github.com/huggingface/trl/pull/3867 (August 7th)
            "vllm_importance_sampling_correction": False,
        }
        for k, v in replacements.items():
            x = f"{k}( = [^,\n]{{1,}})?,\n"
            y = f"'{v}'" if type(v) is str else f"{v}"
            y = f"{k} = {y},\n"
            arguments = re.sub(x, y, arguments)

    # Warn on too large or too small learning rate
    if "learning_rate" in call_args:
        learning_rate_check = (
            "if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! "
            "Consider increasing it, otherwise gradient updates will be close to 0!')\n"
            "if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! "
            "Consider decreasing it to 1e-1, otherwise gradient updates will explode!')\n"
        )
        extra_args += learning_rate_check

    # Fix num_train_epochs = None causing TypeError in Trainer.__init__
    # Trainer does `args.num_train_epochs > 0` which fails when None
    if "num_train_epochs" in call_args:
        num_train_epochs_check = (
            "if num_train_epochs is None:\n"
            "    num_train_epochs = 3.0  # Default to 3 epochs if None, max_steps will override\n"
        )
        extra_args += num_train_epochs_check

    # Check if max_seq_length is NOT defined (max_length is now default)
    if "max_seq_length" not in call_args and "max_length" in call_args:
        max_seq_length_pre = """max_seq_length : Optional[int] = field(
        default = None,
        metadata = {'help': 'Maximum sequence length to truncate to.'},
    )"""
        max_seq_length_call = "max_seq_length = None,"
        max_seq_length_post = "self.max_seq_length = max_seq_length"
    else:
        max_seq_length_pre = ""
        max_seq_length_call = ""
        max_seq_length_post = ""

    # Add output_dir saving
    if "output_dir" in call_args:
        # Default checks
        saving_check = (
            "if output_dir is None and save_strategy == 'steps' and save_steps == 500:\n"
            "    output_dir = 'unsloth_training_checkpoints'\n"
            "    save_strategy = 'no'\n"
        )
        extra_args += saving_check

    # Edit dataset_num_proc
    if "dataset_num_proc" in call_args:
        num_proc_check = (
            "import multiprocessing as _mp\n"
            "if dataset_num_proc is None:\n"
            "    if _mp.get_start_method() != 'fork':\n"
            "        dataset_num_proc = None\n"
            "    else:\n"
            "        import psutil\n"
            "        dataset_num_proc = min(max((psutil.cpu_count() or 1)+4, 2), 64)\n"
            "        memory_gb_left = psutil.virtual_memory().available / (1024**3)\n"
            "        if memory_gb_left <= 2: dataset_num_proc = 1\n"
            "        else: dataset_num_proc = min(dataset_num_proc, int(memory_gb_left))\n"
        )
        extra_args += num_proc_check

    # Add padding if flex attention is added
    if "pad_to_multiple_of" in call_args:
        pad_to_multiple_of = (
            "if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':\n"
            "    from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION\n"
            "    if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:\n"
            "        from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE\n"
            "        pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE\n"
            "\n"
        )
        extra_args += pad_to_multiple_of

    # Check for loss_type = dr_grpo and scale_rewards for GRPO
    if "loss_type" in call_args and "scale_rewards" in call_args:
        # See https://github.com/huggingface/trl/issues/3130#issuecomment-2746947835
        # DAPO uses per token loss so BNPO loss used
        check_dr_grpo = (
            "if loss_type.lower() == 'dr_grpo':\n"
            "    loss_type = 'dr_grpo'\n"
            "elif loss_type.lower() == 'dapo':\n"
            "    loss_type = 'dapo'\n"
            "if loss_type.lower() == 'dr_grpo':\n"
            "    if scale_rewards == None:\n"
            "        scale_rewards = True\n"
            "    elif scale_rewards == True:\n"
            "        print('Unsloth: The Dr GRPO paper recommends setting `scale_rewards` to False! Will override. Set it to `None` to force False.')\n"
            "        scale_rewards = False\n"
            "elif loss_type.lower() == 'dapo':\n"
            "    if mask_truncated_completions != True:\n"
            "        print('Unsloth: The DAPO paper recommends `mask_truncated_completions = True` - we will set it.')\n"
            "    if epsilon_high != 0.28:\n"
            "        print('Unsloth: The DAPO paper recommends `epsilon_high = 0.28` - we will set it.')\n"
            "    if beta != 0.0:\n"
            "        print(f'[WARNING] Unsloth: The DAPO paper recommends setting `beta = 0.0` to remove the KL term - You have set it to {beta}.')\n"
            "    mask_truncated_completions = True\n"
            "    epsilon_high = 0.28\n"
            "\n"
        )
        extra_args += check_dr_grpo

    # Check GRPO num_generations mismatch
    if (
        "per_device_train_batch_size" in call_args
        and "num_generations" in call_args
        and "steps_per_generation" in call_args
        and "generation_batch_size" in call_args
    ):
        # if world size is not set by accelerate or torchrun at this point it will be 1
        check_num_generations = (
            "if steps_per_generation is None and generation_batch_size is None:\n"
            "    ga = gradient_accumulation_steps\n"
            "    world_size = int(os.environ.get('WORLD_SIZE', '1'))\n"
            "    if (ga * world_size * per_device_train_batch_size) % num_generations != 0:\n"
            "        print('Unsloth: We now expect `per_device_train_batch_size` * `gradient_accumulation_steps` * `world_size` to be a multiple of `num_generations`.\\n"
            "We will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))\n"
            "        per_device_train_batch_size = num_generations\n"
            "\n"
        )
        extra_args += check_num_generations
    elif "per_device_train_batch_size" in call_args and "num_generations" in call_args:
        if "steps_per_generation" not in call_args:
            print(f"Unsloth: Could not find `steps_per_generation` in {trainer_file}")
        if "generation_batch_size" not in call_args:
            print(f"Unsloth: Could not find `generation_batch_size` in {trainer_file}")

        check_num_generations = (
            "if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:\n"
            "    print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\\n"
            "We will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))\n"
            "    per_device_train_batch_size = num_generations\n"
            "\n"
        )
        extra_args += check_num_generations

    # Check temperature must not be <= 0. Also stop if >= 10
    if "temperature" in call_args:
        check_temperature = (
            "if temperature <= 0:\n"
            "    raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')\n"
            "elif temperature >= 10:\n"
            "    raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')\n"
            "\n"
        )
        extra_args += check_temperature

    # Edit config with anything extra
    if trainer_file in RL_CONFIG_CHANGES:
        process_extra_args = RL_CONFIG_CHANGES[trainer_file]
        for process_extra_arg in process_extra_args:
            extra_args += process_extra_arg(old_RLTrainer_source, old_RLConfig_source)

    # Create RLConfig args
    extra_args = extra_args.split("\n")
    extra_args = "\n".join(" " * 8 + x for x in extra_args)
    RLConfig_arguments = arguments
    RLConfig_extra_args = extra_args
    RLConfig_call_args = call_args

    # TRL 0.27.0+ forces use_reentrant=False in gradient_checkpointing_kwargs.
    # Unsloth gradient checkpointing requires use_reentrant=True, so we remove
    # the setting after super().__init__() when it gets auto-applied.
    RLConfig_post = ""
    if trl_version >= Version("0.27.0"):
        RLConfig_post = (
            "        # Unsloth: Remove use_reentrant=False forced by TRL 0.27.0+\n"
            "        if getattr(self, 'gradient_checkpointing_kwargs', None) is not None:\n"
            "            if 'use_reentrant' in self.gradient_checkpointing_kwargs:\n"
            "                del self.gradient_checkpointing_kwargs['use_reentrant']\n"
        )

    # Patch vLLM and other functions
    RLTrainer_extras = patch_functions(
        RLTrainer, trainer_file, RLTrainer_name, all_imports, imports
    )
    if RLTrainer_extras is None:
        RLTrainer_extras = f"_Unsloth{RLTrainer_name} = {RLTrainer_name}"

    # Create full module
    exec(f"from trl.trainer import ({RLTrainer_name}, {RLConfig_name},)")
    __RLTrainer_doc__ = eval(f"trl.trainer.{RLTrainer_name}").__doc__
    if __RLTrainer_doc__ is None:
        __RLTrainer_doc__ = ""
    __RLConfig_doc__ = eval(f"trl.trainer.{RLConfig_name}").__doc__
    if __RLConfig_doc__ is None:
        __RLConfig_doc__ = ""

    # Get all pre-modules
    if trainer_file in RL_PRE_ITEMS:
        RL_pre = "\n".join(RL_PRE_ITEMS[trainer_file])
    else:
        RL_pre = ""

    # Check if SamplingParams is in there
    if "SamplingParams" in old_RLTrainer_source:
        RL_pre = RL_pre + "\n" + inspect.getsource(vLLMSamplingParams)

    # Selective log softmax and other functions
    selective_log_softmax_code = inspect.getsource(selective_log_softmax)
    grpo_selective_log_softmax_code = inspect.getsource(grpo_selective_log_softmax)
    calculate_pad_tokens_in_prompt_code = inspect.getsource(
        calculate_pad_tokens_in_prompt
    )
    create_completion_attention_mask_code = inspect.getsource(
        create_completion_attention_mask
    )
    left_pack_padding_code = inspect.getsource(left_pack_padding)
    align_logprobs_with_mask_code = inspect.getsource(align_logprobs_with_mask)
    autotune_batch_and_chunks_code = inspect.getsource(autotune_batch_and_chunks)
    sanitize_logprob_code = inspect.getsource(sanitize_logprob)
    # Get final source code
    RLTrainer_source = RLTrainer_replacement.format(
        RLTrainer_name = RLTrainer_name,
        __RLTrainer_doc__ = __RLTrainer_doc__,
        RLTrainer_arguments = RLTrainer_arguments,
        RLTrainer_extra_args = RLTrainer_extra_args,
        RLTrainer_call_args = RLTrainer_call_args,
        RLTrainer_kwargs = ",**kwargs"[1 if RLTrainer_call_args.endswith(",") else 0 :],
        RLConfig_name = RLConfig_name,
        __RLConfig_doc__ = __RLConfig_doc__,
        RLConfig_arguments = RLConfig_arguments,
        RLConfig_extra_args = RLConfig_extra_args,
        RLConfig_call_args = RLConfig_call_args,
        RLConfig_kwargs = ",**kwargs"[1 if RLConfig_call_args.endswith(",") else 0 :],
        RLConfig_post = RLConfig_post,
        RLTrainer_extras = RLTrainer_extras,
        RLTrainer_post = RLTrainer_post,
        RL_pre = RL_pre,
        max_seq_length_pre = max_seq_length_pre,
        max_seq_length_call = max_seq_length_call,
        max_seq_length_post = max_seq_length_post,
        selective_log_softmax_code = selective_log_softmax_code,
        grpo_selective_log_softmax_code = grpo_selective_log_softmax_code,
        calculate_pad_tokens_in_prompt_code = calculate_pad_tokens_in_prompt_code,
        create_completion_attention_mask_code = create_completion_attention_mask_code,
        autotune_batch_and_chunks_code = autotune_batch_and_chunks_code,
        left_pack_padding_code = left_pack_padding_code,
        align_logprobs_with_mask_code = align_logprobs_with_mask_code,
        sanitize_logprob_code = sanitize_logprob_code,
    )

    if RLTrainer_name == "GRPOTrainer":
        # Base torch_compile_options shared by all device types
        base_options = """torch_compile_options = {
            "epilogue_fusion"   : True,
            "max_autotune"      : False,
            "shape_padding"     : True,
            "trace.enabled"     : False,"""

        # Generate torch_compile_options based on device type
        if DEVICE_TYPE == "cuda":
            # CUDA-specific options (added to base options)
            cuda_options = """
            "triton.enable_persistent_tma_matmul": torch.cuda.get_device_capability()[0] >= 9,"""
            # cutlass options were added in PyTorch 2.8.0
            if torch_version >= Version("2.8.0"):
                cuda_options += """
            "cuda.cutlass_epilogue_fusion_enabled": torch.cuda.get_device_capability()[0] >= 9,
            "cuda.cutlass_tma_only": torch.cuda.get_device_capability()[0] >= 9,"""
            cuda_options += """
            "cuda.compile_opt_level"              : "-O2",
            "cuda.enable_cuda_lto"                : True,
        }"""
            new_options = base_options + cuda_options
        else:
            # XPU, HIP, and other device types use base options only
            new_options = (
                base_options
                + """
        }"""
            )

        pattern = r"torch_compile_options\s*=\s*\{[^}]*\}"

        RLTrainer_source = re.sub(
            pattern, new_options, RLTrainer_source, flags = re.DOTALL
        )

        if trl_version >= Version("0.27.0"):
            peft_pattern = (
                r"\s*if is_peft_available\(\) and is_peft_model\(model\) and args\.beta != 0\.0:"
                r".*?"
                r"param\.data = param\.data\.to\(torch\.bfloat16\)"
            )

            replacement_comment = "\n        # PEFT initialization logic removed via script for trl >= 0.27.0\n"

            RLTrainer_source = re.sub(
                peft_pattern, replacement_comment, RLTrainer_source, flags = re.DOTALL
            )

        elif trl_version >= Version("0.26.0"):
            peft_block_pattern = (
                r"\s*if is_peft_available\(\) and isinstance\(model, PeftModel\) and peft_config is not None:"
                r".*?"
                r"param\.data = param\.data\.to\(torch\.bfloat16\)"
            )

            RLTrainer_source = re.sub(
                peft_block_pattern,
                "\n        # TRL PEFT 0.26.0 initialization logic removed on unsloth side.\n",
                RLTrainer_source,
                flags = re.DOTALL,
            )

    # Remove TRL's unconditional bfloat16 cast of trainable params (added in
    # TRL 0.26.0). TRL hardcodes bfloat16 for QLoRA per the original paper's
    # recommendation, but this is wrong: it ignores the user's requested dtype
    # and breaks GradScaler when training with fp16=True. Unsloth already
    # handles adapter dtype correctly via patch_model_and_tokenizer, so the
    # entire block is unnecessary. For GRPOTrainer the enclosing peft init
    # block is already removed above, making this a no-op for GRPO.
    RLTrainer_source = RLTrainer_source.replace(
        'if getattr(model, "is_loaded_in_4bit", False) or getattr(model, "is_loaded_in_8bit", False):',
        "if False:",
    )

    if RLTrainer_name == "SFTTrainer":
        original_text = 'self._signature_columns = ["input_ids", "attention_mask", "completion_mask"]'
        new_text = 'self._signature_columns = ["input_ids", "attention_mask", "completion_mask","labels"]'
        RLTrainer_source = RLTrainer_source.replace(original_text, new_text)

        # Do NOT override _is_vlm -- let TRL detect VLM models naturally.
        # In TRL 0.27.1+, forcing _is_vlm=False causes a ValueError when
        # vision datasets are used with VLM models.
        #
        # However, some notebooks pass a bare tokenizer (processor.tokenizer) as
        # processing_class. TRL then sets _is_vlm=False even for VLM models.
        # Add a model-architecture-based override before the validation check.
        _vlm_check_original = (
            '        self._is_vision_dataset = "image" in dataset_sample or "images" in dataset_sample\n'
            "        if self._is_vision_dataset and not self._is_vlm:"
        )
        _vlm_check_patched = (
            '        self._is_vision_dataset = "image" in dataset_sample or "images" in dataset_sample\n'
            "        # Unsloth: override _is_vlm for VLM models that pass a bare tokenizer\n"
            "        if not self._is_vlm and self._is_vision_dataset:\n"
            "            _m = model\n"
            '            if hasattr(_m, "model"): _m = _m.model\n'
            '            if hasattr(getattr(_m, "config", None), "vision_config") or \\\n'
            '               _m.__class__.__name__.endswith("ForConditionalGeneration"):\n'
            "                self._is_vlm = True\n"
            "        if self._is_vision_dataset and not self._is_vlm:"
        )
        if _vlm_check_original in RLTrainer_source:
            RLTrainer_source = RLTrainer_source.replace(
                _vlm_check_original, _vlm_check_patched
            )

        # Fix TRL 0.22.x: VLM models with text-only datasets.
        # TRL 0.22.x checks _is_vlm (model type) not _is_vision_dataset (dataset
        # content, added in 0.25.1+). When _is_vlm=True, signature columns are
        # vision-only ["messages","prompt","completion","images"], which have zero
        # overlap with tokenized text columns. Fix: merge both column sets into the
        # VLM branch. Extra columns not in the dataset are harmlessly ignored by
        # _remove_unused_columns (it only raises when zero columns match).
        _sig_vlm_old = (
            'self._signature_columns = ["messages", "prompt", "completion", "images"]'
        )
        _sig_vlm_new = (
            'self._signature_columns = ["messages", "prompt", "completion", "images",'
            ' "input_ids", "labels", "attention_mask", "seq_lengths", "completion_mask", "assistant_masks"]'
        )
        RLTrainer_source = RLTrainer_source.replace(_sig_vlm_old, _sig_vlm_new)

        # Inject model reference before _prepare_dataset for dynamic
        # token_type_ids detection in sft_prepare_dataset
        _prep_pattern = r"([ \t]*)train_dataset = self\._prepare_dataset\("
        _prep_replacement = r"\1self._unsloth_model_ref = model\n\1train_dataset = self._prepare_dataset("
        RLTrainer_source = re.sub(
            _prep_pattern, _prep_replacement, RLTrainer_source, count = 1
        )

    # Silence TRL's noisy batch_size=1 + padding-free warning (handles both
    # the original "anihilate" typo and the corrected "annihilate" spelling)
    for _typo in ("anihilate", "annihilate"):
        _idx = RLTrainer_source.find(_typo)
        if _idx == -1:
            continue
        # Walk backwards to find "if args.per_device_train_batch_size"
        _block_start = RLTrainer_source.rfind(
            "if args.per_device_train_batch_size == 1", 0, _idx
        )
        if _block_start == -1:
            continue
        # Walk backwards to the newline before the if
        _line_start = RLTrainer_source.rfind("\n", 0, _block_start)
        # Walk forwards past the closing paren to the end of the block
        _close = RLTrainer_source.find(")", _idx)
        if _close == -1:
            continue
        _block_end = RLTrainer_source.find("\n", _close)
        if _block_end == -1:
            continue
        RLTrainer_source = (
            RLTrainer_source[:_line_start] + RLTrainer_source[_block_end:]
        )
        break

    # Remove multiple doc strings
    if __RLConfig_doc__ != "" and RLTrainer_source.count(__RLTrainer_doc__) == 2:
        RLTrainer_source = RLTrainer_source.replace(__RLTrainer_doc__, "", 1)

    # Remove multiple newlines
    RLTrainer_source = re.sub(r"[\n]{3,}", "\n", RLTrainer_source)

    # Create new function
    _resolved_module = _trainer_resolved_module or _config_resolved_module
    _model_location = (
        _resolved_module.__name__
        if _resolved_module is not None
        else f"trl.trainer.{trainer_file}"
    )
    created_module = create_new_function(
        f"Unsloth{RLTrainer_name}",
        RLTrainer_source,
        _model_location,
        imports,
        overwrite = False,
    )

    # Patch Trainer
    exec(
        f"trl.{RLTrainer_name} = created_module.Unsloth{RLTrainer_name}",
        locals(),
        globals(),
    )
    exec(
        f"trl.trainer.{RLTrainer_name} = created_module.Unsloth{RLTrainer_name}",
        locals(),
        globals(),
    )
    exec(
        f"trl.trainer.{trainer_file}.{RLTrainer_name} = created_module.Unsloth{RLTrainer_name}",
        locals(),
        globals(),
    )

    # Patch Config
    exec(
        f"trl.{RLConfig_name} = created_module.Unsloth{RLConfig_name}",
        locals(),
        globals(),
    )
    exec(
        f"trl.trainer.{RLConfig_name} = created_module.Unsloth{RLConfig_name}",
        locals(),
        globals(),
    )
    exec(
        f"trl.trainer.{trainer_file}.{RLConfig_name} = created_module.Unsloth{RLConfig_name}",
        locals(),
        globals(),
    )

    if trainer_file == "grpo_trainer":
        try:
            _wrap_grpo_generate_and_score(
                getattr(created_module, f"Unsloth{RLTrainer_name}")
            )
        except Exception as e:
            logger.info(
                f"Unsloth: Could not wrap _generate_and_score_completions for {RLTrainer_name}: {e}"
            )


def patch_functions(RLTrainer, trainer_file, RLTrainer_name, all_imports, imports):
    init = inspect.getsource(RLTrainer.__init__)
    old_init = init

    # Find comments containing round brackets, since they interfere, e.g. (...)
    comments = re.findall(r"\#[^\n]{1,}\n", init)
    bracketed_comments = [x for x in comments if "(" in x or ")" in x]
    # Replace with [...] instead
    for bracketed_comment in bracketed_comments:
        init = init.replace(
            bracketed_comment,
            bracketed_comment.replace("(", "[").replace(")", "]"),
        )

    # Remove peft_config
    init = init.replace("elif peft_config is None:", "elif False:")
    init = init.replace("elif peft_config is not None:", "elif False:")
    init = init.replace("if peft_config is None:", "if False:")
    init = init.replace("if peft_config is not None:", "if False:")
    init = init.replace("get_peft_model(model, peft_config)", "model")
    # New TRL 0.20.0
    init = init.replace(
        "if peft_config is not None or (is_peft_available() and isinstance(model, PeftModel)):",
        "if False:",
    )
    # New TRL 0.20.0
    init = init.replace(
        "model = self._prepare_peft_model(model, peft_config, args)\n", "pass\n"
    )
    # TRL 0.22.0+ uses prepare_peft_model as a standalone function
    init = init.replace("model = prepare_peft_model(model, peft_config, args)", "pass")

    # Skip add_adapter("ref") for reference model computation
    # Unsloth: We comment out the "ref" adapter creation because:
    # 1. We want to use the original BASE MODEL as the reference model, not the SFT/LoRA model
    # 2. PEFT doesn't allow multiple adapters when target_parameters is used (MoE models)
    # When "ref" is not in peft_config, GRPO/RLOO fallback uses disable_adapter()
    # which gives the base model logits - exactly what we want
    add_adapter_block_pattern = (
        r"([ \t]*)"  # Capture leading indentation
        r"if\s+is_peft_available\(\)\s+and\s+is_peft_model\(model\)\s+and\s+args\.beta\s*!=\s*0\.0\s*:"
        r"(.*?)"  # Match the entire block until ref_param.data.copy_
        r"ref_param\.data\.copy_\(param\.data\)"
    )

    def comment_out_block(match):
        """Turn the regex-matched text into comments while keeping its layout.

        Args:
            match: A regex match object. Group 0 is the full text to disable
                and group 1 is the indentation captured in front of it.

        Returns:
            The matched text with an explanatory header comment (indented by
            group 1) placed before it, and with `# ` inserted after the
            leading whitespace of every line that has non-whitespace content.
            Empty and whitespace-only lines are passed through unchanged.
        """
        header = f"{match.group(1)}# Unsloth: Commented out - use base model as reference, not SFT/LoRA model"

        def disable(text):
            code = text.lstrip()
            if not code:
                # Nothing but whitespace: leave the line exactly as it was
                return text
            # Everything in front of `code` is the line's own indentation
            return f"{text[: len(text) - len(code)]}# {code}"

        return "\n".join([header, *map(disable, match.group(0).split("\n"))])

    init = re.sub(add_adapter_block_pattern, comment_out_block, init, flags = re.DOTALL)

    # Set use_vllm if not set
    if "args.use_vllm" in init and "model" in init and "args" in init:
        # .*? matches first match. .+? matches final match.
        replacer = re.findall(
            r"def __init__\(.*?\).*?\:\n",
            init,
            flags = re.MULTILINE | re.DOTALL,
        )
        if len(replacer) != 0:
            replacer = replacer[0]
            vllm_setter = (
                "\n"
                + " " * 8
                + "if hasattr(model, 'vllm_engine') and hasattr(args, 'use_vllm'):\n"
                + " " * 12
                + "if (getattr(args, 'use_vllm', False) == False):\n"
                + " " * 16
                + "args.use_vllm = True\n"
            )
            # " " * 16 + "args.vllm_importance_sampling_correction = True\n" + \
            # " " * 16 + "args.vllm_importance_sampling_cap = 2.0\n"

            if "grpo" in trainer_file and trl_version >= Version("0.18.0"):
                # If the model has a vllm_engine, use vLLM in colocate mode. Do not wait for a server
                vllm_setter += " " * 12 + "args.vllm_mode='colocate'\n"
                if trl_version >= Version("0.23.0"):
                    # We need to set this flag for sleep mode auto working with trl update
                    vllm_setter += (
                        " " * 12
                        + "if os.environ.get('UNSLOTH_VLLM_STANDBY', '0') == '1':\n"
                        + " " * 16
                        + "args.vllm_enable_sleep_mode=True\n"
                    )

            init = init.replace(replacer, replacer + vllm_setter)

    # breakpoint()

    vllm_part = re.findall(
        r"(\n[\s]{8}" r"if (self|args)\.use_vllm\:.*?" r"\n[\s]{8}" "else:\n)",
        init,
        flags = re.MULTILINE | re.DOTALL,
    )

    if len(vllm_part) == 1:
        vllm_part, args = vllm_part[0][0], vllm_part[0][1]
        # Strip all comments
        new_vllm_part = re.sub(
            r"^\s*\#[^\n]*\n?", "", vllm_part, flags = re.MULTILINE
        )  # to also remove whole comment line instead of just starting at #
        new_vllm_part = re.sub(
            r"\s*\#.*$", "", new_vllm_part, flags = re.MULTILINE
        )  # remove comments that occur after code

        # Get SamplingParams
        sampling_params = re.findall(
            r"\n[\s]{4,}(self\.[^\s]{1,}[\s]{0,}\=[\s]{0,}" r"SamplingParams\(.+?\))",
            new_vllm_part,
            flags = re.MULTILINE | re.DOTALL,
        )

        if len(sampling_params) == 1:
            sampling_params = sampling_params[0]
            # Fix guided_decoding
            sampling_params = sampling_params.replace(
                "guided_decoding=guided_decoding,",
                "guided_decoding="
                'GuidedDecodingParams(backend="outlines", regex=args.vllm_guided_decoding_regex) '
                'if getattr(args, "vllm_guided_decoding_regex", None) is not None else None,',
            )
            # Replace with our vLLM engine
            sampling_params = (
                " " * 12
                + "self.llm = model.vllm_engine; self._last_loaded_step = 0; "
                + sampling_params
            )  # Add spaces

            # count the indentation of last line of sampling_params.
            splitted_sampling_params = sampling_params.split("\n")
            if len(splitted_sampling_params) >= 2:
                last_line = splitted_sampling_params[-1]
                last_prev_line = splitted_sampling_params[-2]
                last_prev_indentation = len(last_prev_line) - len(
                    last_prev_line.lstrip()
                )
                last_indentation = len(last_line) - len(last_line.lstrip())

                # Add extra arguments to SamplingParams
                extra = "**getattr(getattr(args, 'vllm_sampling_params', vLLMSamplingParams()), '_set_kwargs', {})"
                # Backwards replace
                to_replace = (
                    ",\n"
                    + " " * last_prev_indentation
                    + extra
                    + ",\n"
                    + " " * last_indentation
                    + ")"
                )
                sampling_params = to_replace.join(sampling_params.rsplit(")", 1))
                # Strip multiple commas
                sampling_params = re.sub(r"[\,][\s]{0,}\,", ",", sampling_params)

                new_vllm_part = (
                    f"\n{' '*8}if {args}.use_vllm:\n{sampling_params}"
                    f"\n{' '*8}else:\n"
                )

        if trl_version >= Version("0.18.0"):
            # Replace LLM init with already existing vLLM engine for colocate mode
            vllm_llm_init_pattern = r"self\.llm\s*=\s*LLM\(.*?\)*\)\s*?\n(?!,)"
            vllm_llm_replacement = "self.llm = model.vllm_engine\n"
            new_vllm_part = re.sub(
                vllm_llm_init_pattern,
                vllm_llm_replacement,
                new_vllm_part,
                flags = re.DOTALL,  # Ensure . matches newlines [[5]]
            )

        init = init.replace(vllm_part, new_vllm_part)

    # Search for vLLM calling in all child functions
    functions = dir(RLTrainer)
    RLTrainer_source = inspect.getsource(RLTrainer)
    functions = [x for x in functions if f"def {x}" in RLTrainer_source]

    changed = {
        "__init__": (
            old_init,
            init,
        )
    }
    edit_functions = RL_FUNCTIONS.get(trainer_file, [])

    for function in functions:
        if not hasattr(RLTrainer, function):
            continue
        if function in changed:
            original_source, source = changed[function]
        else:
            fx = getattr(RLTrainer, function)
            try:
                source = inspect.getsource(fx)
            except:
                continue
            original_source = source

        # Check for function
        for edit_function in edit_functions:
            source = edit_function(function, source)

        """
        import torch
        X = torch.ones((2, 2048, 201088), dtype = torch.bfloat16, device = "cuda")
        X[torch.randperm(2, dtype = torch.int64, device = X.device)]

        will error out in torch 2.8 AcceleratorError: CUDA error: invalid configuration argument
        """
        source = re.sub(
            r"(\n[\s]{4,})generation_batch = shuffle_sequence_dict\(generation_batch\)\n",
            r"\n\1try: generation_batch = shuffle_sequence_dict(generation_batch)\n\1except: pass\n",
            source,
        )

        # llm_model = self.llm.llm_engine.model_executor.driver_worker.model_runner.model
        source = re.sub(
            r"(\n[\s]{4,}).+?model_executor\.driver_worker.+?\n",
            r"\n\1pass\n",
            source,
        )

        # llm_model.load_weights(model.state_dict().items())
        source = re.sub(
            r"(\n[\s]{4,}).+?load_weights\(.+?\n",
            r"\n\1pass\n",
            source,
        )

        # .state_dict()
        source = re.sub(
            r"\.state_dict\(\)",
            r"",
            source,
        )

        # Replace self.llm.generate and self.llm.chat
        if "CUDA_VISIBLE_DEVICES" in os.environ:
            lora_name = (
                trainer_file
                + "_lora_model_' + "
                + "(os.environ.get('CUDA_VISIBLE_DEVICES', '0').replace(',',''))"
            )
        else:
            lora_name = trainer_file + "_lora_model'"
        source = re.sub(
            r"(self\.llm\.(?:generate|chat)\([^\)]{1,})\)",
            r"\1, lora_request = self.model.load_lora('"
            + lora_name
            + r", load_tensors = True))",
            source,
        )
        # All these are to fix multiple commas before lora_request (in case the original code ends with something like ",)")
        # https://github.com/huggingface/trl/blob/main/trl/trainer/grpo_trainer.py#L1388 for eg has such an ending
        source = re.sub(r"\,[\s]{1,}\,[\s]{0,}lora_request", ", lora_request", source)
        source = re.sub(r"[\s]{1,}\,[\s]{0,}lora_request", ", lora_request", source)
        source = re.sub(r"[\,]{1,}[\s]{0,}lora_request", ", lora_request", source)
        # Prefer using unsloth's sampling params and fallback to trl's if not found
        # We'll enable this later separately when combining both this and GRPOConfig params
        # source = re.sub(
        #     r"sampling_params\s*=\s*sampling_params",
        #     r"sampling_params = getattr(self.args, 'vllm_sampling_params', sampling_params)",
        #     source
        # )
        # Fix later versions of SamplingParams via grpo_update_SamplingParams
        source = source.replace(
            "sampling_params = SamplingParams(**generation_kwargs)",
            "sampling_params = SamplingParams("
            "**grpo_update_SamplingParams("
            "SamplingParams, generation_kwargs, "
            "getattr(self.args, 'vllm_sampling_params', None)"
            ")"
            ")",
        )

        # Skip if no changes done
        if source == original_source:
            continue

        # Find all imports
        imports += [x for x in all_imports if not x.startswith("_") and x in source]

        changed[function] = (
            original_source,
            source,
        )

    # Import all functions
    imports = list(set(imports))

    # Patch all functions
    for function in changed:
        old, new = changed[function]
        RLTrainer_source = RLTrainer_source.replace(old, new)

    RLTrainer_source = RLTrainer_source.replace(
        f"class {RLTrainer_name}", f"class _Unsloth{RLTrainer_name}", 1
    )
    return RLTrainer_source


def patch_trl_rl_trainers():
    """Patch every ``*_trainer`` module exposed by ``trl.trainer``.

    Candidates are the attribute names of ``trl.trainer`` that are lower-case,
    end in ``_trainer`` and are not ``base_trainer``.  Each candidate is passed
    to ``_patch_trl_rl_trainers``.  A failure for one trainer is reported through
    ``logger.warning_once`` and the remaining trainers are still processed.
    """
    # Patch all TRL modules if they have vLLM or PEFT
    import trl.trainer

    def is_trainer_module(name):
        return name.islower() and name.endswith("_trainer") and name != "base_trainer"

    for trainer in filter(is_trainer_module, dir(trl.trainer)):
        try:
            _patch_trl_rl_trainers(trainer)
        except Exception as e:
            logger.warning_once(f"Unsloth: Could not patch trl.trainer.{trainer}: {e}")


def patch_trl_openenv():
    """Call every patch function registered under ``RL_ADDITIONAL_FUNCTIONS["openenv"]``.

    Each function is announced through ``logger.info`` and then invoked with no
    arguments; the patch is applied as a side effect of that call.
    """
    for apply_patch in RL_ADDITIONAL_FUNCTIONS["openenv"]:
        patch_name = apply_patch.__name__
        logger.info(f"Unsloth: Patching trl openenv with function: {patch_name}")
        apply_patch()


def patch_trl_vllm_generation():
    """Call every patch function registered under ``RL_ADDITIONAL_FUNCTIONS["vllm_generation"]``.

    TRL moved its vLLM code to ``trl/generation/vllm_generation.py``.  That module
    has to be patched so it does not instantiate another vLLM instance when one
    already exists through ``fast_inference``.  The intended approach is to find
    the (multi-line) ``self.llm = LLM(..)`` assignment and wrap it in an ``if``
    clause; the rewriting itself lives in the registered functions, which are
    called here with no arguments.

    NOTE: this function used to be defined twice in a row with identical bodies,
    so the second definition silently replaced the first.  Only one is kept.
    """
    for function in RL_ADDITIONAL_FUNCTIONS["vllm_generation"]:
        logger.info(
            f"Unsloth: Patching trl VLLMGeneration with function: {function.__name__}"
        )
        function()


def PatchFastRL(algorithm = None, FastLanguageModel = None):
    """Apply the RL patches in a fixed order.

    Args:
        algorithm: Forwarded to ``PatchRLStatistics`` only when it is exactly a
            ``str`` (subclasses excluded) and is lower-case; ignored otherwise.
        FastLanguageModel: Forwarded to ``PatchRL`` first when it is not ``None``.
    """
    if FastLanguageModel is not None:
        PatchRL(FastLanguageModel)

    # TRL trainer modules first, then the additional openenv / vLLM generation patches.
    for apply_patch in (
        patch_trl_rl_trainers,
        patch_trl_openenv,
        patch_trl_vllm_generation,
    ):
        apply_patch()

    wants_statistics = type(algorithm) is str and algorithm.islower()
    if wants_statistics:
        PatchRLStatistics(algorithm)


================================================
FILE: unsloth/models/rl_replacements.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Public hook registries of this module.  RL_ADDITIONAL_FUNCTIONS is listed with
# the others because it is defined alongside them and is read by rl.py
# (the "openenv" and "vllm_generation" keys).
__all__ = [
    "RL_EXTRA_ARGS",
    "RL_FUNCTIONS",
    "RL_PRE_ITEMS",
    "RL_CONFIG_CHANGES",
    "RL_METRICS_CHANGES",
    "RL_ADDITIONAL_FUNCTIONS",
]

import os
import re
import torch
import inspect
import linecache
from collections import defaultdict
from unsloth_zoo.rl_replacements import (
    RL_REPLACEMENTS,
    left_pack_padding,
    chunked_selective_log_softmax,
)
from unsloth_zoo.utils import Version
from trl import __version__ as trl_version_raw
from importlib.metadata import version as importlib_version
from unsloth_zoo.log import logger
from unsloth_zoo.device_type import device_synchronize
import importlib.util
from ..device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
)
import textwrap
from ._utils import _get_inference_mode_context_manager

# Hook registries.  Every registry is its own ``defaultdict(list)``: looking up a
# key that has no hooks yet yields an empty list that can be appended to directly.
(
    RL_EXTRA_ARGS,
    RL_FUNCTIONS,
    RL_PRE_ITEMS,
    RL_CONFIG_CHANGES,
    RL_METRICS_CHANGES,
    RL_ADDITIONAL_FUNCTIONS,
) = (defaultdict(list) for _ in range(6))

# NOTE(review): presumably passed as the ``options`` of torch.compile - it is not
# used in this part of the file, confirm against the callers.
torch_compile_options = {
    "epilogue_fusion": True,
    # Speedups were observed with max_autotune, but it is unclear whether it
    # causes issues in Colab, so it stays off.
    "max_autotune": False,
    "shape_padding": True,
    "trace.enabled": False,
    "triton.cudagraphs": False,
}

def _detect_trl_version():
    """Return the installed TRL version, or ``Version("0.0.0")`` if it cannot be parsed.

    The sources are tried in order and the first one that parses wins:
    ``trl.__version__`` (imported as ``trl_version_raw``), then the installed
    distribution metadata for ``trl``.
    """
    raw_version_sources = (
        lambda: trl_version_raw,
        lambda: importlib_version("trl"),
    )
    for read_raw_version in raw_version_sources:
        try:
            return Version(read_raw_version())
        except Exception:
            continue
    return Version("0.0.0")


trl_version = _detect_trl_version()


# Check untrained tokens
def sft_trainer_fix_untrained_tokens(call_args, extra_args):
    """Return code that calls ``fix_untrained_tokens`` and ``fix_zero_training_loss``.

    Args:
        call_args: Container (or string) checked for ``"model"`` and
            ``"train_dataset"`` with the ``in`` operator.
        extra_args: Unused.

    Returns:
        The code snippet (each line newline-terminated) when both names are
        present in ``call_args``, otherwise the empty string.
    """
    if "model" not in call_args or "train_dataset" not in call_args:
        return ""

    injected_lines = (
        "IGNORED_TOKENIZER_NAMES = os.environ.get('UNSLOTH_IGNORED_TOKENIZER_NAMES', '').split('\\n')",
        "from unsloth_zoo.tokenizer_utils import fix_untrained_tokens",
        "from unsloth_zoo.training_utils  import fix_zero_training_loss",
        "if 'tokenizer' not in locals(): tokenizer = processing_class",
        "fix_untrained_tokens(model, tokenizer, train_dataset, IGNORED_TOKENIZER_NAMES, eps = 1e-16)",
        "fix_zero_training_loss(model, tokenizer, train_dataset)",
    )
    return "".join(line + "\n" for line in injected_lines)


RL_EXTRA_ARGS["sft_trainer"].append(sft_trainer_fix_untrained_tokens)


# Fix top_k for GRPO vLLM.
# https://github.com/huggingface/trl/pull/4695 added top_k to GRPOConfig with a default of 0.
# We don't want that: vLLM uses top_k = -1 to mean "include all tokens", and 0 raises an error on SamplingParams creation.
def grpo_config_fix_vllm_top_k(old_RLTrainer_source, old_RLConfig_source):
    """Return a config statement that turns an unset or zero ``top_k`` into ``-1`` under vLLM.

    Both arguments are ignored; the same newline-terminated statement is
    returned on every call.
    """
    top_k_guard = "if use_vllm and (top_k is None or top_k == 0): top_k = -1"
    return top_k_guard + "\n"


RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_config_fix_vllm_top_k)


# Remove DPO columns which might randomly be tokenized
def dpo_trainer_fix_columns(call_args, extra_args):
    """Return code that drops the raw DPO text columns from an already-tokenized dataset.

    The generated snippet removes ``chosen``, ``rejected`` and ``prompt`` from
    ``train_dataset`` only when the dataset exposes ``column_names`` and all the
    raw and tokenized columns listed in ``check`` are present.

    Args:
        call_args: Container (or string) checked for ``"model"`` and
            ``"train_dataset"`` with the ``in`` operator.
        extra_args: Unused.

    Returns:
        The code snippet when both names are present in ``call_args``,
        otherwise the empty string.
    """
    if "model" not in call_args or "train_dataset" not in call_args:
        return ""

    injected_lines = (
        "if hasattr(train_dataset, 'column_names'):",
        "    column_names = set(train_dataset.column_names)",
        "    check = ['chosen', 'rejected', 'prompt', 'chosen_input_ids', 'chosen_attention_mask',",
        "             'chosen_labels', 'rejected_input_ids', 'rejected_attention_mask', 'rejected_labels',",
        "             'prompt_input_ids', 'prompt_attention_mask']",
        "    if all(x in column_names for x in check):",
        "        train_dataset = train_dataset.remove_columns(['chosen', 'rejected', 'prompt'])",
        "    del check, column_names",
    )
    return "".join(line + "\n" for line in injected_lines)


RL_EXTRA_ARGS["dpo_trainer"].append(dpo_trainer_fix_columns)


# Fix tokenizer double BOS
def sft_trainer_prepare_dataset(function_name, function):
    """Patch the SFT trainer's dataset preparation so a BOS token is not added twice.

    Only ``_prepare_dataset`` and ``_prepare_non_packed_dataloader`` are touched;
    the source of any other method is returned unchanged.

    Two strategies are tried in order:

    1. If ``RL_REPLACEMENTS`` provides ``sft_prepare_dataset`` and the source
       starts with a ``def _prepare_dataset(...)`` whose parameter list mentions
       all of that function's parameter names in order, the whole method is
       replaced by the fast function's source (indented four spaces and renamed
       to ``_prepare_dataset``).
    2. Otherwise a prologue is inserted right after the ``def`` line.  It checks
       whether the first sample or the chat template already contains the BOS
       token and, if so, disables ``add_special_tokens``.  A statement restoring
       ``tokenizer.__call__`` is then inserted before the final ``return``.

    Args:
        function_name: Name of the trainer method being processed.
        function: Source code of that method.

    Returns:
        The (possibly rewritten) method source.
    """
    if function_name not in ("_prepare_non_packed_dataloader", "_prepare_dataset"):
        return function

    fast_sft_prepare_dataset = RL_REPLACEMENTS.get("sft_prepare_dataset", None)
    if fast_sft_prepare_dataset is not None:
        params = inspect.signature(fast_sft_prepare_dataset).parameters.keys()
        params = ".*?".join(params)
        matched = re.match(
            r"[\s]{0,}def _prepare_dataset\(.*?" + params + r".*?\)",
            function,
            flags = re.MULTILINE | re.DOTALL,
        )
        if matched:
            # Use fast version!
            function = inspect.getsource(fast_sft_prepare_dataset)
            function = function.split("\n")
            function = "\n".join(" " * 4 + x for x in function)
            function = function.replace(
                "def sft_prepare_dataset", "def _prepare_dataset"
            )
            return function

    check_text = (
        "if 'skip_prepare_dataset' in locals() and skip_prepare_dataset:\n"
        "    return dataset\n"
        "if 'tokenizer'          not in locals(): tokenizer = processing_class\n"
        "if 'formatting_func'    not in locals(): raise RuntimeError('Unsloth: Please file a bug report - `formatting_func` does not exist!')\n"
        "if 'dataset_text_field' not in locals() and 'args' in locals(): dataset_text_field = args.dataset_text_field\n"
        "if 'dataset_text_field' not in locals(): raise RuntimeError('Unsloth: Please file a bug report - `dataset_text_field` does not exist!')\n"
        "test_text = dataset[0][dataset_text_field] if (formatting_func is None and dataset_text_field is not None) else formatting_func(dataset[0])[0]\n"
        "chat_template = getattr(tokenizer, 'chat_template', None)\n"
        "chat_template = '' if chat_template is None else chat_template\n"
        "has_bos_token_already = (test_text.startswith(tokenizer.bos_token) or tokenizer.bos_token in chat_template) "
        "if getattr(tokenizer, 'bos_token', None) is not None else False\n"
        "if 'add_special_tokens' not in locals() and has_bos_token_already:\n"
        "    from functools import partial\n"
        "    tokenizer_call = tokenizer.__call__\n"
        "    tokenizer.__call__ = partial(tokenizer_call, add_special_tokens = False)\n"
        "    processing_class = tokenizer\n"
        "else:\n"
        "    tokenizer_call = None\n"
        "    add_special_tokens = False if has_bos_token_already else locals().get('add_special_tokens', False)\n"
    )

    # Indent the prologue to method-body depth (8 spaces) and end it with exactly
    # one newline.
    check_text = check_text.split("\n")
    check_text = "\n".join(" " * 8 + x for x in check_text)
    check_text = check_text.rstrip() + "\n"

    # Locate the ``def`` header (non-greedy, so up to the first ``:`` + newline that
    # follows a closing parenthesis) and append the prologue directly after it.
    replacer = re.findall(
        r"def " + function_name + r"\(.*?\).*?\:\n",
        function,
        flags = re.MULTILINE | re.DOTALL,
    )
    if replacer:
        replacer = replacer[0]
        function = function.replace(replacer, replacer + check_text)

    # Return tokenizer's original state
    return_state = (
        "if tokenizer_call is not None: tokenizer.__call__ = tokenizer_call\n"
    )
    # The pattern consumes the newline in front of the final ``return``, so the
    # replacement has to put it back.  Without it the restore statement was glued
    # onto the preceding statement whenever no blank line separated the two,
    # which produced invalid Python.
    function = re.sub(
        r"\n([ ]{4,})(return .*?[\s]{0,})$",
        rf"\n\1{return_state}\1\2",
        function,
    )
    return function


RL_FUNCTIONS["sft_trainer"].append(sft_trainer_prepare_dataset)


# Ignore mean_token_accuracy since it needs logits
# We override it directly with our version
def sft_trainer_compute_loss(function_name, function):
    """Replace the SFT trainer's ``compute_loss`` with a plain pass-through to ``super()``.

    Any method other than ``compute_loss`` is returned unchanged.  For
    ``compute_loss`` the incoming source is discarded and the source text of the
    nested template function below is returned instead.

    Args:
        function_name: Name of the trainer method being processed.
        function: Source code of that method.

    Returns:
        ``function`` itself, or the source text of the nested template.
    """
    if function_name != "compute_loss":
        return function

    # Template only: it is never called here.  Its text is read back with
    # inspect.getsource, so anything written inside it (including comments)
    # becomes part of the returned source.
    def compute_loss(
        self, model, inputs, return_outputs = False, num_items_in_batch = None
    ):
        outputs = super().compute_loss(
            model,
            inputs,
            return_outputs = return_outputs,
            num_items_in_batch = num_items_in_batch,
        )
        return outputs

    function = inspect.getsource(compute_loss)
    return function


RL_FUNCTIONS["sft_trainer"].append(sft_trainer_compute_loss)


# Fix bare pop("push_to_hub_token") in compiled SFT/IterativeSFT trainer __init__
# On transformers 5.0+, to_dict() no longer includes push_to_hub_token, so bare pop KeyErrors
def sft_trainer_push_to_hub_token(function_name, function):
    """Give the ``push_to_hub_token`` pop in ``__init__`` a ``None`` default.

    Args:
        function_name: Name of the trainer method being processed.
        function: Source code of that method.

    Returns:
        For ``__init__``, the source with every bare
        ``dict_args.pop("push_to_hub_token")`` given a ``None`` default; for any
        other method, the source unchanged.
    """
    if function_name == "__init__":
        bare_pop = 'dict_args.pop("push_to_hub_token")'
        safe_pop = 'dict_args.pop("push_to_hub_token", None)'
        function = function.replace(bare_pop, safe_pop)
    return function


RL_FUNCTIONS["sft_trainer"].append(sft_trainer_push_to_hub_token)


# Autocast precision for GRPO
def grpo_trainer__prepare_inputs(function_name, function):
    """Add autocast to the inference-mode blocks of GRPO's ``_prepare_inputs``.

    Two textual rewrites are applied to the method source:

    * every ``with torch.inference_mode():`` header additionally enters
      ``torch.amp.autocast`` with a dtype chosen from environment variables;
    * every ``self.accelerator.unwrap_model(self.model)`` call gains
      ``keep_fp32_wrapper = False``.

    Args:
        function_name: Name of the trainer method being processed.
        function: Source code of that method.

    Returns:
        The rewritten source for ``_prepare_inputs``; any other method unchanged.
    """
    if function_name == "_prepare_inputs":
        # Add mixed precision training
        autocast_header = (
            "with torch.inference_mode(), "
            "torch.amp.autocast(device_type = 'cuda', "
            "dtype = ((torch.float16 if os.environ.get('ACCELERATE_MIXED_PRECISION', 'fp16') == 'fp16' else torch.bfloat16) "
            "if not torch.is_autocast_enabled('cuda') else nullcontext())"
            "if os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '0' else torch.float16):"
        )
        rewrites = (
            ("with torch.inference_mode():", autocast_header),
            (
                "self.accelerator.unwrap_model(self.model)",
                "self.accelerator.unwrap_model(self.model, keep_fp32_wrapper = False)",
            ),
        )
        for old_text, new_text in rewrites:
            function = function.replace(old_text, new_text)
    return function


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__prepare_inputs)


# Remove collective RPC of reload weights from generate
# trl added reload weights (potentially for quantized models), we don't need it for our use case (LoRA primarily)
# https://github.com/huggingface/trl/commit/7856d3b1f6518601732f489883b341bb6dd36434#diff-964e6fd373aa93037604064cb2b822d7f8e2735e33f791065acf2c4c3552d393R1168-R1169
def grpo_trainer__generate_single_turn(function_name, function):
    """Strip weight syncing and tokenizer truncation from ``_generate_single_turn``.

    Three textual rewrites are applied to the method source:

    1. ``self.llm.collective_rpc("reload_weights")`` calls (either quote style)
       are deleted.
    2. A ``with profiling_context(self, "sync_weights"):`` block whose body is
       ``self.vllm_generation.sync_weights()`` is replaced by two comment lines
       at the same indentation.
    3. The ``max_length=self.max_prompt_length``, ``truncation=True`` and
       ``add_special_tokens=False`` entries (keyword or dict-key form, with
       their trailing comma) are deleted.

    Args:
        function_name: Name of the trainer method being processed.
        function: Source code of that method.

    Returns:
        The rewritten source for ``_generate_single_turn``; any other method
        unchanged.
    """
    if function_name != "_generate_single_turn":
        return function

    # Older TRL versions: drop the reload_weights collective RPC.  The
    # back-reference makes the pattern accept single or double quotes.
    reload_weights_call = r"self\.llm\.collective_rpc\(\s*(['\"])reload_weights\1\s*\)"
    patched = re.sub(reload_weights_call, "", function)

    # Current TRL versions call vllm_generation.sync_weights() every step.
    # When Unsloth fast inference LoRA is active, weights are already shared.
    sync_weights_block = (
        r"(?P<indent>[ \t]*)with profiling_context\(self,\s*(['\"])sync_weights\2\s*\):\n"
        r"(?P=indent)[ \t]+self\.vllm_generation\.sync_weights\(\)\n"
    )
    skip_note = (
        r"\g<indent># Unsloth fast inference LoRA shares weights with vLLM already.\n"
        r"\g<indent># Skipping per-step vLLM sync_weights().\n"
    )
    patched = re.sub(sync_weights_block, skip_note, patched, flags = re.MULTILINE)

    # TRL 0.24.0-0.25.1 truncation regression fix
    #
    # TRL 0.22.2-0.23.1 truncated with truncate_with_protected_tokens(): tokenize
    # without truncation, keep the RIGHTMOST tokens (preserving the assistant
    # turn) and protect special tokens (image_token, vision_start/end).
    #
    # TRL 0.24.0-0.25.1 instead passed max_length / truncation / add_special_tokens
    # straight to the tokenizer, which neither protects special tokens nor
    # reliably keeps the end of the prompt.  TRL 0.26.2+ dropped those kwargs.
    #
    # Removing them makes 0.24.0-0.25.1 behave like 0.26.2+ (no tokenizer-level
    # truncation) and is a no-op for versions that never had them.
    truncation_kwargs = (
        r'["\']?max_length["\']?\s*[:=]\s*self\.max_prompt_length\s*,\s*\n?',
        r'["\']?truncation["\']?\s*[:=]\s*True\s*,\s*\n?',
        r'["\']?add_special_tokens["\']?\s*[:=]\s*False\s*,\s*\n?',
    )
    for kwarg_pattern in truncation_kwargs:
        patched = re.sub(kwarg_pattern, "", patched)

    return patched


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__generate_single_turn)


# Fix incorrect special tokens handling and truncation in older TRL versions
def grpo_trainer__generate_and_score_completions(function_name, function):
    """Rewrite the source of GRPO's ``_generate_and_score_completions``.

    Any other method is returned unchanged.  The rewrites are purely textual and
    are applied in order; each one is a no-op when its anchor text is absent:

    1. Decode prompts with ``skip_special_tokens=False``.
    2. After the ``batch_size = ...`` line, compute ``max_left_pad`` and call
       ``self.model.for_training()``.
    3. Drop ``self.vllm_importance_sampling_correction`` from the
       ``gradient_accumulation_steps % generate_every`` condition.
    4. Pass every vLLM logprob through ``sanitize_logprob``.
    5. Re-insert protected-token prompt truncation between the
       ``max_prompt_length`` check and the ``if self.use_vllm:`` branch.
    6. Disable TRL's own importance-sampling branch (``if False and ...``).
    7. Add ``max_left_pad`` and ``sampling_per_token_logps`` to ``output``.
    8. Record per-sample chat-template kwargs on ``self`` before TRL discards them.
    9. On TRL >= 0.24.0, score image inputs with ``prompts_text`` /
       ``completions_text``.
    10. When the source has no ``wake_up()`` call, wrap every
        ``self.llm.generate(...)`` line with vLLM wake-up / sleep calls.

    Args:
        function_name: Name of the trainer method being processed.
        function: Source code of that method.

    Returns:
        The rewritten method source.
    """
    if function_name != "_generate_and_score_completions":
        return function

    # TRL 0.19.0 did skip_special_tokens = True which should be False
    function = function.replace(
        "prompt_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False",
        "prompt_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False",
    )

    # Left pad prompt before calculation old and ref hidden states
    line_to_replace = 'batch_size = self.args.per_device_train_batch_size if mode == "train" else self.args.per_device_eval_batch_size'

    # The new multi-line string that will replace the line above
    replacement_lines = """
        max_left_pad = None
        batch_size = self.args.per_device_train_batch_size if mode == "train" else self.args.per_device_eval_batch_size
        try:
            # TRL 0.23.1 and below path
            if not has_images:
                # Left pad prompt before calculation old and ref hidden states
                left_pad_tokens_per_prompt = calculate_pad_tokens_in_prompt(prompt_completion_ids, logits_to_keep, self.processing_class.pad_token_id)
                max_left_pad = torch.max(left_pad_tokens_per_prompt).item()
        except:
            # TRL 0.24.0 and below path
            if images is None:
                # Left pad prompt before calculation old and ref hidden states
                left_pad_tokens_per_prompt = calculate_pad_tokens_in_prompt(prompt_completion_ids, logits_to_keep, self.processing_class.pad_token_id)
                max_left_pad = torch.max(left_pad_tokens_per_prompt).item()
        self.model.for_training()"""

    function = function.replace(line_to_replace, replacement_lines)

    # Drop the importance-sampling flag from the generate_every condition so
    # that the branch is taken whenever vLLM is used.
    pattern_to_find = re.compile(
        r"^\s*if self\.args\.gradient_accumulation_steps % generate_every != 0 or \(\s*"
        r"self\.use_vllm and self\.vllm_importance_sampling_correction\s*"
        r"\):",
        re.MULTILINE,
    )

    replacement_text = """
            if self.args.gradient_accumulation_steps % generate_every != 0 or (
                self.use_vllm
            ):"""
    function = pattern_to_find.sub(replacement_text, function)

    pattern_to_find = re.compile(
        r"(^\s*)all_logprobs = \["  # Capture indentation (group 1)
        r".*?"  # Match everything inside non-greedily
        r"for output in outputs\.outputs\s*"
        r"\]",
        re.DOTALL | re.MULTILINE,
    )

    # sanitize_logprob is injected as a module-level function via RLTrainer_replacement
    # template in rl.py (from RL_REPLACEMENTS), so just reference it directly here.
    replacement_text = (
        r"\1all_logprobs = [\n"
        r"\1    [sanitize_logprob(next(iter(logprob.values()))) for logprob in output.logprobs]\n"
        r"\1    for outputs in all_outputs\n"
        r"\1    for output in outputs.outputs\n"
        r"\1]"
    )

    function = pattern_to_find.sub(replacement_text, function)

    # Always between max_prompt_length and use_vllm
    found = re.findall(
        r"\n(([ ]{8,})if self\.max_prompt_length is not None:.*?"
        r"\2if self\.use_vllm:)",
        function,
        flags = re.DOTALL | re.MULTILINE,
    )
    if found:
        replace_part, spacing = found[0]
        removed_comments = re.sub(r"\#[^\n]{1,}", "", replace_part)
        splits = removed_comments.split("\n")
        # Only rewrite when, comments aside, exactly two lines sit at the outer
        # indentation: the max_prompt_length check and the use_vllm check.
        if (
            sum(re.match(rf"{spacing}[^\s]", x) is not None for x in splits) == 2
            and len(spacing) >= 8
        ):
            new_replacement = f"""\n{spacing}if self.max_prompt_length is not None:
            # If max_prompt_length is set, we trim the prompt to keep only the last `max_prompt_length` tokens.
            # Then we decode those tokens back into text. We manually remove leading pad tokens from the decoded text,
            # because we can't use `skip_special_tokens=True` (some special tokens are still needed for generation).
            protected = [self.image_token_id, self.vision_start_token_id, self.vision_end_token_id]
            protected = [token for token in protected if token is not None]
            prompt_ids, prompt_mask = truncate_with_protected_tokens(
                prompt_ids, prompt_mask, self.max_prompt_length, protected
            )

            prompts_text = [re.sub(rf"^({{re.escape(self.pad_token)}})+", "", text) for text in prompts_text]

            # The chat template inserts a single image token into the prompt text. However, when this text is later
            # tokenized, the single image token string is expanded into multiple image token IDs, depending on the
            # image size. Since we're detokenizing here, we may see repeated image tokens in the decoded text. We
            # collapse them back into a single token string to match the original template.
            if self.image_token is not None:
                prompts_text = [
                    re.sub(rf"({{re.escape(self.image_token)}})+", self.image_token, text) for text in prompts_text
                ]
        # Generate completions using either vLLM or regular generation
        if self.use_vllm:"""
            function = function.replace(replace_part, new_replacement)

    # Important note: we disable TRL's importance sampling logic
    # It is disabled because the LLM path moves left padding to the right.
    # We must adjust the vLLM sampling_logprob tensor in Unsloth to account for this.
    string_to_find = "if self.use_vllm and self.vllm_importance_sampling_correction:"

    replacement_string = (
        "if False and self.use_vllm and self.vllm_importance_sampling_correction:"
    )

    function = function.replace(string_to_find, replacement_string)

    # Statements appended after whichever ``output[...]`` anchor the installed TRL
    # version contains.  They export max_left_pad and the vLLM sampling logprobs.
    output_extras = """
        if max_left_pad is not None:
            output["max_left_pad"] = torch.tensor(prompt_ids.shape[0] * [max_left_pad]).unsqueeze(-1)
        try:
            if self.use_vllm and getattr(self, "vllm_importance_sampling_correction", False):
                output["sampling_per_token_logps"] = sampling_per_token_logps
        except NameError:
            output["sampling_per_token_logps"] = None"""

    string_to_find = """        if "image_sizes" in prompt_inputs:
            output["image_sizes"] = prompt_inputs["image_sizes"]"""

    function = function.replace(string_to_find, string_to_find + output_extras)

    # TRL 0.24.0+ extracts prompts = [x["prompt"] for x in inputs], losing metadata
    # like reasoning_effort. Inject code to store per-sample chat_template_kwargs on self.
    _metadata_extraction = (
        "\n"
        "        # Unsloth: Extract per-sample chat_template_kwargs before metadata is lost\n"
        "        _ct_ = getattr(self.processing_class, 'chat_template', None) or ''\n"
        "        _sk_ = {'prompt', 'chosen', 'rejected', 'completion', 'messages', 'label',\n"
        "                'images', 'image', 'videos', 'video', 'audios', 'audio'}\n"
        "        self._unsloth_batch_chat_kwargs = []\n"
        "        for _inp_ in inputs:\n"
        "            _kw_ = {}\n"
        "            if isinstance(_inp_, dict):\n"
        "                for _k_ in _inp_.keys() - _sk_:\n"
        "                    if _k_ in _ct_ and isinstance(_inp_[_k_], str):\n"
        "                        _kw_[_k_] = _inp_[_k_]\n"
        "            self._unsloth_batch_chat_kwargs.append(_kw_)\n"
    )
    # Insert after: prompts = [x["prompt"] for x in inputs]
    _target_line = 'prompts = [x["prompt"] for x in inputs]'
    if _target_line in function:
        function = function.replace(
            _target_line,
            _target_line + _metadata_extraction,
        )

    # This path is for TRL 0.24.0; `images` is a variable exclusive to that version.
    string_to_find = """        if images is not None:
            output["num_images"] = num_images"""

    function = function.replace(string_to_find, string_to_find + output_extras)

    # We replace the call using 'completions' with one using 'completions_text'.
    # The version is only consulted when the line is actually present.
    string_to_find = "        rewards_per_func = self._calculate_rewards(inputs, prompts, completions, completion_ids_list)"
    if string_to_find in function and trl_version >= Version("0.24.0"):
        replacement_string = (
            "        if images is not None:\n"
            "            rewards_per_func = self._calculate_rewards(inputs, prompts_text, completions_text, completion_ids_list)\n"
            "        else:\n"
            "            rewards_per_func = self._calculate_rewards(inputs, prompts, completions, completion_ids_list)"
        )
        function = function.replace(string_to_find, replacement_string)

    if "wake_up()" not in function:
        # Sleep functionality has been added to trl in v0.23.0. We do not want to redo this.
        # https://github.com/huggingface/trl/commit/edbe8234bc7e528f72ac76607de9d3e4753e2709

        pattern = re.compile(r".*self\.llm\.generate\(.*\).*", re.MULTILINE)
        matches = list(pattern.finditer(function))
        patched = function

        # Generally there's only one match. But this is just to make sure we don't miss any.
        # Iterating in reverse keeps the offsets of earlier matches valid while splicing.
        for match in reversed(matches):
            line = match.group(0)
            indent = re.match(r"\s*", line).group(0)

            # os.environ.get returns a str when VLLM_SLEEP_MODE is set and the int 1
            # otherwise, so the generated code converts the level to int.
            wrapped = (
                f"{indent}if hasattr(self, 'llm'):\n"
                f"{indent}    if getattr(self.llm.llm_engine.vllm_config.model_config, 'enable_sleep_mode', False):\n"
                f"{indent}        self.llm.wake_up()\n"
                f"{line}\n\n"
                f"{indent}if hasattr(self, 'llm'):\n"
                f"{indent}    if getattr(self.llm.llm_engine.vllm_config.model_config, 'enable_sleep_mode', False):\n"
                f"{indent}        self.llm.sleep(int(os.environ.get('VLLM_SLEEP_MODE', 1)))\n"
            )

            patched = patched[: match.start()] + wrapped + patched[match.end() :]

        function = patched

    return function


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__generate_and_score_completions)


# Fix {"reasoning_effort" : "high"} not applied
def grpo_trainer_fix_maybe_apply_chat_template(function_name, function):
    """Expand TRL's ``prompts_text`` comprehension into an explicit loop.

    The emitted loop forwards extra string-valued example keys that appear in
    the chat template text (for example ``reasoning_effort``), plus any
    per-example entries found in ``self._unsloth_batch_chat_kwargs``, as
    keyword arguments to ``maybe_apply_chat_template``.

    Args:
        function_name: Name of the trainer method being patched (not used).
        function: Source text of that method.

    Returns:
        The patched source text. ``function`` is returned untouched when the
        first ``"def "`` is missing or does not start on a multiple of four
        columns.
    """
    def_column = function.find("def ")
    if def_column % 4:
        # Also covers "not found": -1 % 4 == 3 in Python.
        return function
    body_indent = " " * (def_column + 4)

    template = """
        _chat_template_ = getattr(self.processing_class, "chat_template", None)
        if _chat_template_ is None: _chat_template_ = ""
        _supported_keys_ = set(("prompt", "chosen", "rejected", "completion", "messages", "label"))
        _batch_chat_kwargs_ = getattr(self, "_unsloth_batch_chat_kwargs", None)

        prompts_text = []
        for _idx_, _example_ in enumerate(__INPUTS__REPLACEMENT__):
            _tokenizer_kwargs_ = {}
            if type(_example_) is not dict:
                _example_ = {"prompt": _example_}
            _left_keys_ = _example_.keys() - _supported_keys_
            for k in _left_keys_:
                if k in _chat_template_:
                    v = _example_[k]
                    if type(v) is str:
                        _tokenizer_kwargs_[k] = v
            if _batch_chat_kwargs_ is not None and _idx_ < len(_batch_chat_kwargs_):
                for _bk_, _bv_ in _batch_chat_kwargs_[_idx_].items():
                    if _bk_ not in _tokenizer_kwargs_:
                        _tokenizer_kwargs_[_bk_] = _bv_
            _x_ = maybe_apply_chat_template(_example_, self.processing_class, **_tokenizer_kwargs_)["prompt"]
            prompts_text.append(_x_)
    """
    # Re-indent the template to the method body and pad it with newlines.
    snippet = textwrap.indent(textwrap.dedent(template).strip(), body_indent)
    snippet = "\n" + snippet + "\n"

    def render(iterable_name):
        # Fill in the name of the sequence the emitted loop iterates over.
        return snippet.replace("__INPUTS__REPLACEMENT__", iterable_name)

    # Form 1: single-line comprehension over `inputs`.
    single_line_form = 'prompts_text = [maybe_apply_chat_template(example, self.processing_class)["prompt"] for example in inputs]'
    function = function.replace(single_line_form, render("inputs"))

    # Form 2: comprehension over `prompts`, possibly wrapped over lines:
    #   prompts_text = [
    #       maybe_apply_chat_template({"prompt": prompt}, self.processing_class)["prompt"] for prompt in prompts
    #   ]
    wrapped_form = re.compile(
        r"prompts_text = \["
        r"[\s]{0,}"
        r"maybe_apply_chat_template\(\{[\"\']prompt[\"\'][\s]{0,}\:[\s]{0,}prompt[\s]{0,}\}[\s]{0,}\,[\s]{0,}self\.processing_class\)"
        r"\[[\"\']prompt[\"\']\] for prompt in prompts"
        r"[\s]{0,}"
        r"\]"
    )
    return wrapped_form.sub(render("prompts"), function)


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer_fix_maybe_apply_chat_template)


# Replace _move_model_to_vllm with a no-op that returns None
def grpo_trainer__move_model_to_vllm(function_name, function):
    """Swap the trainer's ``_move_model_to_vllm`` for a do-nothing stub.

    Args:
        function_name: Name of the trainer method being patched.
        function: Source text of that method.

    Returns:
        ``function`` as given for any other method; for
        ``_move_model_to_vllm`` the source text of the nested stub below.
    """
    is_target = function_name == "_move_model_to_vllm"
    if not is_target:
        return function

    # The text of this stub (indentation included) is the returned payload.
    def _move_model_to_vllm(self, *args, **kwargs):
        return None

    return inspect.getsource(_move_model_to_vllm)


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__move_model_to_vllm)


# Edit _get_per_token_logps to handle mixed precision
def grpo_trainer__get_per_token_logps(function_name, function):
    """Swap the trainer's ``_get_per_token_logps`` for Unsloth's stand-in.

    Args:
        function_name: Name of the trainer method being patched.
        function: Source text of that method.

    Returns:
        ``function`` unchanged unless ``function_name`` is
        ``"_get_per_token_logps"``; in that case the source text of the
        nested replacement below, as produced by ``inspect.getsource``.
    """
    if function_name != "_get_per_token_logps":
        return function

    # The text of this nested function is the payload that is returned.
    # As written, the `if True:` guard makes it return None on every call, so
    # everything after that return is unreachable and kept for reference only.
    def _get_per_token_logps(
        self, model, input_ids, attention_mask, logits_to_keep, compute_efficient = False
    ):
        if True:  # os.environ.get('UNSLOTH_USE_NEW_MODEL', '0') == '0':
            return None  # Unsloth efficient GRPO
        # Otherwise, calculate normally:
        if not hasattr(self, "_autocast_dtype"):
            self._autocast_dtype = (
                torch.float16
                if os.environ.get("ACCELERATE_MIXED_PRECISION", "fp16") == "fp16"
                else torch.bfloat16
            )
            if os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1":
                self._autocast_dtype = torch.float16

        os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "1"
        with torch.amp.autocast(device_type = DEVICE_TYPE, dtype = self._autocast_dtype):
            # We add 1 to `logits_to_keep` because the last logits of the sequence is later excluded
            logits = model(
                input_ids = input_ids,
                attention_mask = attention_mask,
                logits_to_keep = logits_to_keep + 1,
            ).logits
            # logits = logits[:, :-1, :]  # (B, L-1, V), exclude the last logit: it corresponds to the next token pred
            return logits
            # input_ids = input_ids[:, -logits_to_keep:]
            # For transformers<=4.48, logits_to_keep argument isn't supported, so here we drop logits ourselves.
            # See https://github.com/huggingface/trl/issues/2770
            # logits = logits[:, -logits_to_keep:]
            # return logits
            # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details
            # logits = logits / self.temperature
            # logps = selective_log_softmax(logits, input_ids)

            # row_indices, col_indices = torch.where(logps < -20)

            # # Method 1: Check if tensors have elements
            # if len(row_indices) > 0 and len(col_indices) > 0:
            #     breakpoint()  # Breakpoint triggered here
            #     print("Found high values!")
            # return  logps #  compute logprobs for the input tokens

    # Hand back the stand-in as source text.
    function = inspect.getsource(_get_per_token_logps)
    return function


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__get_per_token_logps)


def grpo_trainer__get_per_token_logps_and_entropies(function_name, function):
    """Swap the trainer's ``_get_per_token_logps_and_entropies`` for Unsloth's version.

    Args:
        function_name: Name of the trainer method being patched.
        function: Source text of that method.

    Returns:
        ``function`` unchanged unless ``function_name`` is
        ``"_get_per_token_logps_and_entropies"``; in that case the source text
        of the nested replacement below, as produced by ``inspect.getsource``.

    The emitted method returns ``(None, None)`` when ``compute_efficient`` is
    true. Otherwise it runs the model over row-wise chunks of the batch and
    returns ``(logprobs.detach(), None)``; entropies are never computed here.
    """
    if function_name != "_get_per_token_logps_and_entropies":
        return function

    # Just copy over from _get_per_token_logps replacement function above. For now this returns None anyway
    def _get_per_token_logps_and_entropies(
        self,
        model,
        input_ids,
        attention_mask,
        logits_to_keep,
        batch_size = None,
        compute_entropy = False,
        compute_efficient = False,
        *args,
        **kwargs,
    ):
        # All Unsloth code here in this function is licensed under AGPL3
        # if True: # os.environ.get('UNSLOTH_USE_NEW_MODEL', '0') == '0':
        #     return None, None  # logps, entropies Unsloth efficient GRPO
        if compute_efficient:
            return None, None
        else:
            # Pick the autocast dtype once and cache it on the trainer.
            if not hasattr(self, "_autocast_dtype"):
                self._autocast_dtype = (
                    torch.float16
                    if os.environ.get("ACCELERATE_MIXED_PRECISION", "fp16") == "fp16"
                    else torch.bfloat16
                )
                if os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1":
                    self._autocast_dtype = torch.float16

            # Optional vision inputs arrive through **kwargs.
            pixel_values, image_grid_thw = (
                kwargs.get("pixel_values", None),
                kwargs.get("image_grid_thw", None),
            )
            pixel_attention_mask, image_sizes = (
                kwargs.get("pixel_attention_mask", None),
                kwargs.get("image_sizes", None),
            )

            unwrapped_model = self.accelerator.unwrap_model(
                model, keep_fp32_wrapper = False
            )

            lm_head = self.model.get_output_embeddings().weight

            # NOTE(review): 16 / 32 look like bit widths despite the name --
            # confirm against what autotune_batch_and_chunks expects.
            dtype_bytes = (
                16 if self._autocast_dtype in [torch.float16, torch.bfloat16] else 32
            )
            total_rows = input_ids.shape[0]
            seq_len = input_ids.shape[1]
            hidden_dim = lm_head.shape[1]
            vocab_dim = lm_head.shape[0]

            # B ends up as the number of row-chunks; multiplier scales the
            # `chunks` argument passed to the log-softmax helper further down.
            if self.args.unsloth_grpo_mini_batch is None:
                B, multiplier = autotune_batch_and_chunks(
                    total_rows,
                    seq_len,
                    hidden_dim,
                    vocab_dim,
                    dtype_bytes,
                    self.args.unsloth_logit_chunk_multiplier,
                )
                B = total_rows // B
            else:
                B = self.args.unsloth_grpo_mini_batch

                if self.args.unsloth_logit_chunk_multiplier is None:
                    multiplier = max(4, seq_len // 4096)
                else:
                    multiplier = self.args.unsloth_logit_chunk_multiplier

            all_logprobs_list = []
            if pixel_values is None:
                # Text-only path: re-pack the padding and rebuild the attention
                # mask from the pad token id.
                left_pad_tokens_per_prompt = calculate_pad_tokens_in_prompt(
                    input_ids, logits_to_keep, self.processing_class.pad_token_id
                )
                max_left_pad = torch.max(left_pad_tokens_per_prompt).item()
                input_ids = left_pack_padding(
                    input_ids, self.processing_class.pad_token_id
                )
                attention_mask = input_ids != self.processing_class.pad_token_id
                # NOTE(review): casting a tensor to its own dtype is a no-op;
                # possibly meant to restore the incoming mask's dtype -- confirm.
                attention_mask = attention_mask.to(attention_mask.dtype)
            else:
                max_left_pad = 0

            # input_ids_chunks = torch.chunk(input_ids, chunks = B, dim = 0)
            # NOTE(review): this value is replaced by the empty list assigned
            # to attention_mask_chunks a few lines below and is never read.
            attention_mask_chunks = torch.chunk(attention_mask, chunks = B, dim = 0)

            def chunk_optional(tensor, chunks):
                if tensor is None:
                    return [None] * chunks
                return torch.chunk(tensor, chunks = chunks, dim = 0)

            import math

            total_samples = input_ids.shape[0]
            # The `batch_size` argument is overwritten here: rows per chunk.
            batch_size = math.ceil(total_samples / B)

            input_ids_chunks = []
            attention_mask_chunks = []
            pixel_values_chunks = []
            image_grid_thw_chunks = []
            pixel_attention_mask_chunks = []

            current_pixel_idx = 0
            # TRL 0.23.0 batching logic
            for start in range(0, total_samples, batch_size):
                end = start + batch_size

                input_ids_chunks.append(input_ids[start:end])
                attention_mask_chunks.append(attention_mask[start:end])

                if image_grid_thw is not None and pixel_values is not None:
                    grid_slice = image_grid_thw[start:end]
                    image_grid_thw_chunks.append(grid_slice)

                    # Rows of pixel_values consumed by this chunk: sum over the
                    # chunk of the product of each grid entry's last dimension.
                    batch_pixel_count = grid_slice.prod(dim = -1).sum().item()

                    start_pixel_idx = current_pixel_idx
                    end_pixel_idx = current_pixel_idx + batch_pixel_count

                    pixel_values_chunks.append(
                        pixel_values[start_pixel_idx:end_pixel_idx]
                    )

                    if pixel_attention_mask is not None:
                        pixel_attention_mask_chunks.append(
                            pixel_attention_mask[start_pixel_idx:end_pixel_idx]
                        )
                    else:
                        pixel_attention_mask_chunks.append(None)

                    current_pixel_idx = end_pixel_idx

                else:
                    pixel_values_chunks.append(None)
                    image_grid_thw_chunks.append(None)
                    pixel_attention_mask_chunks.append(None)

            # NOTE(review): a non-tensor image_sizes yields one entry per
            # element rather than per row-chunk; zip() below stops at the
            # shortest sequence -- confirm the two line up.
            if image_sizes is not None and not isinstance(image_sizes, torch.Tensor):
                image_sizes_chunks = [[size] for size in image_sizes]
            else:
                image_sizes_chunks = chunk_optional(image_sizes, B)

            # Scaling knobs read from the model config; missing or None -> 0.
            temperature = self.temperature
            logit_softcapping = getattr(model.config, "final_logit_softcapping", 0)
            if logit_softcapping is None:
                logit_softcapping = 0
            logit_scale_multiply = getattr(model.config, "logit_scale", 0)
            if logit_scale_multiply is None:
                logit_scale_multiply = 0
            logit_scale_divide = getattr(model.config, "logits_scaling", 0)
            if logit_scale_divide is None:
                logit_scale_divide = 0

            zipped_inputs = zip(
                input_ids_chunks,
                attention_mask_chunks,
                pixel_values_chunks,
                image_grid_thw_chunks,
                pixel_attention_mask_chunks,
                image_sizes_chunks,
            )
            # Set for the duration of the forward passes and reset to "0"
            # afterwards. NOTE(review): no try/finally, so an exception in the
            # loop leaves the flag at "1".
            os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "1"

            with _get_inference_mode_context_manager(model):
                for (
                    input_ids_chunk,
                    attention_mask_chunk,
                    pixel_values_chunk,
                    image_grid_thw_chunk,
                    pixel_attention_mask_chunk,
                    image_sizes_chunk,
                ) in zipped_inputs:
                    # NOTE(review): device type is hard-coded to "cuda" here --
                    # confirm this is intended for non-CUDA devices.
                    with torch.amp.autocast(
                        device_type = "cuda", dtype = self._autocast_dtype
                    ):
                        if pixel_values is None:
                            logits_chunk = unwrapped_model(
                                input_ids = input_ids_chunk,
                                attention_mask = attention_mask_chunk,
                                pixel_values = pixel_values_chunk,
                                image_grid_thw = image_grid_thw_chunk,
                                pixel_attention_mask = pixel_attention_mask_chunk,
                                image_sizes = image_sizes_chunk,
                            ).logits

                            # Keep the trailing completion window (widened by
                            # max_left_pad), then drop the final position.
                            completion_input_ids_chunk = input_ids_chunk[
                                :, -(logits_to_keep + max_left_pad) :
                            ]
                            logits_chunk = logits_chunk[
                                :, -(logits_to_keep + max_left_pad + 1) :, :
                            ]
                            logits_chunk = logits_chunk[:, :-1, :]
                            # The model output is passed together with lm_head,
                            # i.e. it is treated as hidden states here --
                            # presumably because of the env flag set above.
                            logprobs_chunk = (
                                chunked_hidden_states_selective_log_softmax(
                                    logits_chunk,
                                    lm_head,
                                    completion_input_ids_chunk,
                                    chunks = input_ids_chunk.shape[0] * multiplier,
                                    logit_scale_multiply = logit_scale_multiply,
                                    logit_scale_divide = logit_scale_divide,
                                    logit_softcapping = logit_softcapping,
                                    temperature = temperature,
                                )
                            )
                        else:
                            # Essentially, for VLMs we do not go via the optimized path in models/,
                            # so we don't encounter the Flash Attn left-padding issue.
                            logits_chunk = unwrapped_model(
                                input_ids = input_ids_chunk,
                                attention_mask = attention_mask_chunk,
                                pixel_values = pixel_values_chunk,
                                image_grid_thw = image_grid_thw_chunk,
                                pixel_attention_mask = pixel_attention_mask_chunk,
                                image_sizes = image_sizes_chunk,
                                logits_to_keep = logits_to_keep + 1,
                            ).logits

                            logits_chunk = logits_chunk[:, :-1, :]
                            completion_input_ids_chunk = input_ids_chunk[
                                :, -logits_to_keep:
                            ]
                            # Guard: check if model returned hidden states or logits
                            if logits_chunk.shape[-1] == lm_head.shape[1]:
                                logprobs_chunk = (
                                    chunked_hidden_states_selective_log_softmax(
                                        logits_chunk,
                                        lm_head,
                                        completion_input_ids_chunk,
                                        chunks = input_ids_chunk.shape[0] * multiplier,
                                        logit_scale_multiply = logit_scale_multiply,
                                        logit_scale_divide = logit_scale_divide,
                                        logit_softcapping = logit_softcapping,
                                        temperature = temperature,
                                    )
                                )
                            else:
                                # Model returned logits directly - scaling/softcapping already applied by model forward
                                logprobs_chunk = chunked_selective_log_softmax(
                                    logits_chunk,
                                    completion_input_ids_chunk,
                                    temperature,
                                )
                    # This is needed to avoid race conditions with GPT OSS offload_embbed=True
                    # However, it seems that this line does not slow down or disrupt models.
                    device_synchronize()
                    all_logprobs_list.append(logprobs_chunk)
                logprobs = torch.cat(all_logprobs_list, dim = 0)
                # Entropies are not computed; `compute_entropy` is ignored.
                entropies = None

            os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "0"

            return logprobs.detach(), entropies  # logps, entropies
            # input_ids = input_ids[:, -logits_to_keep:]
            # For transformers<=4.48, logits_to_keep argument isn't supported, so here we drop logits ourselves.
            # See https://github.com/huggingface/trl/issues/2770
            # logits = logits[:, -logits_to_keep:]
            # return logits
            # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details
            # logits = logits / self.temperature
            # logps = selective_log_softmax(logits, input_ids)

            # row_indices, col_indices = torch.where(logps < -20)

            # # Method 1: Check if tensors have elements
            # if len(row_indices) > 0 and len(col_indices) > 0:
            #     breakpoint()  # Breakpoint triggered here
            #     print("Found high values!")
            # return  logps #  compute logprobs for the input tokens

    # Hand back the replacement as source text.
    function = inspect.getsource(_get_per_token_logps_and_entropies)
    return function


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer__get_per_token_logps_and_entropies)

# Look up the shared GRPO helpers registered in RL_REPLACEMENTS and queue
# them as "pre items" for the GRPO trainer.
grpo_compute_loss = RL_REPLACEMENTS["grpo_compute_loss"]
grpo_compute_loss_slow = RL_REPLACEMENTS["grpo_compute_loss_slow"]
UnslothEfficientGRPO = RL_REPLACEMENTS["UnslothEfficientGRPO"]
grpo_accumulated_loss = RL_REPLACEMENTS["grpo_accumulated_loss"]
grpo_update_SamplingParams = RL_REPLACEMENTS["grpo_update_SamplingParams"]
# Each helper is appended as source text obtained via inspect.getsource.
RL_PRE_ITEMS["grpo_trainer"].append(inspect.getsource(grpo_compute_loss))
RL_PRE_ITEMS["grpo_trainer"].append(inspect.getsource(UnslothEfficientGRPO))
RL_PRE_ITEMS["grpo_trainer"].append(inspect.getsource(grpo_accumulated_loss))
# NOTE(review): appended as-is, without inspect.getsource -- presumably this
# entry is already stored as source text; confirm where it is registered.
RL_PRE_ITEMS["grpo_trainer"].append(grpo_compute_loss_slow)
RL_PRE_ITEMS["grpo_trainer"].append(inspect.getsource(grpo_update_SamplingParams))
RL_PRE_ITEMS["grpo_trainer"].append(
    inspect.getsource(_get_inference_mode_context_manager)
)


# Replace compute_loss so the loss comes from grpo_accumulated_loss / grpo_compute_loss_slow
def grpo_trainer_compute_loss(function_name, function):
    """Swap the trainer's ``compute_loss`` for Unsloth's GRPO loss wrapper.

    Args:
        function_name: Name of the trainer method being patched.
        function: Source text of that method.

    Returns:
        ``function`` unchanged unless ``function_name`` is ``"compute_loss"``;
        in that case the source text of the nested replacement below, as
        produced by ``inspect.getsource``.

    The emitted method delegates the loss to ``grpo_compute_loss_slow`` (when
    per-token log-probs are available) or ``grpo_accumulated_loss``, then logs
    completion length, KL, optional vLLM importance-sampling statistics and
    clip ratios into ``self._metrics``.

    The metrics bucket is resolved once so that both the per-mode layout
    (``self._metrics["train"]`` / ``["eval"]``) and the flat layout work, and
    the legacy loss path defines ``delta`` / ``flat_is_ratio`` so the vLLM
    metrics guard cannot hit an unbound local.
    """
    if function_name != "compute_loss":
        return function

    def compute_loss(
        self, model, inputs, return_outputs = False, num_items_in_batch = None
    ):
        if return_outputs:
            raise ValueError("The GRPOTrainer does not support returning outputs")
        # Compute the per-token log probabilities for the model

        prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"]
        completion_ids, completion_mask = (
            inputs["completion_ids"],
            inputs["completion_mask"],
        )
        pixel_values, image_grid_thw = (
            inputs.get("pixel_values", None),
            inputs.get("image_grid_thw", None),
        )
        pixel_attention_mask, image_sizes = (
            inputs.get("pixel_attention_mask", None),
            inputs.get("image_sizes", None),
        )
        num_items_in_batch = inputs.get("num_items_in_batch", None)
        sampling_per_token_logps = inputs.get("sampling_per_token_logps", None)
        current_gradient_accumulation_steps = self.current_gradient_accumulation_steps
        num_processes = self.accelerator.num_processes

        input_ids = torch.cat([prompt_ids, completion_ids], dim = 1)
        bsz, qlen = input_ids.shape
        attention_mask = torch.cat([prompt_mask, completion_mask], dim = 1)
        # attention_mask = None
        logits_to_keep = completion_ids.size(
            1
        )  # we only need to compute the logits for the completion tokens
        _input_ids = input_ids
        _logits_to_keep = logits_to_keep

        # Use whichever log-prob method this trainer exposes.
        get_logps_func = (
            lambda model,
            input_ids,
            attention_mask,
            logits_to_keep,
            batch_size = None,
            compute_entropy = False,
            compute_efficient = False: self._get_per_token_logps(
                model, input_ids, attention_mask, logits_to_keep, compute_efficient
            )
            if hasattr(self, "_get_per_token_logps")
            else self._get_per_token_logps_and_entropies(
                model,
                input_ids,
                attention_mask,
                logits_to_keep,
                batch_size,
                compute_entropy,
                compute_efficient,
            )[0]
        )  # logps

        per_token_logps = get_logps_func(
            model, input_ids, attention_mask, logits_to_keep, compute_efficient = True
        )
        # Compute the KL divergence between the model and the reference model
        # _prepare_inputs doesn't return reference log probs anymore. We need to calculate it ourselves.
        # https://github.com/huggingface/trl/blob/05bc43e960396581e458195b8388efe6b82cae1f/trl/trainer/grpo_trainer.py#L1328
        # if self.beta != 0.0:
        #     with torch.inference_mode(), model.disable_adapter():
        #         ref_per_token_logps = per_token_logps = get_logps_func(model, input_ids, attention_mask, logits_to_keep)
        # else:
        #     ref_per_token_logps = None
        ref_logps = inputs.get("ref_per_token_logps", None)
        # per_token_kl = torch.exp(ref_per_token_logps - per_token_logps) - (ref_per_token_logps - per_token_logps) - 1
        # x - x.detach() allows for preserving gradients from x
        advantages = inputs["advantages"]
        # per_token_loss = torch.exp(per_token_logps - per_token_logps.detach()) * advantages.unsqueeze(1)
        # per_token_loss = -(per_token_loss - self.beta * per_token_kl)
        # loss = ((per_token_loss * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean()
        old_logps = inputs.get("old_per_token_logps", None)

        input_ids = input_ids[:, -logits_to_keep:]

        # Get logit softcapping and logit scale
        logit_softcapping = getattr(model.config, "final_logit_softcapping", 0)  # Gemma
        if logit_softcapping is None:
            logit_softcapping = 0
        logit_scale_multiply = getattr(model.config, "logit_scale", 0)  # Cohere
        if logit_scale_multiply is None:
            logit_scale_multiply = 0
        logit_scale_divide = getattr(model.config, "logits_scaling", 0)  # Granite
        if logit_scale_divide is None:
            logit_scale_divide = 0

        max_left_pad = inputs.get("max_left_pad", 0)
        if per_token_logps is not None:
            (
                loss,
                completion_length,
                mean_kl,
                delta,
                flat_is_ratio,
                coef_1,
                completion_mask,
            ) = grpo_compute_loss_slow(
                ref_logps,
                per_token_logps,
                old_logps,
                input_ids,
                completion_mask,
                self.beta,
                advantages,
                pixel_values = pixel_values,
                image_grid_thw = image_grid_thw,
                loss_type = self.args.loss_type,
                importance_sampling_level = self.importance_sampling_level,
                epsilon_low = self.epsilon_low,
                epsilon_high = self.epsilon_high,
                max_completion_length = self.args.max_completion_length,
                delta = self.args.delta,
                temperature = self.args.temperature,
                max_left_pad = max_left_pad,
                logit_softcapping = logit_softcapping,
                logit_scale_multiply = logit_scale_multiply,
                logit_scale_divide = logit_scale_divide,
                num_items_in_batch = num_items_in_batch,
                current_gradient_accumulation_steps = current_gradient_accumulation_steps,
                num_processes = num_processes,
                sampling_per_token_logps = sampling_per_token_logps,
            )
        else:
            if hasattr(self.args, "loss_type"):
                (
                    loss,
                    completion_length,
                    mean_kl,
                    delta,
                    flat_is_ratio,
                    coef_1,
                    completion_mask,
                ) = grpo_accumulated_loss(
                    trainer = self,
                    input_ids = _input_ids,
                    pixel_values = pixel_values,
                    image_grid_thw = image_grid_thw,
                    logits_to_keep = logits_to_keep,
                    completion_mask = completion_mask,
                    advantages = advantages,
                    old_logps = old_logps,
                    ref_logps = ref_logps,
                    n_chunks = self.args.unsloth_num_chunks,
                    loss_type = self.args.loss_type,
                    importance_sampling_level = self.importance_sampling_level,
                    epsilon_low = self.epsilon_low,
                    epsilon_high = self.epsilon_high,
                    max_completion_length = self.args.max_completion_length,
                    delta = self.args.delta,
                    temperature = self.args.temperature,
                    max_left_pad = max_left_pad,
                    logit_softcapping = logit_softcapping,
                    logit_scale_multiply = logit_scale_multiply,
                    logit_scale_divide = logit_scale_divide,
                    attention_mask = attention_mask,
                    num_items_in_batch = num_items_in_batch,
                    current_gradient_accumulation_steps = current_gradient_accumulation_steps,
                    num_processes = num_processes,
                    sampling_per_token_logps = sampling_per_token_logps,
                )
            else:
                # to ensure backwards compatibility with trl 0.15.2 and maybe even 0.17
                # This path reports no sampling statistics; bind the names so
                # the vLLM metrics guard below skips instead of raising.
                delta = None
                flat_is_ratio = None
                loss, completion_length, mean_kl, coef_1, completion_mask = (
                    grpo_accumulated_loss(
                        trainer = self,
                        input_ids = _input_ids,
                        logits_to_keep = logits_to_keep,
                        completion_mask = completion_mask,
                        advantages = advantages,
                        old_logps = old_logps,
                        ref_logps = ref_logps,
                        n_chunks = self.args.unsloth_num_chunks,
                        temperature = self.args.temperature,
                        logit_softcapping = logit_softcapping,
                        logit_scale_multiply = logit_scale_multiply,
                        logit_scale_divide = logit_scale_divide,
                        attention_mask = attention_mask,
                    )
                )

        # Resolve the metrics bucket once. `mode` is always bound, and a flat
        # metrics dict (no "train" key) is written to directly.
        mode = "eval" if self.control.should_evaluate else "train"
        if "train" in self._metrics:
            metrics = self._metrics[mode]
        else:
            metrics = self._metrics
        metrics["completion_length"].append(completion_length.item())
        metrics["kl"].append(mean_kl.item())

        if (
            self.use_vllm
            and delta is not None
            and getattr(self, "vllm_importance_sampling_correction", False)
        ):
            mean_delta = (
                torch.mean(delta)
                if delta.numel() > 0
                else torch.tensor(0.0, device = self.model.device)
            )
            max_delta = (
                torch.max(delta)
                if delta.numel() > 0
                else torch.tensor(0.0, device = self.model.device)
            )
            metrics["sampling/sampling_logp_difference/mean"].append(
                self.accelerator.gather(mean_delta).mean().item()
            )
            metrics["sampling/sampling_logp_difference/max"].append(
                self.accelerator.gather(max_delta).max().item()
            )

            min_importance_sampling_ratio = (
                torch.min(flat_is_ratio)
                if flat_is_ratio.numel() > 0
                else torch.tensor(0.0, device = self.model.device)
            )
            mean_importance_sampling_ratio = (
                torch.mean(flat_is_ratio)
                if flat_is_ratio.numel() > 0
                else torch.tensor(0.0, device = self.model.device)
            )
            max_importance_sampling_ratio = (
                torch.max(flat_is_ratio)
                if flat_is_ratio.numel() > 0
                else torch.tensor(0.0, device = self.model.device)
            )
            metrics["sampling/importance_sampling_ratio/min"].append(
                self.accelerator.gather(min_importance_sampling_ratio)
                .nan_to_num(nan = float("inf"))
                .min()
                .item()
            )
            metrics["sampling/importance_sampling_ratio/mean"].append(
                self.accelerator.gather(mean_importance_sampling_ratio).nanmean().item()
            )
            metrics["sampling/importance_sampling_ratio/max"].append(
                self.accelerator.gather(max_importance_sampling_ratio)
                .nan_to_num(nan = float("-inf"))
                .max()
                .item()
            )

        completion_token_count = completion_mask.sum().clamp(min = 1.0)

        def masked_batch_mean(x):
            if x.shape[1] == 1:  # when importance_sampling_level == "sequence"
                return x.mean()
            else:
                return (x * completion_mask).sum() / completion_token_count

        if advantages.dim() == 1:
            advantages = advantages.unsqueeze(1)

        # Trainers without a loss_type attribute simply skip clip-ratio logging.
        loss_type = getattr(self, "loss_type", getattr(self.args, "loss_type", None))
        if loss_type in ["grpo", "bnpo", "dr_grpo", "dapo"]:
            # Compute the clipped probability ratios
            is_low_clipped = (coef_1 < 1 - self.epsilon_low) & (advantages < 0)
            is_high_clipped = (coef_1 > 1 + self.epsilon_high) & (advantages > 0)
            is_region_clipped = is_low_clipped | is_high_clipped

            low_clip = masked_batch_mean(is_low_clipped.float())
            high_clip = masked_batch_mean(is_high_clipped.float())
            clip_ratio = masked_batch_mean(is_region_clipped.float())

            gathered_low_clip = self.accelerator.gather(low_clip)
            metrics["clip_ratio/low_mean"].append(
                gathered_low_clip.nanmean().item()
            )
            metrics["clip_ratio/low_min"].append(
                nanmin(gathered_low_clip).item()
            )
            gathered_high_clip = self.accelerator.gather(high_clip)
            metrics["clip_ratio/high_mean"].append(
                gathered_high_clip.nanmean().item()
            )
            metrics["clip_ratio/high_max"].append(
                nanmax(gathered_high_clip).item()
            )
            gathered_clip_ratio = self.accelerator.gather(clip_ratio)
            metrics["clip_ratio/region_mean"].append(
                gathered_clip_ratio.nanmean().item()
            )
        elif loss_type == "cispo":
            is_cispo_clipped = (coef_1 > self.epsilon_high) & (advantages > 0)
            cispo_clip_ratio = masked_batch_mean(is_cispo_clipped.float())
            gathered_cispo_clip_ratio = self.accelerator.gather(cispo_clip_ratio)
            metrics["cispo_clip_ratio"].append(
                gathered_cispo_clip_ratio.nanmean().item()
            )

        return loss

    # Hand back the replacement as source text.
    function = inspect.getsource(compute_loss)
    return function


RL_FUNCTIONS["grpo_trainer"].append(grpo_trainer_compute_loss)


# Fix KTO shape mismatch when Unsloth model forward truncates input_ids
# but labels aren't truncated. TRL 0.27.2+ _process_tokens only truncates
# completions, not prompts -- so prompts exceeding max_seq_length cause the
# model to produce shorter logits than the labels expect.
def kto_trainer_get_batch_logps(function_name, function):
    """Rewrite ``get_batch_logps`` so a shape mismatch truncates instead of raising.

    Args:
        function_name: Name of the function whose source is being patched.
        function: Source code of that function, as a string.

    Returns:
        ``function`` unchanged when ``function_name`` is not
        ``"get_batch_logps"``. Otherwise the source with the shape-mismatch
        ``raise ValueError(...)`` statement replaced by statements that slice
        ``logits`` and ``labels`` down to their shared minimum sequence length.
    """
    if function_name != "get_batch_logps":
        return function

    old = 'raise ValueError("Logits (batch and sequence length dim) and labels must have the same shape.")'
    replacement_lines = (
        "# Unsloth: auto-truncate to shorter sequence length (model may have truncated input_ids)",
        "_min_len = min(logits.shape[1], labels.shape[1])",
        "logits = logits[:, :_min_len, :]",
        "labels = labels[:, :_min_len]",
    )

    # The raise lives inside an `if` block inside the method. Reuse whatever
    # indentation the raise statement itself carries, so the injected lines
    # stay aligned even if the source was dedented or is nested differently.
    pattern = re.compile(r"^([ \t]*)" + re.escape(old), flags = re.MULTILINE)

    def _truncate_instead_of_raise(match):
        indent = match.group(1)
        return "\n".join(indent + line for line in replacement_lines)

    return pattern.sub(_truncate_instead_of_raise, function)


# Append the get_batch_logps rewriter to the list kept under the "kto_trainer"
# key of RL_FUNCTIONS (the consumer of that list is not shown in this section).
RL_FUNCTIONS["kto_trainer"].append(kto_trainer_get_batch_logps)


# https://github.com/huggingface/trl/blob/main/trl/trainer/grpo_trainer.py#L356
# TRL warns if batch size is not a multiple of num_generations -> fix this.
def grpo_trainer_fix_batch_size(RLTrainer_source, RLConfig_source):
    """Return config code that snaps the batch size to ``num_generations``.

    Args:
        RLTrainer_source: Source text of the trainer being patched.
        RLConfig_source: Source text of the matching config class.

    Returns:
        An empty string when the trainer source lacks the "divisible by the
        number of generations" check (later TRL versions dropped it) or the
        config source never mentions ``num_generations``. Otherwise a code
        snippet that prints a notice and sets ``per_device_train_batch_size``
        to ``num_generations`` whenever the former is not a multiple of the
        latter.
    """
    applies = (
        "divisible by the number of generations" in RLTrainer_source
        and "num_generations" in RLConfig_source
    )
    if not applies:
        return ""

    # The notice is one generated print() call; the `\\n` stays an escape
    # sequence inside the generated string literal rather than a real newline.
    notice = (
        "'Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\\n"
        "We will change the batch size of '"
    )
    print_call = (
        "    print("
        + notice
        + " + str(per_device_train_batch_size) + "
        + "' to the `num_generations` of '"
        + " + str(num_generations))"
    )
    snippet = [
        "div = per_device_train_batch_size // num_generations",
        "if div * num_generations != per_device_train_batch_size:",
        print_call,
        "    per_device_train_batch_size = num_generations",
    ]
    return "\n".join(snippet) + "\n"


# Append the batch-size fix to the list kept under the "grpo_trainer" key of
# RL_CONFIG_CHANGES (the consumer of that list is not shown in this section).
RL_CONFIG_CHANGES["grpo_trainer"].append(grpo_trainer_fix_batch_size)


# Add other reward function names
def grpo_trainer_metrics(RLTrainer_source, RLConfig_source):
    """Return code that adds per-reward-function metric names to ``other_metrics``.

    Args:
        RLTrainer_source: Source text of the trainer being patched.
        RLConfig_source: Source text of the matching config class (unused).

    Returns:
        An empty string when the trainer source never mentions
        ``reward_funcs``. Otherwise a code snippet that walks ``reward_funcs``
        and appends ``rewards/<name>/mean``, ``rewards/<name>/std`` and/or
        ``rewards/<name>``, depending on which metric keys the trainer source
        contains. Reward functions without a ``__name__`` are skipped.
    """
    if "reward_funcs" not in RLTrainer_source:
        return ""

    # Newer TRL logs /mean and /std; the bare key is only used when /mean is absent.
    has_mean = "rewards/{reward_func_name}/mean" in RLTrainer_source
    has_std = "rewards/{reward_func_name}/std" in RLTrainer_source
    has_plain = (not has_mean) and ("rewards/{reward_func_name}" in RLTrainer_source)

    # The detected flags are baked into the generated code as literal True/False.
    generated = [
        "if not isinstance(reward_funcs, list): _reward_funcs = [reward_funcs]",
        "else: _reward_funcs = reward_funcs",
        "for reward_func in _reward_funcs:",
        "    try:",
        "        reward_func_name = reward_func.__name__",
        f"        if {has_mean}:",
        "            other_metrics.append(f'rewards/{reward_func_name}/mean')",
        f"        if {has_std}:",
        "            other_metrics.append(f'rewards/{reward_func_name}/std')",
        f"        if {has_plain}:",
        "            other_metrics.append(f'rewards/{reward_func_name}')",
        "    except: pass",
    ]
    return "\n".join(generated) + "\n"


# Append the reward-metric snippet builder to the list kept under the
# "grpo_trainer" key of RL_METRICS_CHANGES (the consumer is not shown here).
RL_METRICS_CHANGES["grpo_trainer"].append(grpo_trainer_metrics)


def openenv_vllm_reload_weights():
    """Patch TRL's openenv rollout generation to cooperate with Unsloth's vLLM handling.

    The rollout function's source is fetched, edited with two regexes,
    re-executed inside the ``trl.experimental.openenv.utils`` namespace and
    installed back on that module. The function does nothing when TRL is not
    installed, is older than 0.26.0, the openenv module cannot be imported,
    no known rollout function exists, its source is unavailable, or neither
    regex changes the source.

    Returns:
        None.
    """
    # This function patches the trl openenv generate_rollout_completions function to:
    # 1. Remove the reload_weights call (unsloth handles weight reloading)
    # 2. Fix wake_up call to be compatible with unsloth (remove tags to wake everything)
    #
    # The issue: TRL's wake_up(tags=["kv_cache"]) only wakes kv_cache, leaving is_sleeping=True
    # at the executor level. This causes unsloth's patched generate to try waking up again,
    # resulting in double create_and_map on already-mapped handles.
    #
    # The fix: Use wake_up() with no tags, which wakes everything. Unsloth's patched
    # CuMemAllocator.wake_up skips weights anyway, so this is safe.
    if importlib.util.find_spec("trl") is None:
        return
    if Version(importlib_version("trl")) < Version("0.26.0"):
        return

    try:
        import trl.experimental.openenv.utils as openenv_utils
        import trl.experimental.openenv as openenv
    except Exception as e:
        logger.info(f"Unsloth: Failed to import trl openenv: {e}")
        logger.info(
            "Unsloth: trl.experimental.openenv not available — skipping RL openenv patches."
        )
        return

    # trl 0.28 changed the function name yet again! Thanks trl :)
    # Older TRL versions may keep sleep/wake logic in the public dispatcher,
    # so try the colocate helper first and fall back to the dispatcher.
    candidate_names = (
        "_generate_rollout_completions_colocate",
        "generate_rollout_completions",
    )
    patch_target = None
    for patch_target_name in candidate_names:
        patch_target = getattr(openenv_utils, patch_target_name, None)
        if patch_target is not None:
            break
    if patch_target is None:
        # Neither name exists (e.g. another rename upstream): skip instead of
        # raising AttributeError while patches are being applied.
        logger.info(
            "Unsloth: Could not find a trl openenv rollout function to patch — skipping."
        )
        return

    try:
        src = inspect.getsource(patch_target)
    except (OSError, TypeError) as e:
        logger.info(
            f"Unsloth: Could not get source of trl openenv {patch_target_name}: {e}"
        )
        return
    src = textwrap.dedent(src)
    original_src = src

    # Remove the reload_weights call - unsloth handles this differently
    src = re.sub(r'.*\.collective_rpc\(\s*([\'"])reload_weights\1\s*\).*\n?', "", src)

    # Change wake_up(tags=["kv_cache"]) to wake_up() - wake everything to set is_sleeping=False
    # This prevents double wake_up issues. Unsloth's allocator skips weights anyway.
    src = re.sub(r"\.wake_up\(tags=\[.*?\]\)", ".wake_up()", src)

    if original_src == src:
        logger.warning("Unsloth: Warning - regex did not match, patch may have failed")
        return

    # Execute and explicitly assign to module
    local_ns = {}
    exec(compile(src, "<unsloth>", "exec"), openenv_utils.__dict__, local_ns)
    patched_func = local_ns[patch_target_name]

    # Patch the target function in utils; if dispatcher was patched also update parent module alias.
    setattr(openenv_utils, patch_target_name, patched_func)
    if patch_target_name == "generate_rollout_completions":
        openenv.generate_rollout_completions = patched_func
    logger.info(f"Unsloth: Patched trl openenv {patch_target_name}")


# Append the openenv patcher to the list kept under the "openenv" key of
# RL_ADDITIONAL_FUNCTIONS (the consumer of that list is not shown here).
RL_ADDITIONAL_FUNCTIONS["openenv"].append(openenv_vllm_reload_weights)


def vllm_generation_init_patch():
    """Patch ``trl.generation.vllm_generation.VLLMGeneration`` for Unsloth fast inference.

    Three methods are rewritten by editing their source text and re-executing
    it inside the TRL module's namespace:

    * ``_init_vllm``: reuse ``model.vllm_engine`` when the model has one
      instead of building a new ``LLM``.
    * ``sync_weights``: return immediately when ``unsloth_fast_inference_lora``
      is set on the instance.
    * ``generate``: turn the ``collective_rpc("reload_weights")`` call into
      a ``pass``.

    Does nothing when TRL is not installed, is older than 0.28.0, or the
    module cannot be imported. If one of the regex transforms does not match,
    a warning is logged and the function returns; methods already patched
    before that point stay patched, but the success messages are not logged.

    Returns:
        None.
    """
    # trl moved vllm stuff to trl/generation/vllm_generation.py
    # We need to patch it to not instantiate another vLLM instance if we already have one with fast_inference
    # Edit the TRL source directly and install the patched function in the TRL module.
    # https://github.com/huggingface/trl/commit/0eb66d8f2fc63b3d00d8dbc18f99c3f48750bd16
    # This exists in trl versions 0.28.0 and above

    if importlib.util.find_spec("trl") is None:
        return
    if Version(importlib_version("trl")) < Version("0.28.0"):
        return

    try:
        import trl.generation.vllm_generation as vllm_generation
    except (ImportError, NameError, Exception) as e:
        logger.info(f"Unsloth: Failed to import trl.generation.vllm_generation: {e}")
        return

    def patch_vllm_generation_method(method_name, transform, marker, filename_suffix):
        """Rewrite one ``VLLMGeneration`` method in place.

        Args:
            method_name: Attribute name of the method on ``VLLMGeneration``.
            transform: Callable taking the dedented source and returning the
                patched source; it may raise ``RuntimeError``.
            marker: Text whose presence in the source means the method was
                already patched.
            filename_suffix: Used to build the synthetic filename the patched
                code is compiled under.

        Returns:
            ``False`` when the method or its source cannot be found, ``True``
            when the method was already patched or has just been patched.
        """
        method = getattr(vllm_generation.VLLMGeneration, method_name, None)
        if method is None:
            logger.info(f"Unsloth: Could not find VLLMGeneration.{method_name}")
            return False

        try:
            src = inspect.getsource(method)
        except Exception as e:
            logger.info(
                f"Unsloth: Could not get source of VLLMGeneration.{method_name}: {e}"
            )
            return False

        src = textwrap.dedent(src)
        # Already patched: skip so the transform is not applied twice.
        if marker in src:
            return True

        src = transform(src)
        filename = f"<unsloth_trl_vllm_generation_{filename_suffix}_patch>"
        source_lines = [line + "\n" for line in src.splitlines()]
        # Store the patched text in linecache under the synthetic filename,
        # presumably so inspect.getsource (used above for the marker check)
        # and tracebacks can still find source for the exec'd function.
        linecache.cache[filename] = (
            len(src),
            None,
            source_lines,
            filename,
        )

        # Execute with the TRL module's globals so names used by the method
        # resolve as before; the new function object lands in local_ns.
        local_ns = {}
        exec(compile(src, filename, "exec"), vllm_generation.__dict__, local_ns)
        setattr(vllm_generation.VLLMGeneration, method_name, local_ns[method_name])
        return True

    # Patch init to remove vLLM.LLM instantiation
    def patch_init_vllm(src):
        """Wrap the ``self.llm = LLM(...)`` statement in an ``if``/``else``.

        Raises:
            RuntimeError: If no ``self.llm = LLM(`` block is found.
        """
        # Match from `self.llm = LLM(` to the first `)` on a line of its own
        # at the same indentation (lazy match over the argument lines).
        pattern = re.compile(
            r"(?P<llm_block>^(?P<indent>[ \t]*)self\.llm\s*=\s*LLM\s*\(\n(?:.*\n)*?^(?P=indent)\))",
            re.MULTILINE,
        )

        def replace_llm_block(match):
            indent = match.group("indent")
            # Dedent the original call so it can be re-indented one level
            # deeper under the `else:` branch.
            llm_block = textwrap.dedent(match.group("llm_block"))
            return (
                f"{indent}if hasattr(model, 'vllm_engine'):\n"
                f"{indent}    # Unsloth already inits vLLM in fast inference mode. Do not redo :)\n"
                f"{indent}    self.llm = model.vllm_engine\n"
                f"{indent}    self.unsloth_fast_inference_lora = True\n"
                f"{indent}else:\n" + textwrap.indent(llm_block, indent + "    ")
            )

        patched_src, num_replacements = pattern.subn(replace_llm_block, src, count = 1)
        if num_replacements == 0:
            raise RuntimeError(
                "Unsloth: Warning - regex did not match, VLLMGeneration._init_vllm patch may have failed"
            )
        return patched_src

    # has some sync_weights or reload rpc calls.
    # we patched the grpo_trainer to strip them for prev versions
    # Ref: grpo_trainer__generate_single_turn above around L270-280
    def patch_sync_weights(src):
        """Insert an early-return guard at the top of ``sync_weights``.

        Raises:
            RuntimeError: If the ``def sync_weights(self):`` line is not found.
        """
        # Requires the exact signature `def sync_weights(self):` at column 0
        # of the dedented source; the body is everything that follows.
        pattern = re.compile(
            r"^(?P<def_line>def sync_weights\(self\):\n)(?P<body>(?:.*\n)*)",
            re.MULTILINE,
        )

        def replace_sync_weights(match):
            body = match.group("body")
            # The guard goes directly after the def line, i.e. ahead of any
            # docstring the original body starts with.
            guard = (
                "    if getattr(self, 'unsloth_fast_inference_lora', False):\n"
                "        # Unsloth fast inference LoRA shares weights with vLLM already.\n"
                "        return\n\n"
            )
            return match.group("def_line") + guard + body

        patched_src, num_replacements = pattern.subn(replace_sync_weights, src, count = 1)
        if num_replacements == 0:
            raise RuntimeError(
                "Unsloth: Warning - regex did not match, VLLMGeneration.sync_weights patch may have failed"
            )
        return patched_src

    def patch_generate(src):
        """Replace the first ``reload_weights`` RPC call in ``generate`` with ``pass``.

        Raises:
            RuntimeError: If no such call on a line of its own is found.
        """
        # Group 2 is the opening quote, so either quote style matches as long
        # as the closing quote is the same character.
        pattern = re.compile(
            r"^(?P<indent>[ \t]*)self\.llm\.collective_rpc\(\s*(['\"])reload_weights\2\s*\)\s*$",
            re.MULTILINE,
        )

        def replace_reload_weights(match):
            indent = match.group("indent")
            # Keep a statement on the line so the enclosing block stays valid;
            # this exact text doubles as the "already patched" marker below.
            return f'{indent}pass  # self.llm.collective_rpc("reload_weights")'

        patched_src, num_replacements = pattern.subn(
            replace_reload_weights, src, count = 1
        )
        if num_replacements == 0:
            raise RuntimeError(
                "Unsloth: Warning - regex did not match, VLLMGeneration.generate patch may have failed"
            )
        return patched_src

    # Each marker is a line the matching transform inserts, which lets
    # patch_vllm_generation_method recognise an already-patched method.
    try:
        init_patched = patch_vllm_generation_method(
            "_init_vllm",
            patch_init_vllm,
            "self.unsloth_fast_inference_lora = True",
            "init_vllm",
        )
        sync_patched = patch_vllm_generation_method(
            "sync_weights",
            patch_sync_weights,
            "if getattr(self, 'unsloth_fast_inference_lora', False):",
            "sync_weights",
        )
        generate_patched = patch_vllm_generation_method(
            "generate",
            patch_generate,
            'pass  # self.llm.collective_rpc("reload_weights")',
            "generate",
        )
    except RuntimeError as e:
        # A transform failed to match: later methods are left untouched and
        # the success messages below are skipped.
        logger.warning(str(e))
        return

    if init_patched:
        logger.info("Unsloth: Patched trl VLLMGeneration._init_vllm")
    if sync_patched:
        logger.info("Unsloth: Patched trl VLLMGeneration.sync_weights")
    if generate_patched:
        logger.info("Unsloth: Patched trl VLLMGeneration.generate")


# Append the VLLMGeneration patcher to the list kept under the "vllm_generation"
# key of RL_ADDITIONAL_FUNCTIONS (the consumer of that list is not shown here).
RL_ADDITIONAL_FUNCTIONS["vllm_generation"].append(vllm_generation_init_patch)


================================================
FILE: unsloth/models/sentence_transformer.py
================================================
# Copyright 2025 electroglyph. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from .loader import FastModel, DISABLE_SDPA_MODEL_NAMES
from ._utils import SUPPORTS_BFLOAT16
import inspect
import json
import os
import types
from huggingface_hub import hf_hub_download
from typing import Optional
import torch
from transformers.modeling_outputs import BaseModelOutput
from collections import OrderedDict
from transformers.models.distilbert import modeling_distilbert
from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask_for_sdpa
import transformers
from packaging.version import Version
import re
from transformers import AutoModel, AutoConfig
from transformers.models.auto.auto_factory import _get_model_class
import tempfile
from huggingface_hub import HfApi, get_token
from ..save import unsloth_save_pretrained_torchao, unsloth_save_pretrained_gguf
import contextlib
import shutil


def _save_pretrained_torchao(
    self,
    save_directory,
    tokenizer = None,
    torchao_config = None,
    push_to_hub = False,
    token = None,
):
    """Save this SentenceTransformer with a torchao-quantized transformer module.

    The regular SentenceTransformer layout is written first, then the inner
    transformer model is handed to ``unsloth_save_pretrained_torchao`` and the
    output it leaves in ``<transformer_dir>-torchao`` is folded back into the
    transformer directory, so ``modules.json`` keeps pointing at valid weights.

    Args:
        save_directory: Directory the model is written to.
        tokenizer: Tokenizer to save; defaults to ``self.tokenizer``.
        torchao_config: Forwarded to ``unsloth_save_pretrained_torchao``.
        push_to_hub: Forwarded to ``unsloth_save_pretrained_torchao``.
        token: Forwarded to ``unsloth_save_pretrained_torchao``.

    Returns:
        None.
    """
    self.save_pretrained(save_directory)

    # grab inner model
    inner_model = self[0].auto_model
    if hasattr(inner_model, "_orig_mod"):
        # presumably a torch.compile wrapper around the real module
        inner_model = inner_model._orig_mod

    # merge LoRA first
    if hasattr(inner_model, "merge_and_unload"):
        inner_model = inner_model.merge_and_unload()

    # confirm Transformer path
    transformer_path = "0_Transformer"
    modules_path = os.path.join(save_directory, "modules.json")
    if os.path.exists(modules_path):
        try:
            with open(modules_path, "r", encoding = "utf-8") as f:
                modules = json.load(f)
            for m in modules:
                if m.get("type", "").endswith("Transformer"):
                    transformer_path = m.get("path", "")
                    break
        except Exception:
            # Best effort: an unreadable or malformed modules.json falls back
            # to the default path. KeyboardInterrupt/SystemExit still propagate.
            pass

    transformer_dir = os.path.join(save_directory, transformer_path)
    transformer_dir = os.path.abspath(transformer_dir)

    if tokenizer is None:
        tokenizer = self.tokenizer

    @contextlib.contextmanager
    def patch_unsloth_save():
        original_causal = transformers.AutoModelForCausalLM
        original_rmtree = shutil.rmtree
        # unsloth_save_pretrained_torchao expects AutoModelForCausalLM
        transformers.AutoModelForCausalLM = transformers.AutoModel
        # prevent unsloth from deleting the unquantized model directory
        shutil.rmtree = lambda *args, **kwargs: None
        try:
            yield
        finally:
            # unpatch
            transformers.AutoModelForCausalLM = original_causal
            shutil.rmtree = original_rmtree

    with patch_unsloth_save():
        unsloth_save_pretrained_torchao(
            inner_model,
            transformer_dir,
            tokenizer = tokenizer,
            torchao_config = torchao_config,
            push_to_hub = push_to_hub,
            token = token,
        )

    # avoid `0_Transformer-torchao`, it was either this or fix modules.json
    torchao_dir = transformer_dir + "-torchao"
    if os.path.exists(torchao_dir):
        os.makedirs(transformer_dir, exist_ok = True)

        # move contents
        for item in os.listdir(torchao_dir):
            s = os.path.join(torchao_dir, item)
            d = os.path.join(transformer_dir, item)
            if os.path.isdir(s):
                shutil.copytree(s, d, dirs_exist_ok = True)
            else:
                shutil.copy2(s, d)

        # remove torchao dir
        shutil.rmtree(torchao_dir)

        # remove conflicting safetensors if we brought in bin
        if os.path.exists(os.path.join(transformer_dir, "pytorch_model.bin")):
            safetensors_path = os.path.join(transformer_dir, "model.safetensors")
            if os.path.exists(safetensors_path):
                # Best effort: a file that cannot be removed is left in place.
                with contextlib.suppress(OSError):
                    os.remove(safetensors_path)

    try:
        FastSentenceTransformer._add_unsloth_branding(save_directory)
    except Exception:
        # Branding is cosmetic; never fail the save because of it.
        pass


# Thanks Etherl:
def _save_pretrained_gguf(
    self,
    save_directory,
    tokenizer = None,
    quantization_method = "fast_quantized",
    first_conversion = None,
    push_to_hub = False,
    token = None,
    max_shard_size = "5GB",
    temporary_location = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage = 0.85,
    **kwargs,
):
    """
    Saves the SentenceTransformer model to GGUF format by saving the inner transformer model,
    converting it, and placing the resulting GGUF files in the save directory.

    Args:
        save_directory: Directory the model and GGUF files are written to. It
            is also used as the Hub repo ID when ``push_to_hub`` is true.
        tokenizer: Tokenizer to save; defaults to ``self.tokenizer``.
        quantization_method: Forwarded to ``unsloth_save_pretrained_gguf``.
        first_conversion: Forwarded to ``unsloth_save_pretrained_gguf``.
        push_to_hub: When true, upload ``save_directory`` after conversion.
            Upload failures are printed, not raised.
        token: Hugging Face token; the cached token is used for the upload
            when this is ``None``.
        max_shard_size: Forwarded to ``unsloth_save_pretrained_gguf``.
        temporary_location: Forwarded to ``unsloth_save_pretrained_gguf``.
        maximum_memory_usage: Forwarded to ``unsloth_save_pretrained_gguf``.
        **kwargs: Only ``private`` is read (default ``False``), for repo creation.

    Returns:
        The result of ``unsloth_save_pretrained_gguf`` with ``"gguf_files"``
        rewritten to the files' final locations inside ``save_directory``.
    """
    # 1. Save standard SentenceTransformer structure (configs, modules.json, etc.)
    self.save_pretrained(save_directory)

    # 2. Extract inner transformer model
    inner_model = self[0].auto_model
    if hasattr(inner_model, "_orig_mod"):
        # presumably a torch.compile wrapper around the real module
        inner_model = inner_model._orig_mod

    # If it's a PEFT model, unsloth_save_pretrained_gguf handles merging,
    # but we pass the inner model wrapper.

    # 3. Identify where the transformer weights are stored
    transformer_path = "0_Transformer"
    modules_path = os.path.join(save_directory, "modules.json")
    if os.path.exists(modules_path):
        try:
            with open(modules_path, "r", encoding = "utf-8") as f:
                modules = json.load(f)
            for m in modules:
                if m.get("type", "").endswith("Transformer"):
                    transformer_path = m.get("path", "")
                    break
        except Exception:
            # Best effort: an unreadable or malformed modules.json falls back
            # to the default path. KeyboardInterrupt/SystemExit still propagate.
            pass

    # This is where Unsloth will perform the save + conversion operations
    transformer_dir = os.path.join(save_directory, transformer_path)
    transformer_dir = os.path.abspath(transformer_dir)

    if tokenizer is None:
        tokenizer = self.tokenizer

    # 4. Guard the global shutil.rmtree binding around the GGUF save
    @contextlib.contextmanager
    def patch_unsloth_gguf_save():
        # NOTE(review): unlike the torchao variant, rmtree is never replaced
        # here; this only restores whatever binding existed on entry. Confirm
        # whether a no-op rmtree was intended.
        original_rmtree = shutil.rmtree
        try:
            yield
        finally:
            shutil.rmtree = original_rmtree

    # 5. Call Unsloth's GGUF saver on the inner model targeting the transformer subdirectory
    with patch_unsloth_gguf_save():
        result = unsloth_save_pretrained_gguf(
            inner_model,
            save_directory = transformer_dir,
            tokenizer = tokenizer,
            quantization_method = quantization_method,
            first_conversion = first_conversion,
            push_to_hub = False,  # Force local first to move files
            token = token,
            max_shard_size = max_shard_size,
            temporary_location = temporary_location,
            maximum_memory_usage = maximum_memory_usage,
        )

    # 6. Move GGUF files to the root save_directory
    gguf_files = result.get("gguf_files", [])

    new_gguf_locations = []

    for gguf_file in gguf_files:
        if not os.path.exists(gguf_file):
            continue
        dest_path = os.path.join(save_directory, os.path.basename(gguf_file))

        # Compare absolute paths so relative and absolute spellings of the same
        # file are treated as equal. A file already at its destination (e.g.
        # when the transformer module lives at the repo root) is left alone.
        if os.path.abspath(dest_path) != os.path.abspath(gguf_file):
            shutil.move(gguf_file, dest_path)
        new_gguf_locations.append(dest_path)

    # Update result with new locations
    result["gguf_files"] = new_gguf_locations

    # 7. Add branding
    try:
        FastSentenceTransformer._add_unsloth_branding(save_directory)

        # Add GGUF details to README
        readme_path = os.path.join(save_directory, "README.md")
        if os.path.exists(readme_path):
            gguf_names = ", ".join(
                os.path.basename(location) for location in new_gguf_locations
            )
            with open(readme_path, "a", encoding = "utf-8") as f:
                f.write("\n## GGUF Quantization\n")
                f.write(
                    f"This model contains GGUF quantized versions in: {gguf_names}\n"
                )
    except Exception:
        # Branding/README notes are cosmetic; never fail the save because of them.
        pass

    # 8. Handle Push to Hub if requested
    if push_to_hub:
        if token is None:
            token = get_token()

        api = HfApi(token = token)
        repo_id = save_directory  # Assuming save_directory is the repo name if pushing

        print(f"Unsloth: Uploading to {repo_id}...")
        try:
            api.create_repo(
                repo_id = repo_id, exist_ok = True, private = kwargs.get("private", False)
            )
            api.upload_folder(
                folder_path = save_directory,
                repo_id = repo_id,
                commit_message = "Upload GGUF and SentenceTransformer model",
            )
            print(f"Unsloth: Uploaded to https://huggingface.co/{repo_id}")
        except Exception as e:
            print(f"Unsloth: Upload failed: {e}")

    return result


def _push_to_hub_gguf(
    self,
    repo_id,
    tokenizer = None,
    quantization_method = "fast_quantized",
    first_conversion = None,
    token = None,
    private = None,
    commit_message = "Upload GGUF SentenceTransformer model trained with Unsloth",
    commit_description = "Upload GGUF model trained with Unsloth 2x faster",
    max_shard_size = "5GB",
    temporary_location = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage = 0.85,
    create_pr = False,
    revision = None,
    tags = None,
    **kwargs,
):
    """
    Converts the SentenceTransformer model to GGUF format and pushes to the Hugging Face Hub.

    This method:
    1. Saves the model locally to a temporary directory in GGUF format.
    2. Uploads the GGUF files, config, Ollama Modelfile, and README to the Hub.
    3. Cleans up the temporary directory.

    Args:
        repo_id (str): The Hugging Face Hub repo ID (e.g., "username/model-name").
            A bare name without "/" is prefixed with the token owner's username.
        tokenizer: The tokenizer to save. Defaults to `self.tokenizer`.
        quantization_method (str or list): GGUF quantization method(s). Can be a string or list of strings.
            Choose from the following options:
            * "not_quantized"  : Recommended. Fast conversion. Slow inference, big files.
            * "fast_quantized" : Recommended. Fast conversion. OK inference, OK file size.
            * "quantized"      : Recommended. Slow conversion. Fast inference, small files.
            * "f32"     : Not recommended. Retains 100% accuracy, but super slow and memory hungry.
            * "f16"     : Fastest conversion + retains 100% accuracy. Slow and memory hungry.
            * "q8_0"    : Fast conversion. High resource use, but generally acceptable.
            * "q4_k_m"  : Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K
            * "q5_k_m"  : Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K
            * "q2_k"    : Uses Q4_K for the attention.wv and feed_forward.w2 tensors, Q2_K for the other tensors.
            * "q3_k_l"  : Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K
            * "q3_k_m"  : Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K
            * "q3_k_s"  : Uses Q3_K for all tensors
            * "q4_0"    : Original quant method, 4-bit.
            * "q4_1"    : Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
            * "q4_k_s"  : Uses Q4_K for all tensors
            * "q5_0"    : Higher accuracy, higher resource usage and slower inference.
            * "q5_1"    : Even higher accuracy, resource usage and slower inference.
            * "q5_k_s"  : Uses Q5_K for all tensors
            * "q6_k"    : Uses Q8_K for all tensors
        first_conversion (str, optional): The initial conversion format before quantization.
        token (str, optional): Hugging Face token. Uses cached token if not provided.
        private (bool, optional): Whether the repo should be private.
        commit_message (str): Commit message for the upload.
        commit_description (str): Commit description for the upload (GGUF file commits only).
        max_shard_size (str): Maximum shard size for saving.
        temporary_location (str): Temp directory for intermediate files.
        maximum_memory_usage (float): Max fraction of memory to use.
        create_pr (bool): Whether to create a pull request instead of pushing directly.
        revision (str, optional): Branch/revision to push to.
        tags (list, optional): Additional tags for the repo. A single non-list value is added as one tag.

    Returns:
        str: The full repo ID on Hugging Face Hub.

    Raises:
        ValueError: If no token is provided and none is cached.
    """
    if token is None:
        token = get_token()
    if token is None:
        raise ValueError(
            "No HF token provided. Please provide a token or login with `huggingface-cli login`"
        )

    api = HfApi(token = token)

    # Determine full repo_id
    if "/" not in repo_id:
        username = api.whoami()["name"]
        full_repo_id = f"{username}/{repo_id}"
    else:
        full_repo_id = repo_id

    model_name = full_repo_id.split("/")[-1]

    # Create repo (best effort: a failure is reported and the upload is still attempted)
    try:
        api.create_repo(
            repo_id = full_repo_id,
            private = private,
            exist_ok = True,
            repo_type = "model",
        )
    except Exception as e:
        print(f"Unsloth Warning: Could not create repo: {e}")

    def _upload(local_path, path_in_repo, message, **extra):
        # Every upload targets the same repo/revision; only the file, its name
        # in the repo and the commit text differ.
        api.upload_file(
            path_or_fileobj = local_path,
            path_in_repo = path_in_repo,
            repo_id = full_repo_id,
            repo_type = "model",
            commit_message = message,
            create_pr = create_pr,
            revision = revision,
            **extra,
        )

    # Save to temporary directory first
    with tempfile.TemporaryDirectory(prefix = "unsloth_st_gguf_") as temp_dir:
        print("Unsloth: Converting SentenceTransformer to GGUF format...")

        # Call save_pretrained_gguf to do the local conversion
        result = _save_pretrained_gguf(
            self,
            save_directory = temp_dir,
            tokenizer = tokenizer,
            quantization_method = quantization_method,
            first_conversion = first_conversion,
            push_to_hub = False,  # We handle upload ourselves
            token = token,
            max_shard_size = max_shard_size,
            temporary_location = temporary_location,
            maximum_memory_usage = maximum_memory_usage,
        )

        gguf_files = result.get("gguf_files", [])
        modelfile_location = result.get("modelfile_location", None)
        is_vlm = result.get("is_vlm", False)
        fix_bos_token = result.get("fix_bos_token", False)
        has_modelfile = bool(modelfile_location) and os.path.exists(modelfile_location)

        print(f"Unsloth: Uploading GGUF to https://huggingface.co/{full_repo_id}...")

        # Upload GGUF files
        for file_location in gguf_files:
            if os.path.exists(file_location):
                filename = os.path.basename(file_location)
                print(f"  Uploading {filename}...")
                _upload(
                    file_location,
                    filename,
                    commit_message,
                    commit_description = commit_description,
                )

        # Upload Modelfile if exists
        if has_modelfile:
            print("  Uploading Ollama Modelfile...")
            _upload(
                modelfile_location,
                "Modelfile",
                f"{commit_message} - Ollama Modelfile",
            )

        # Upload config.json if exists
        config_path = os.path.join(temp_dir, "config.json")
        if os.path.exists(config_path):
            print("  Uploading config.json...")
            _upload(config_path, "config.json", f"{commit_message} - config")

        # Create and upload README
        gguf_basenames = [os.path.basename(f) for f in gguf_files if os.path.exists(f)]
        readme_content = f"""---
tags:
- gguf
- llama.cpp
- unsloth
- sentence-transformers
{"- vision-language-model" if is_vlm else ""}
---

# {model_name} - GGUF

This sentence-transformers model was finetuned and converted to GGUF format using [Unsloth](https://github.com/unslothai/unsloth).

## Available Model files:
"""
        readme_content += "".join(f"- `{fname}`\n" for fname in gguf_basenames)

        if has_modelfile:
            readme_content += "\n## Ollama\n"
            readme_content += "An Ollama Modelfile is included for easy deployment.\n"

        if fix_bos_token:
            readme_content += "\n## Note\n"
            readme_content += (
                "The model's BOS token behavior was adjusted for GGUF compatibility.\n"
            )

        readme_content += (
            "\nThis was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth)\n"
            '[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)\n'
        )

        readme_path = os.path.join(temp_dir, "README.md")
        with open(readme_path, "w", encoding = "utf-8") as f:
            f.write(readme_content)

        _upload(readme_path, "README.md", "Add README")

    # Add tags
    all_tags = ["gguf", "llama-cpp", "unsloth", "sentence-transformers"]
    if is_vlm:
        all_tags.append("vision-language-model")
    if tags is not None:
        if isinstance(tags, (list, tuple)):
            all_tags.extend(tags)
        else:
            all_tags.append(tags)
    try:
        api.add_tags(repo_id = full_repo_id, tags = all_tags, repo_type = "model")
    except Exception:
        # Tagging is best effort; the files are already uploaded at this point.
        pass

    print(
        f"Unsloth: Successfully uploaded GGUF to https://huggingface.co/{full_repo_id}"
    )
    return full_repo_id


class FastSentenceTransformer(FastModel):
    @staticmethod
    def _read_pooling_mode(model_name, token):
        """
        Read the pooling mode from the modules.json file if it exists, otherwise return "mean".
        """
        try:
            if os.path.exists(model_name) and os.path.exists(
                os.path.join(model_name, "modules.json")
            ):
                modules_json_path = os.path.join(model_name, "modules.json")
            else:
                modules_json_path = hf_hub_download(
                    model_name, "modules.json", token = token
                )

            with open(modules_json_path, "r") as f:
                modules_config = json.load(f)

            pooling_config_path = None
            for module in modules_config:
                if module.get("type", "") == "sentence_transformers.models.Pooling":
                    pooling_path = module.get("path", "")
                    if pooling_path:
                        # try to find config.json for pooling module
                        if os.path.exists(model_name) and os.path.exists(
                            os.path.join(model_name, pooling_path, "config.json")
                        ):
                            pooling_config_path = os.path.join(
                                model_name, pooling_path, "config.json"
                            )
                        else:
                            pooling_config_path = hf_hub_download(
                                model_name,
                                os.path.join(pooling_path, "config.json"),
                                token = token,
                            )
                        break

            if pooling_config_path:
                with open(pooling_config_path, "r") as f:
                    pooling_config = json.load(f)
                    # from here:
                    # https://github.com/huggingface/sentence-transformers/blob/main/sentence_transformers/models/Pooling.py#L43
                    pooling_map = {
                        "pooling_mode_cls_token": "cls",
                        "pooling_mode_mean_tokens": "mean",
                        "pooling_mode_max_tokens": "max",
                        "pooling_mode_mean_sqrt_len_tokens": "mean_sqrt_len",
                        "pooling_mode_weightedmean_tokens": "weightedmean",
                        "pooling_mode_lasttoken": "lasttoken",
                    }
                    for config_key, mode in pooling_map.items():
                        if pooling_config.get(config_key):
                            if mode != "mean":
                                print(f"Pooling mode detected as {mode}, updating...")
                            return mode

        except Exception as e:
            print(
                f"Failed to detect pooling mode, not a sentence-transformers model. Using default pooling mode 'mean', this may or may not work."
            )
            return "mean"

    # NOTE: this should probably be fixed upstream rather than monkey-patched here
    @staticmethod
    def _patch_mpnet_v4():
        """
        Monkey-patch MPNet so it can be trained with gradient checkpointing.
        Targets the transformers 4 layer signature (which still takes `head_mask`).

        Three things are installed on `transformers.models.mpnet.modeling_mpnet`:
        - `MPNetModel.supports_gradient_checkpointing = True`
        - `MPNetModel._set_gradient_checkpointing`, which flips the
          `gradient_checkpointing` flag on the encoder
        - a replacement `MPNetEncoder.forward` that sends every layer through
          `torch.utils.checkpoint.checkpoint` when that flag is set and the
          encoder is in training mode
        """
        from transformers.models.mpnet import modeling_mpnet

        # advertise gradient checkpointing support on the model class
        modeling_mpnet.MPNetModel.supports_gradient_checkpointing = True

        def _set_gradient_checkpointing(self, module = None, value = True):
            # default target is the model's own encoder
            target = self.encoder if module is None else module
            if isinstance(target, modeling_mpnet.MPNetEncoder):
                target.gradient_checkpointing = value

        modeling_mpnet.MPNetModel._set_gradient_checkpointing = (
            _set_gradient_checkpointing
        )

        # replacement for MPNetEncoder.forward, based on:
        # https://github.com/huggingface/transformers/blob/v4.57.3/src/transformers/models/mpnet/modeling_mpnet.py#L321
        def forward(
            self,
            hidden_states: torch.Tensor,
            attention_mask: Optional[torch.Tensor] = None,
            head_mask: Optional[torch.Tensor] = None,
            output_attentions: bool = False,
            output_hidden_states: bool = False,
            return_dict: bool = False,
            **kwargs,
        ):
            position_bias = self.compute_position_bias(hidden_states)
            collected_states = () if output_hidden_states else None
            collected_attentions = () if output_attentions else None

            def as_checkpoint_fn(layer):
                # checkpoint() only forwards positional tensors, so the keyword
                # flag is bound through this closure
                def run_layer(*inputs):
                    return layer(*inputs, output_attentions = output_attentions)

                return run_layer

            for idx, layer in enumerate(self.layer):
                if output_hidden_states:
                    collected_states = collected_states + (hidden_states,)

                layer_head_mask = None if head_mask is None else head_mask[idx]

                if getattr(self, "gradient_checkpointing", False) and self.training:
                    # checkpointed path
                    outputs = torch.utils.checkpoint.checkpoint(
                        as_checkpoint_fn(layer),
                        hidden_states,
                        attention_mask,
                        layer_head_mask,
                        position_bias,
                        use_reentrant = True,  # fix for torch 2.9
                    )
                else:
                    # regular path, same call as the upstream implementation
                    outputs = layer(
                        hidden_states,
                        attention_mask,
                        layer_head_mask,
                        position_bias,
                        output_attentions = output_attentions,
                        **kwargs,
                    )

                hidden_states = outputs[0]

                if output_attentions:
                    collected_attentions = collected_attentions + (outputs[1],)

            if output_hidden_states:
                collected_states = collected_states + (hidden_states,)

            if return_dict:
                return BaseModelOutput(
                    last_hidden_state = hidden_states,
                    hidden_states = collected_states,
                    attentions = collected_attentions,
                )

            # tuple output: drop the entries that were not requested
            return tuple(
                item
                for item in (hidden_states, collected_states, collected_attentions)
                if item is not None
            )

        # install the patched forward
        modeling_mpnet.MPNetEncoder.forward = forward

    @staticmethod
    def _patch_mpnet_v5():
        """
        Monkey-patch MPNet so it can be trained with gradient checkpointing.
        Targets the transformers 5 layer signature (no `head_mask` argument).

        Three things are installed on `transformers.models.mpnet.modeling_mpnet`:
        - `MPNetModel.supports_gradient_checkpointing = True`
        - `MPNetModel._set_gradient_checkpointing`, which flips the
          `gradient_checkpointing` flag on the encoder
        - a replacement `MPNetEncoder.forward` that sends every layer through
          `torch.utils.checkpoint.checkpoint` when that flag is set and the
          encoder is in training mode
        """
        from transformers.models.mpnet import modeling_mpnet

        # advertise gradient checkpointing support on the model class
        modeling_mpnet.MPNetModel.supports_gradient_checkpointing = True

        def _set_gradient_checkpointing(self, module = None, value = True):
            # default target is the model's own encoder
            target = self.encoder if module is None else module
            if isinstance(target, modeling_mpnet.MPNetEncoder):
                target.gradient_checkpointing = value

        modeling_mpnet.MPNetModel._set_gradient_checkpointing = (
            _set_gradient_checkpointing
        )

        # replacement for MPNetEncoder.forward, based on:
        # https://github.com/huggingface/transformers/blob/v5.0.0rc1/src/transformers/models/mpnet/modeling_mpnet.py#L284
        def forward(
            self,
            hidden_states: torch.Tensor,
            attention_mask: Optional[torch.Tensor] = None,
            output_attentions: bool = False,
            output_hidden_states: bool = False,
            return_dict: bool = False,
            **kwargs,
        ):
            position_bias = self.compute_position_bias(hidden_states)
            collected_states = () if output_hidden_states else None
            collected_attentions = () if output_attentions else None

            def as_checkpoint_fn(layer):
                # checkpoint() only forwards positional tensors, so the keyword
                # flag is bound through this closure
                def run_layer(*inputs):
                    return layer(*inputs, output_attentions = output_attentions)

                return run_layer

            for layer in self.layer:
                if output_hidden_states:
                    collected_states = collected_states + (hidden_states,)

                if getattr(self, "gradient_checkpointing", False) and self.training:
                    # checkpointed path
                    outputs = torch.utils.checkpoint.checkpoint(
                        as_checkpoint_fn(layer),
                        hidden_states,
                        attention_mask,
                        position_bias,
                        use_reentrant = True,  # required for torch >= 2.9
                    )
                else:
                    # regular path, same call as the upstream implementation
                    outputs = layer(
                        hidden_states,
                        attention_mask,
                        position_bias,
                        output_attentions,
                        **kwargs,
                    )

                hidden_states = outputs[0]

                if output_attentions:
                    collected_attentions = collected_attentions + (outputs[1],)

            if output_hidden_states:
                collected_states = collected_states + (hidden_states,)

            if return_dict:
                return BaseModelOutput(
                    last_hidden_state = hidden_states,
                    hidden_states = collected_states,
                    attentions = collected_attentions,
                )

            # tuple output: drop the entries that were not requested
            return tuple(
                item
                for item in (hidden_states, collected_states, collected_attentions)
                if item is not None
            )

        modeling_mpnet.MPNetEncoder.forward = forward

    @staticmethod
    def _patch_distilbert_v4():
        """
        Replace `DistilBertModel.forward` with a copy that calls `self.transformer`
        using positional arguments instead of keyword arguments, to be compatible
        with peft_utils.
        Transformers 4 version.
        """

        # Copy of the upstream forward, only the final call differs:
        # https://github.com/huggingface/transformers/blob/v4.57.3/src/transformers/models/distilbert/modeling_distilbert.py#L666
        def forward(
            self,
            input_ids: Optional[torch.Tensor] = None,
            attention_mask: Optional[torch.Tensor] = None,
            head_mask: Optional[torch.Tensor] = None,
            inputs_embeds: Optional[torch.Tensor] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
        ):
            # unset flags fall back to the model config
            if output_attentions is None:
                output_attentions = self.config.output_attentions
            if output_hidden_states is None:
                output_hidden_states = self.config.output_hidden_states
            if return_dict is None:
                return_dict = self.config.use_return_dict

            # exactly one of input_ids / inputs_embeds must be given
            if input_ids is not None and inputs_embeds is not None:
                raise ValueError(
                    "You cannot specify both input_ids and inputs_embeds at the same time"
                )
            if input_ids is None and inputs_embeds is None:
                raise ValueError(
                    "You have to specify either input_ids or inputs_embeds"
                )

            if input_ids is not None:
                self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
                input_shape = input_ids.size()
                device = input_ids.device
            else:
                input_shape = inputs_embeds.size()[:-1]
                device = inputs_embeds.device

            # remember whether the caller passed a head mask before it is expanded
            head_mask_is_none = head_mask is None
            head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

            # (bs, seq_length, dim)
            embeddings = self.embeddings(input_ids, inputs_embeds)

            if self.config._attn_implementation == "flash_attention_2":
                # keep the mask only when it actually masks something
                if attention_mask is None or 0 not in attention_mask:
                    attention_mask = None
            else:
                if attention_mask is None:
                    # (bs, seq_length)
                    attention_mask = torch.ones(input_shape, device = device)

                use_sdpa_mask = (
                    self.config._attn_implementation == "sdpa"
                    and head_mask_is_none
                    and not output_attentions
                )
                if use_sdpa_mask:
                    attention_mask = _prepare_4d_attention_mask_for_sdpa(
                        attention_mask, embeddings.dtype, tgt_len = input_shape[1]
                    )

            # the patch: positional arguments instead of keyword arguments
            return self.transformer(
                embeddings,
                attention_mask,
                head_mask,
                output_attentions,
                output_hidden_states,
                return_dict,
            )

        modeling_distilbert.DistilBertModel.forward = forward

    @staticmethod
    def _has_add_pooling_layer(config, auto_model_class = None):
        """
        Checks if the model class supports the `add_pooling_layer` argument
        """
        try:
            if auto_model_class is None:
                auto_model_class = AutoModel
            # try to resolve the class
            model_class = _get_model_class(config, auto_model_class._model_mapping)

            if model_class:
                sig = inspect.signature(model_class.__init__)
                return "add_pooling_layer" in sig.parameters
        except:
            pass

        return False

    @staticmethod
    def _patch_distilbert_v5():
        """
        Replace `DistilBertModel.forward` with a copy that hands the embeddings and
        attention mask to `self.transformer` as positional arguments instead of
        keyword arguments.
        Transformers 5 version.
        """
        # Copy of the upstream forward, only the final call differs:
        # https://github.com/huggingface/transformers/blob/v5.0.0rc1/src/transformers/models/distilbert/modeling_distilbert.py#L386
        from transformers.masking_utils import create_bidirectional_mask

        def forward(
            self,
            input_ids: Optional[torch.Tensor] = None,
            attention_mask: Optional[torch.Tensor] = None,
            inputs_embeds: Optional[torch.Tensor] = None,
            position_ids: Optional[torch.Tensor] = None,
            **kwargs,
        ):
            has_ids = input_ids is not None
            has_embeds = inputs_embeds is not None
            # both given or both missing is an error
            if has_ids == has_embeds:
                raise ValueError(
                    "You must specify exactly one of input_ids or inputs_embeds"
                )

            token_embeddings = self.embeddings(input_ids, inputs_embeds, position_ids)

            bidirectional_mask = create_bidirectional_mask(
                config = self.config,
                input_embeds = token_embeddings,
                attention_mask = attention_mask,
            )

            # the patch: unsloth gradient checkpointing hook needs positional arguments
            return self.transformer(token_embeddings, bidirectional_mask, **kwargs)

        modeling_distilbert.DistilBertModel.forward = forward

    @staticmethod
    def _add_unsloth_tags(repo_id, token, tags = None):
        """
        Add Unsloth and sentence-transformers tags to the Hugging Face Hub repository.

        Args:
            repo_id: Hub repository id to tag.
            token: Access token used to build the `HfApi` client.
            tags: Optional extra tags. May be None, a single string, or any
                iterable of strings. The argument itself is never modified.

        The call is best-effort: errors raised while tagging are ignored.
        """
        from huggingface_hub import HfApi

        api = HfApi(token = token)

        # Build a fresh list so the caller's `tags` object is not mutated, and so a
        # single string is treated as one tag rather than crashing on `.extend`.
        if tags is None:
            all_tags = []
        elif isinstance(tags, str):
            all_tags = [tags]
        else:
            all_tags = list(tags)
        all_tags.extend(["unsloth", "sentence-transformers"])

        try:
            api.add_tags(
                repo_id = repo_id,
                tags = all_tags,
                repo_type = "model",
            )
        except Exception:
            # Tagging is cosmetic; never fail the surrounding upload because of it.
            # KeyboardInterrupt / SystemExit still propagate.
            pass

    @staticmethod
    def _add_unsloth_branding(save_directory):
        """
        Add Unsloth branding to the README.md file generated by sentence-transformers.
        """
        readme_path = os.path.join(save_directory, "README.md")
        if not os.path.exists(readme_path):
            return

        with open(readme_path, "r", encoding = "utf-8") as f:
            content = f.read()

        # add unsloth tag to frontmatter
        if "---\ntags:\n" in content:
            content = content.replace("---\ntags:\n", "---\ntags:\n- unsloth\n")
        else:
            # if tags exist but not right at start, use regex to append
            pattern = r"(^tags:\s*\n)"
            if re.search(pattern, content, re.MULTILINE):
                content = re.sub(
                    pattern, r"\1- unsloth\n", content, count = 1, flags = re.MULTILINE
                )

        # add branding badge and text
        branding = (
            "\n\nThis model was finetuned with [Unsloth](https://github.com/unslothai/unsloth).\n\n"
            '[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)\n'
        )

        # add to description
        if "# SentenceTransformer" in content:
            parts = content.split("# SentenceTransformer", 1)
            content = parts[0] + "# SentenceTransformer" + branding + parts[1]
        else:
            content += branding

        with open(readme_path, "w", encoding = "utf-8") as f:
            f.write(content)

    @staticmethod
    def _module_path(model_name, token = None):
        """
        Returns the path to the modules.json file or None
        """
        try:
            if os.path.exists(model_name) and os.path.isdir(model_name):
                path = os.path.join(model_name, "modules.json")
                return path if os.path.exists(path) else None
            else:
                try:
                    return hf_hub_download(model_name, "modules.json", token = token)
                except:
                    return None
        except:
            return None

    @staticmethod
    def _create_transformer_module(
        model_name,
        model,
        tokenizer,
        max_seq_length,
        trust_remote_code,
    ):
        """
        Build a sentence-transformers `Transformer` module around an already
        loaded model and tokenizer, and return it fully configured.
        """
        from sentence_transformers.models import Transformer

        # prevents sentence-transformers from loading the model a second time, thanks Etherl
        saved_from_pretrained = AutoModel.from_pretrained
        try:
            # While the Transformer is constructed, every AutoModel.from_pretrained
            # call simply hands back the pre-loaded model
            AutoModel.from_pretrained = lambda *args, **kwargs: model

            transformer_module = Transformer(
                model_name,
                max_seq_length = max_seq_length,
                model_args = {"trust_remote_code": trust_remote_code},
                config_args = {"trust_remote_code": trust_remote_code},
            )
        finally:
            # Put the real loader back, also when construction failed
            AutoModel.from_pretrained = saved_from_pretrained

        transformer_module.tokenizer = tokenizer
        transformer_module.do_lower_case = getattr(tokenizer, "do_lower_case", False)

        # sentence-transformers only passes along known keys to model.forward
        forward_param_names = set(inspect.signature(model.forward).parameters)
        forward_param_names |= {
            "input_ids",
            "attention_mask",
            "token_type_ids",
            "inputs_embeds",
        }
        transformer_module.model_forward_params = forward_param_names

        # determine max_seq_length if not provided:
        # model config first, then tokenizer, then 512
        if max_seq_length is None:
            config_has_limit = hasattr(model, "config") and hasattr(
                model.config, "max_position_embeddings"
            )
            if config_has_limit:
                max_seq_length = model.config.max_position_embeddings
            elif hasattr(tokenizer, "model_max_length"):
                max_seq_length = tokenizer.model_max_length
            else:
                max_seq_length = 512

        transformer_module.max_seq_length = max_seq_length
        transformer_module.config_keys = ["max_seq_length", "do_lower_case"]
        transformer_module.save_in_root = True

        # record the tokenizer class name on the model config
        if hasattr(model, "config"):
            model.config.tokenizer_class = tokenizer.__class__.__name__

        return transformer_module

    @staticmethod
    def _load_modules(
        model_name,
        token,
        model,
        tokenizer,
        max_seq_length,
        pooling_mode,
        trust_remote_code = False,
    ) -> tuple[OrderedDict, bool]:
        """
        Assemble the sentence-transformers module stack for `model_name`.

        When a modules.json is available its entries are followed: the Transformer
        entry is built around the already loaded `model` / `tokenizer`, every other
        entry is loaded from its own folder (entries that cannot be downloaded or
        loaded are skipped with a warning). Without modules.json a hard-coded
        [Transformer, Pooling, Normalize] stack is created.

        Returns:
            tuple[OrderedDict, bool]: (modules, no_modules_json)
        """
        from sentence_transformers.util import import_from_string, load_dir_path
        from sentence_transformers.models import Pooling, Normalize

        modules = OrderedDict()
        modules_json_path = FastSentenceTransformer._module_path(model_name, token)

        if not modules_json_path:
            # fallback if no modules.json (non sentence-transformers models)
            print(
                "Unsloth: No modules.json found, falling back to [Transformer, Pooling, Normalize]. This may or may not work."
            )

            modules["0"] = FastSentenceTransformer._create_transformer_module(
                model_name, model, tokenizer, max_seq_length, trust_remote_code
            )

            hidden_size = getattr(model.config, "hidden_size", 768)

            # only the default "mean" is re-checked against the checkpoint
            if pooling_mode == "mean":
                pooling_mode = FastSentenceTransformer._read_pooling_mode(
                    model_name, token
                )

            modules["1"] = Pooling(
                word_embedding_dimension = hidden_size, pooling_mode = pooling_mode
            )
            modules["2"] = Normalize()

            return modules, True

        with open(modules_json_path, encoding = "utf8") as f:
            modules_config = json.load(f)

        for entry in modules_config:
            class_ref = entry["type"]
            # explicit name, else idx, else position in the stack built so far
            name = entry.get("name", str(entry.get("idx", len(modules))))

            if class_ref == "sentence_transformers.models.Transformer":
                modules[name] = FastSentenceTransformer._create_transformer_module(
                    model_name,
                    model,
                    tokenizer,
                    max_seq_length,
                    trust_remote_code,
                )
                continue

            # load other modules (Pooling, Normalize, etc.)
            module_path = entry["path"]
            if os.path.isdir(model_name):
                load_path = os.path.join(model_name, module_path)
            else:
                try:
                    load_path = load_dir_path(model_name, module_path, token = token)
                except Exception as e:
                    print(
                        f"Unsloth Warning: Could not download module {module_path}: {e}"
                    )
                    continue

            module_class = import_from_string(class_ref)
            try:
                modules[name] = module_class.load(load_path)
            except Exception as e:
                print(
                    f"Unsloth Warning: Failed to load module {name} ({class_ref}): {e}"
                )

        return modules, False

    # Encoder model types that benefit from native torch.compile instead of Unsloth patching.
    # `_estimate_compile_threshold` compares the lower-cased `config.model_type` of the
    # inner model against this set to decide whether its "fast encoder" scaling applies.
    ENCODER_MODEL_TYPES = {
        "mpnet",
        "bert",
        "distilbert",
        "modernbert",
        "roberta",
        "xlm-roberta",
        "albert",
        "electra",
    }

    @staticmethod
    def _estimate_compile_threshold(
        model,
        batch_size = None,
        grad_accum = None,
        max_seq_length = None,
    ):
        """
        Estimate the minimum training steps needed for torch.compile to be beneficial.
        Returns the threshold with a 1.2x safety margin built in.

        Based on empirical benchmarks:
        - Larger models have lower breakeven (more time saved per step)
        - Warmup time scales with model size but speedup also increases

        Optional inputs (batch_size, grad_accum, max_seq_length) allow
        a coarse pre-run adjustment. These are intentionally conservative
        and avoid any runtime measurements.
        """
        # Get parameter count from inner model
        if hasattr(model, "__getitem__"):
            try:
                inner = model[0].auto_model
                params = sum(p.numel() for p in inner.parameters())
            except:
                params = 100_000_000  # Default to 100M if can't determine
        else:
            params = sum(p.numel() for p in model.parameters())

        model_type = None
        try:
            if "inner" in locals():
                model_type = getattr(getattr(inner, "config", None), "model_type", None)
        except Exception:
            model_type = None
        if isinstance(model_type, str):
            model_type = model_type.lower()

        params_m = params / 1e6

        # Empirical formula based on benchmarks with batch_size=2, grad_accum=4
        # Small models: high fixed overhead, lower speedup
        # Large models: warmup scales but speedup is significant
        if params_m < 50:
            estimated_warmup = 35 + params_m * 0.3
            base_speedup = 1.35
        elif params_m < 200:
            estimated_warmup = 12 + params_m * 0.03
            base_speedup = 1.75
        else:
            estimated_warmup = 15 + params_m * 0.04
            base_speedup = 1.60

        # Estimate time per step (ms) and time saved
        naive_ms = 50 + params_m * 1.0
        compiled_ms = naive_ms / base_speedup
        time_saved_per_step_s = (naive_ms - compiled_ms) / 1000

        if time_saved_per_step_s > 0:
            breakeven = estimated_warmup / time_saved_per_step_s
        else:
            breakeven = float("inf")

        # Return threshold with 1.2x safety margin
        threshold = breakeven * 1.2

        # Optional adjustment based on expected work per step.
        # This uses only pre-run information (batch size, grad accum, seq length).
        generic_scale = 1.0
        fast_scale = 1.0
        if (
            batch_size is not None
            or grad_accum is not None
            or max_seq_length is not None
        ):
            try:
                bs = int(batch_size) if batch_size is not None else 2
                ga = int(grad_accum) if grad_accum is not None else 4
                seq = int(max_seq_length) if max_seq_length is not None else 512
            except Exception:
                bs, ga, seq = 2, 4, 512

            bs = max(1, bs)
            ga = max(1, ga)
            # Guard against unbounded tokenizer.model_max_length
            seq = max(64, min(seq, 8192))

            ref_bs, ref_ga, ref_seq = 2, 4, 512

            # Generic path: lighter scaling, less conservative than params-only.
            ga_scale = (ref_ga / ga) ** 1.0
            bs_seq_scale = ((ref_bs * ref_seq) / (bs * seq)) ** 0.15
            generic_scale = 0.35 * ga_scale * bs_seq_scale
            generic_scale = max(0.05, min(generic_scale, 5.0))

            # Fast encoder path: stronger scaling based on observed behavior.
            fast_ga_scale = (ref_ga / ga) ** 1.5
            fast_bs_seq_scale = ((ref_bs * ref_seq) / (bs * seq)) ** 0.25
            fast_scale = 0.2 * fast_ga_scale * fast_bs_seq_scale
            fast_scale = max(0.05, min(fast_scale, 5.0))

        # Conservative safety factors: generic is less conservative than fast.
        generic_threshold = threshold * generic_scale * 1.25

        is_fast_type = (
            isinstance(model_type, str)
            and model_type in FastSentenceTransformer.ENCODER_MODEL_TYPES
        )
        if is_fast_type:
            fast_threshold = threshold * fast_scale * 1.5
            # Prefer the smaller (less conservative) of the two estimates.
            final_threshold = min(generic_threshold, fast_threshold)
        else:
            final_threshold = generic_threshold

        # Reduce mpnet overestimation slightly.
        if model_type == "mpnet":
            final_threshold *= 0.7

        # Lower bound to avoid compiling on extremely short runs.
        return int(max(20, final_threshold))

    @staticmethod
    def _apply_torch_compile(model, mode = "default"):
        """
        Apply torch.compile to a SentenceTransformer model.
        Includes workaround for accelerate's unwrap_model bug.
        """
        if hasattr(model, "__getitem__"):
            inner_model = model[0].auto_model
            compiled = torch.compile(inner_model, mode = mode)
            model[0].auto_model = compiled
            # Fix for accelerate unwrap_model bug:
            # When SentenceTransformer contains a compiled inner model,
            # accelerate checks has_compiled_regions() which returns True,
            # then tries to access model.__dict__["_orig_mod"] which fails.
            # This workaround sets _orig_mod to satisfy accelerate.
            model.__dict__["_orig_mod"] = model
        else:
            model = torch.compile(model, mode = mode)
        return model

    @staticmethod
    def from_pretrained(
        model_name,
        max_seq_length = None,
        dtype = None,
        load_in_4bit = False,  # Changed default: 4-bit is slow for encoders
        load_in_8bit = False,
        load_in_16bit = True,  # Changed default: 16-bit is optimal for encoders
        full_finetuning = False,
        token = None,
        device_map = "sequential",
        rope_scaling = None,
        fix_tokenizer = True,
        trust_remote_code = False,
        use_gradient_checkpointing = False,  # Changed default: conflicts with torch.compile
        resize_model_vocab = None,
        revision = None,
        use_exact_model_name = False,
        offload_embedding = False,
        random_state = 3407,
        max_lora_rank = 64,
        disable_log_stats = True,
        qat_scheme = None,
        unsloth_tiled_mlp = False,
        pooling_mode = "mean",
        for_inference = False,
        **kwargs,
    ):
        """
        Load `model_name` and return it as a `SentenceTransformer`.

        One of three loading paths is taken:

        1. `for_inference = True`: the model is loaded directly with
           `SentenceTransformer(...)` and returned as-is (no extra methods attached).
        2. Fast encoder path: taken when the config's `model_type` is listed in
           `FastSentenceTransformer.ENCODER_MODEL_TYPES` and the environment variable
           `UNSLOTH_COMPILE_DISABLE` is not "1". The model is loaded natively with
           `SentenceTransformer(...)` and tagged with `_unsloth_fast_encoder`,
           `_compile_mode`, `_dtype` and `_load_in_4bit` for later use.
        3. Otherwise: the model is loaded through `FastModel.from_pretrained` and
           wrapped in a `SentenceTransformer` built from
           `FastSentenceTransformer._load_modules`.

        Paths 2 and 3 attach `save_pretrained_merged`, `save_pretrained_torchao`,
        `save_pretrained_gguf`, `push_to_hub_gguf` and `push_to_hub_merged` to the
        returned instance.

        Args:
            model_name: Model id or path handed to the loaders.
            max_seq_length: Forwarded to `FastModel.from_pretrained` and
                `_load_modules` on path 3. Paths 1 and 2 do not read it.
            dtype: Requested dtype. On path 1 `None` becomes "auto"; on path 2
                `None` is resolved from `load_in_16bit` and bfloat16 support.
            load_in_4bit / load_in_8bit / load_in_16bit: Mutually exclusive
                precision switches; all are cleared when `full_finetuning` is
                combined with 4-bit or 8-bit.
            full_finetuning: See above; also forwarded to `FastModel` on path 3.
            token, revision, trust_remote_code: Forwarded to the loaders.
            device_map: A dict, "auto" or "sequential" is translated to `None`
                (paths 1 and 3) or "cuda" (path 2) for `SentenceTransformer`;
                any other value is passed through as the device.
            use_gradient_checkpointing: On path 2 a truthy value sets the stored
                compile mode to `None`; on path 3 it is forwarded to `FastModel`.
            pooling_mode: Forwarded to `_load_modules` on path 3.
            for_inference: Selects path 1.
            **kwargs: Path 1 reads `model_kwargs`, `cache_folder`, `truncate_dim`,
                `tokenizer_kwargs` and `config_kwargs`. `auto_model` defaults to
                `AutoModel`. On path 3 `load_in_fp8` is removed (fp8 is forced
                off) and the remaining entries go to `FastModel.from_pretrained`.
            The remaining named parameters are forwarded unchanged to
            `FastModel.from_pretrained` on path 3.

        Returns:
            The loaded `SentenceTransformer` instance.

        Raises:
            ImportError: If `sentence-transformers` is not installed.
            RuntimeError: If more than one of 4-bit / 8-bit / 16-bit is selected.
        """
        try:
            from sentence_transformers import SentenceTransformer
            from sentence_transformers.models import Transformer, Pooling, Normalize
        except ImportError:
            raise ImportError(
                "Unsloth: To use `FastSentenceTransformer`, you must install `sentence-transformers`.\n"
                "Run `pip install sentence-transformers` to install it."
            )

        # if for_inference == True, skip Unsloth optimizations to avoid torch compile issues
        if for_inference:
            st_device = device_map
            if isinstance(st_device, dict) or (
                isinstance(st_device, str) and st_device in ["auto", "sequential"]
            ):
                st_device = None

            # this was added because when loading for inference it was defaulting to float32
            # propagate dtype to model_kwargs, default to "auto"
            # Work on a copy so the dict supplied by the caller is left untouched.
            model_kwargs = dict(kwargs.get("model_kwargs") or {})
            model_kwargs["dtype"] = dtype if dtype is not None else "auto"

            # filter kwargs for SentenceTransformer
            st_kwargs = {
                "device": st_device,
                "trust_remote_code": trust_remote_code,
                "token": token,
                "revision": revision,
                "model_kwargs": model_kwargs,
            }

            # add other known kwargs if present
            known_keys = [
                "cache_folder",
                "truncate_dim",
                "tokenizer_kwargs",
                "config_kwargs",
            ]
            for k in known_keys:
                if k in kwargs:
                    st_kwargs[k] = kwargs[k]

            st_model = SentenceTransformer(model_name, **st_kwargs)
            return st_model

        # sanity check, thanks Etherl:
        if full_finetuning and (load_in_4bit or load_in_8bit):
            print(
                "Unsloth: You selected full finetuning support, but 4bit / 8bit is enabled - disabling LoRA / QLoRA."
            )
            load_in_4bit = False
            load_in_8bit = False
            load_in_16bit = False

        if int(load_in_4bit) + int(load_in_8bit) + int(load_in_16bit) >= 2:
            raise RuntimeError(
                "Unsloth: Can only load in 4bit or 8bit or 16bit, not a combination!\n"
                "Also, we by default set `load_in_16bit = True`.\n"
                "If you want 4bit LoRA finetuning, set `load_in_16bit = False` and `load_in_4bit = True`\n"
                "If you want 8bit finetuning, set both `load_in_16bit = False` and `load_in_8bit = True`"
            )

        if "auto_model" not in kwargs:
            kwargs["auto_model"] = AutoModel

        transformers4 = Version(transformers.__version__).major < 5
        model_type = ""
        config = None
        # Best effort: if the config cannot be loaded, model_type stays "" and the
        # fast encoder path below is simply not selected.
        try:
            config = AutoConfig.from_pretrained(
                model_name, token = token, trust_remote_code = trust_remote_code
            )
            model_type = getattr(config, "model_type", "")
        except Exception:
            pass

        # Fast encoder path: Use native torch.compile for encoder models (6x speedup)
        # This bypasses Unsloth's auto-compiler which adds @torch.compiler.disable decorators
        # that interfere with torch.compile and cause runtime errors for encoder models.
        # NOTE: The old Unsloth path is BROKEN for encoder models with torch 2.9+ due to
        # conflicting @torch.compile and @torch.compiler.disable decorators.
        # Set UNSLOTH_COMPILE_DISABLE=1 to disable torch.compile and use the old path.
        is_encoder_model = (
            model_type.lower() in FastSentenceTransformer.ENCODER_MODEL_TYPES
        )
        use_fast_encoder = os.environ.get("UNSLOTH_COMPILE_DISABLE", "0") != "1"
        if use_fast_encoder and is_encoder_model:
            # NOTE(review): this path never reads max_seq_length or load_in_8bit,
            # so both are effectively ignored here - confirm that is intended.

            # torch.compile mode: "default" is safest for PEFT/LoRA training
            # Note: "reduce-overhead" uses CUDA Graphs which is incompatible with PEFT
            compile_mode = "default"

            # Determine dtype - handle float16 machines that don't support bfloat16
            if dtype is None:
                if load_in_16bit:
                    dtype = torch.float16 if not SUPPORTS_BFLOAT16 else torch.bfloat16
                else:
                    dtype = torch.float32
            elif dtype == torch.bfloat16 and not SUPPORTS_BFLOAT16:
                print(
                    "Unsloth: Device does not support bfloat16. Using float16 instead."
                )
                dtype = torch.float16

            # Determine device
            # NOTE(review): "cuda" is hard-coded for dict / "auto" / "sequential"
            # device maps - presumably non-CUDA devices must be passed explicitly.
            st_device = device_map
            if isinstance(st_device, dict) or (
                isinstance(st_device, str) and st_device in ["auto", "sequential"]
            ):
                st_device = "cuda"

            # Check if model supports SDPA (Scaled Dot Product Attention) for extra speedup
            supports_sdpa = False
            if config is not None:
                try:
                    model_class = _get_model_class(
                        config, kwargs.get("auto_model", AutoModel)._model_mapping
                    )
                    supports_sdpa = getattr(model_class, "_supports_sdpa", False)
                except Exception:
                    # Detection is optional; fall back to "no SDPA".
                    pass

            # Build model_kwargs for SentenceTransformer
            model_kwargs = {"torch_dtype": dtype}

            # Enable SDPA if supported (1.2x extra speedup on top of torch.compile)
            # But disable for models with known SDPA + torch.compile backward issues
            _force_eager = False
            for _sdpa_model in DISABLE_SDPA_MODEL_NAMES:
                if _sdpa_model in model_type.lower():
                    supports_sdpa = False
                    _force_eager = True
                    break
            if supports_sdpa:
                model_kwargs["attn_implementation"] = "sdpa"
            elif _force_eager:
                model_kwargs["attn_implementation"] = "eager"

            # Print optimization status
            sdpa_str = " + SDPA" if supports_sdpa else ""
            if load_in_4bit:
                print(
                    f"Unsloth: Using fast encoder path for {model_type} with 4-bit quantization{sdpa_str}"
                )
            else:
                print(
                    f"Unsloth: Using fast encoder path for {model_type} (torch.compile{sdpa_str})"
                )

            # Handle 4-bit quantization via BitsAndBytesConfig
            if load_in_4bit:
                from transformers import BitsAndBytesConfig

                bnb_config = BitsAndBytesConfig(
                    load_in_4bit = True,
                    bnb_4bit_compute_dtype = dtype,
                    bnb_4bit_quant_type = "nf4",
                    bnb_4bit_use_double_quant = True,
                )
                model_kwargs["quantization_config"] = bnb_config
                # When using quantization, device must be handled by accelerate
                st_device = None

            # Handle gradient checkpointing - warn user it conflicts with torch.compile
            if use_gradient_checkpointing:
                print(
                    "Unsloth Warning: Gradient checkpointing is incompatible with torch.compile."
                )
                print("Disabling torch.compile to enable gradient checkpointing.")
                compile_mode = None  # Disable compilation

                is_mpnet = "mpnet" == model_type.lower()

                if is_mpnet and transformers4:
                    FastSentenceTransformer._patch_mpnet_v4()
                elif is_mpnet:
                    FastSentenceTransformer._patch_mpnet_v5()

            # Load via native SentenceTransformer (bypasses Unsloth patching)
            st_model = SentenceTransformer(
                model_name,
                device = st_device,
                trust_remote_code = trust_remote_code,
                token = token,
                revision = revision,
                model_kwargs = model_kwargs,
            )

            # Store metadata for get_peft_model
            st_model._unsloth_fast_encoder = True
            st_model._compile_mode = compile_mode
            st_model._dtype = dtype
            st_model._load_in_4bit = load_in_4bit
            st_model.no_modules = False

            # Add save methods
            def _save_pretrained_merged(self, save_directory, **save_kwargs):
                self.save_pretrained(save_directory)
                tokenizer = save_kwargs.pop("tokenizer", self.tokenizer)
                if hasattr(self[0], "auto_model"):
                    inner = self[0].auto_model
                    # Handle compiled model
                    if hasattr(inner, "_orig_mod"):
                        inner = inner._orig_mod
                    if hasattr(inner, "merge_and_unload"):
                        merged = inner.merge_and_unload()
                        merged.save_pretrained(save_directory)
                    elif hasattr(inner, "save_pretrained"):
                        inner.save_pretrained(save_directory)
                if tokenizer is not None:
                    tokenizer.save_pretrained(save_directory)
                # Branding is cosmetic: a failure here must not abort a save that
                # already succeeded (same handling as the FastModel path below).
                try:
                    FastSentenceTransformer._add_unsloth_branding(save_directory)
                except Exception as e:
                    print(f"Unsloth Warning: Failed to add branding to README: {e}")

            st_model.save_pretrained_merged = types.MethodType(
                _save_pretrained_merged, st_model
            )

            st_model.save_pretrained_torchao = types.MethodType(
                _save_pretrained_torchao, st_model
            )

            st_model.save_pretrained_gguf = types.MethodType(
                _save_pretrained_gguf, st_model
            )

            st_model.push_to_hub_gguf = types.MethodType(_push_to_hub_gguf, st_model)

            def _push_to_hub_merged(self, repo_id, **push_kwargs):
                hub_token = push_kwargs.get("token", None) or get_token()
                if hub_token is None:
                    raise ValueError("No HF token provided")

                # Imported locally, like the FastModel-path variant below, so this
                # closure does not rely on a module-level HfApi import.
                from huggingface_hub import HfApi

                api = HfApi(token = hub_token)
                # Repo creation is best effort; upload_folder below surfaces real failures.
                try:
                    api.create_repo(
                        repo_id = repo_id,
                        private = push_kwargs.get("private"),
                        exist_ok = True,
                        repo_type = "model",
                    )
                except Exception:
                    pass
                FastSentenceTransformer._add_unsloth_tags(repo_id, hub_token)
                with tempfile.TemporaryDirectory() as temp_dir:
                    self.save_pretrained_merged(temp_dir, **push_kwargs)
                    api.upload_folder(
                        folder_path = temp_dir,
                        repo_id = repo_id,
                        commit_message = push_kwargs.get(
                            "commit_message", "Upload model"
                        ),
                    )
                print(f"Unsloth: Pushed to https://huggingface.co/{repo_id}")

            st_model.push_to_hub_merged = types.MethodType(
                _push_to_hub_merged, st_model
            )

            return st_model

        # Warn if using 4-bit with encoder (slow due to dequantization overhead)
        if is_encoder_model and load_in_4bit:
            print(
                "Unsloth Warning: 4-bit quantization adds ~2.3x overhead for encoder models."
            )
            print("Consider using load_in_16bit=True for better performance.")

        # check if the model supports add_pooling_layer
        if "add_pooling_layer" not in kwargs:
            supported = FastSentenceTransformer._has_add_pooling_layer(
                config, kwargs.get("auto_model", AutoModel)
            )
            if supported:
                kwargs["add_pooling_layer"] = False

        # forces fp8 to be False since it's not supported
        fp8 = kwargs.pop("load_in_fp8", None)
        if fp8:
            logging.info("Unsloth: Disabling fp8 for model")
        load_in_fp8 = False

        # this is a fix for Snowflake/snowflake-arctic-embed-l-v2.0
        # it has pooler weights which we don't care about for training,
        # however unsloth throws an exception if "UNSLOTH_WARN_UNINITIALIZED" == 1 and it sees unused weights
        old_environ = os.environ.get("UNSLOTH_WARN_UNINITIALIZED", "1")
        os.environ["UNSLOTH_WARN_UNINITIALIZED"] = "0"

        # Everything that runs while the variable is overridden lives inside the
        # try block, so the previous value is restored even if patching, the
        # modules.json lookup or the model load raises.
        try:
            is_distilbert = "distilbert" == model_type.lower()
            is_mpnet = "mpnet" == model_type.lower()

            if is_distilbert and transformers4:
                FastSentenceTransformer._patch_distilbert_v4()
            elif is_distilbert:
                FastSentenceTransformer._patch_distilbert_v5()
            elif is_mpnet and transformers4:
                FastSentenceTransformer._patch_mpnet_v4()
            elif is_mpnet:
                FastSentenceTransformer._patch_mpnet_v5()

            # check if modules.json exists - if not, force 16-bit training
            # why? because i have to implement saving myself for these models, and i don't feel like adding dequantization
            # to the save_pretrained_merged for a model that really should be trained in 16-bit anyway
            has_modules_json = (
                FastSentenceTransformer._module_path(model_name, token) is not None
            )

            if not has_modules_json and load_in_4bit:
                print(
                    "Unsloth: No modules.json found. This is not a sentence-transformers model.\n"
                    "Forcing 16-bit loading to simplify merged model saving."
                )
                load_in_4bit = False
                load_in_16bit = True

            model, tokenizer = FastModel.from_pretrained(
                model_name = model_name,
                max_seq_length = max_seq_length,
                dtype = dtype,
                load_in_4bit = load_in_4bit,
                load_in_8bit = load_in_8bit,
                load_in_16bit = load_in_16bit,
                full_finetuning = full_finetuning,
                token = token,
                device_map = device_map,
                rope_scaling = rope_scaling,
                fix_tokenizer = fix_tokenizer,
                trust_remote_code = trust_remote_code,
                use_gradient_checkpointing = use_gradient_checkpointing,
                resize_model_vocab = resize_model_vocab,
                revision = revision,
                return_logits = False,
                use_exact_model_name = use_exact_model_name,
                offload_embedding = offload_embedding,
                random_state = random_state,
                max_lora_rank = max_lora_rank,
                disable_log_stats = disable_log_stats,
                qat_scheme = qat_scheme,
                load_in_fp8 = load_in_fp8,
                unsloth_tiled_mlp = unsloth_tiled_mlp,
                **kwargs,
            )
        finally:
            os.environ["UNSLOTH_WARN_UNINITIALIZED"] = old_environ

        # try to load modules, otherwise fallback to old hard-coded modules
        from sentence_transformers import SentenceTransformer

        modules, no_modules = FastSentenceTransformer._load_modules(
            model_name,
            token,
            model,
            tokenizer,
            max_seq_length,
            pooling_mode,
            trust_remote_code = trust_remote_code,
        )

        st_device = device_map
        if isinstance(st_device, dict) or (
            isinstance(st_device, str) and st_device in ["auto", "sequential"]
        ):
            st_device = None

        st_model = SentenceTransformer(modules = modules, device = st_device)
        st_model.no_modules = no_modules

        def _save_pretrained_merged(self, save_directory, **kwargs):
            # check which adapter files exist before save_pretrained
            adapter_files = ["adapter_model.safetensors", "adapter_config.json"]
            existing_before = {
                f
                for f in adapter_files
                if os.path.exists(os.path.join(save_directory, f))
            }

            # sentence-transformers config and modules only get saved if we call save_pretrained
            self.save_pretrained(save_directory)

            # remove LoRA adapters only if they were created by save_pretrained (not pre-existing)
            for file in adapter_files:
                if file not in existing_before:
                    try:
                        os.remove(os.path.join(save_directory, file))
                    except OSError:
                        # e.g. the file was never written; nothing to clean up
                        pass

            tokenizer = kwargs.pop("tokenizer", self.tokenizer)
            if self.no_modules:
                # fallback for non-sentence-transformers models
                print(
                    "Unsloth: No modules detected. Using standard merge_and_unload for saving..."
                )
                safe_kwargs = kwargs.copy()
                # filter out Unsloth-specific args that are not in huggingface's save_pretrained
                unsloth_args = [
                    "save_method",
                    "temporary_location",
                    "maximum_memory_usage",
                ]
                for k in unsloth_args:
                    safe_kwargs.pop(k, None)

                merged_model = self[0].auto_model.merge_and_unload()
                merged_model.save_pretrained(save_directory, **safe_kwargs)
                if tokenizer is not None:
                    tokenizer.save_pretrained(save_directory)
            else:
                self[0].auto_model.save_pretrained_merged(
                    save_directory, tokenizer = tokenizer, **kwargs
                )

            # add Unsloth branding to the generated README
            try:
                FastSentenceTransformer._add_unsloth_branding(save_directory)
            except Exception as e:
                print(f"Unsloth Warning: Failed to add branding to README: {e}")

        st_model.save_pretrained_merged = types.MethodType(
            _save_pretrained_merged, st_model
        )

        st_model.save_pretrained_torchao = types.MethodType(
            _save_pretrained_torchao, st_model
        )

        st_model.save_pretrained_gguf = types.MethodType(
            _save_pretrained_gguf, st_model
        )

        st_model.push_to_hub_gguf = types.MethodType(_push_to_hub_gguf, st_model)

        def _push_to_hub_merged(self, repo_id, **kwargs):
            token = kwargs.get("token", None) or get_token()
            if token is None:
                raise ValueError(
                    "No HF token provided. Please provide a token or login with `hf auth login`"
                )
            private = kwargs.get("private", None)
            commit_message = kwargs.get("commit_message", "Upload model")

            from huggingface_hub import HfApi

            api = HfApi(token = token)
            # Repo creation is best effort; upload_folder below surfaces real failures.
            try:
                api.create_repo(
                    repo_id = repo_id,
                    private = private,
                    exist_ok = True,
                    repo_type = "model",
                )
            except Exception:
                pass

            # order doesn't seem to matter for this after repo creation...
            FastSentenceTransformer._add_unsloth_tags(repo_id, token)

            with tempfile.TemporaryDirectory() as temp_dir:
                self.save_pretrained_merged(temp_dir, **kwargs)
                api.upload_folder(
                    folder_path = temp_dir,
                    repo_id = repo_id,
                    commit_message = commit_message,
                )
            print(
                f"Unsloth: Successfully pushed merged model to https://huggingface.co/{repo_id}"
            )

        st_model.push_to_hub_merged = types.MethodType(_push_to_hub_merged, st_model)
        return st_model

    @staticmethod
    def get_peft_model(
        model,
        r = 16,
        target_modules = [
            "query",
            "key",
            "value",
            "dense",
        ],
        lora_alpha = 16,
        lora_dropout = 0.0,
        bias = "none",
        layers_to_transform = None,
        layers_pattern = None,
        use_gradient_checkpointing = False,  # Changed default: conflicts with torch.compile
        random_state = 3407,
        max_seq_length = 2048,
        use_rslora = False,
        modules_to_save = None,
        init_lora_weights = True,
        loftq_config = {},
        **kwargs,
    ):
        """
        Attach LoRA adapters to `model`.

        Three cases are handled:

        * `model` is not a `SentenceTransformer`: everything is delegated to
          `FastModel.get_peft_model` and its result is returned.
        * `model` is a `SentenceTransformer` without the `_unsloth_fast_encoder`
          flag: `model[0].auto_model` is passed through `FastModel.get_peft_model`,
          written back, and `model` is returned.
        * `model` is a `SentenceTransformer` with `_unsloth_fast_encoder` set:
          PEFT is applied directly to `model[0].auto_model` with a `LoraConfig`,
          and torch.compile is only scheduled (`_compile_pending`), not run.

        Args:
            model: A `SentenceTransformer` or any model accepted by
                `FastModel.get_peft_model`.
            r, target_modules, lora_alpha, lora_dropout, bias,
            layers_to_transform, layers_pattern, use_rslora, modules_to_save,
            init_lora_weights, loftq_config: LoRA settings, forwarded to
                `FastModel.get_peft_model` or to `LoraConfig` on the fast
                encoder path.
            use_gradient_checkpointing: Forwarded to `FastModel`; on the fast
                encoder path a truthy value enables gradient checkpointing on the
                inner model and, if that succeeds, disables the pending compile.
            random_state, max_seq_length: Forwarded to `FastModel.get_peft_model`;
                not read on the fast encoder path.
            **kwargs: `task_type` defaults to "FEATURE_EXTRACTION". On the fast
                encoder path only `task_type` and `qat_scheme` are read; otherwise
                all entries are forwarded to `FastModel.get_peft_model`.

        Returns:
            `model` itself for `SentenceTransformer` inputs, otherwise whatever
            `FastModel.get_peft_model` returns.

        Raises:
            ValueError: Re-raised from gradient-checkpointing setup when the
                error is not the "does not support gradient checkpointing" case.
        """
        from sentence_transformers import SentenceTransformer
        from peft import LoraConfig, get_peft_model as peft_get_peft_model

        if "task_type" not in kwargs:
            kwargs["task_type"] = "FEATURE_EXTRACTION"
            print("Setting task_type to FEATURE_EXTRACTION")

        def _via_fast_model(base_model):
            # Single place for the FastModel delegation used by both non-fast cases.
            return FastModel.get_peft_model(
                model = base_model,
                r = r,
                target_modules = target_modules,
                lora_alpha = lora_alpha,
                lora_dropout = lora_dropout,
                bias = bias,
                layers_to_transform = layers_to_transform,
                layers_pattern = layers_pattern,
                use_gradient_checkpointing = use_gradient_checkpointing,
                random_state = random_state,
                max_seq_length = max_seq_length,
                use_rslora = use_rslora,
                modules_to_save = modules_to_save,
                init_lora_weights = init_lora_weights,
                loftq_config = loftq_config,
                **kwargs,
            )

        if not isinstance(model, SentenceTransformer):
            return _via_fast_model(model)

        transformer_module = model[0]
        inner_model = transformer_module.auto_model

        # Check if this is a fast encoder model (uses torch.compile instead of Unsloth patching)
        if not getattr(model, "_unsloth_fast_encoder", False):
            # Original path for non-fast-encoder models:
            # re-assign the peft model back to the transformer module
            transformer_module.auto_model = _via_fast_model(inner_model)
            return model

        # Fast encoder path: Use native PEFT + torch.compile (6x speedup)

        # Check if model is quantized (4-bit/8-bit)
        is_quantized = (
            getattr(inner_model, "is_quantized", False)
            or getattr(inner_model.config, "quantization_config", None) is not None
        )

        # Track if gradient checkpointing was actually enabled
        gc_enabled = False

        # this is needed when from_pretrained was called without gradient
        # checkpointing but get_peft_model requests it
        if use_gradient_checkpointing:
            import transformers
            from packaging.version import Version

            transformers4 = Version(transformers.__version__).major < 5
            model_type = getattr(inner_model.config, "model_type", "").lower()

            if model_type == "mpnet" and transformers4:
                FastSentenceTransformer._patch_mpnet_v4()
            elif model_type == "mpnet":
                FastSentenceTransformer._patch_mpnet_v5()

        # Prepare for k-bit training if quantized
        if is_quantized:
            from ._utils import prepare_model_for_kbit_training

            # Pass the caller's value through when truthy, otherwise a plain False.
            _gc_for_kbit = use_gradient_checkpointing or False
            try:
                inner_model = prepare_model_for_kbit_training(
                    inner_model,
                    use_gradient_checkpointing = _gc_for_kbit,
                )
                print("Unsloth: Prepared quantized model for k-bit training")
                gc_enabled = bool(_gc_for_kbit)
            except ValueError as e:
                if "does not support gradient checkpointing" in str(e):
                    # Model doesn't support gradient checkpointing, disable it
                    print(
                        f"Unsloth Warning: {inner_model.__class__.__name__} does not support gradient checkpointing. Skipping."
                    )
                    inner_model = prepare_model_for_kbit_training(
                        inner_model,
                        use_gradient_checkpointing = False,
                    )
                    print(
                        "Unsloth: Prepared quantized model for k-bit training (without gradient checkpointing)"
                    )
                else:
                    raise

        # Enable gradient checkpointing if requested (only for non-quantized, since prepare_model handles it)
        elif use_gradient_checkpointing:
            if hasattr(inner_model, "gradient_checkpointing_enable"):
                try:
                    inner_model.gradient_checkpointing_enable()
                    print("Unsloth: Enabled gradient checkpointing")
                    gc_enabled = True
                except ValueError as e:
                    if "does not support gradient checkpointing" in str(e):
                        print(
                            f"Unsloth Warning: {inner_model.__class__.__name__} does not support gradient checkpointing. Skipping."
                        )
                    else:
                        # Unknown failure: surface it, as the quantized branch does,
                        # instead of silently continuing without checkpointing.
                        raise

        # Create LoRA config.
        # The optional LoRA settings are forwarded too, so this path honours the
        # same arguments as the FastModel path instead of silently dropping them.
        lora_config = LoraConfig(
            r = r,
            lora_alpha = lora_alpha,
            target_modules = target_modules,
            lora_dropout = lora_dropout,
            bias = bias,
            task_type = kwargs.get("task_type", "FEATURE_EXTRACTION"),
            layers_to_transform = layers_to_transform,
            layers_pattern = layers_pattern,
            use_rslora = use_rslora,
            modules_to_save = modules_to_save,
            init_lora_weights = init_lora_weights,
            # copy plain dicts so the shared default `{}` is never aliased
            loftq_config = (
                dict(loftq_config) if isinstance(loftq_config, dict) else loftq_config
            ),
        )

        # Apply PEFT directly (not through FastModel)
        peft_model = peft_get_peft_model(inner_model, lora_config)

        # Apply QAT if specified
        qat_scheme = kwargs.get("qat_scheme", None)
        if qat_scheme is not None:
            from ._utils import _prepare_model_for_qat

            peft_model = _prepare_model_for_qat(peft_model, qat_scheme)

        # Determine compile mode (only if not using gradient checkpointing)
        compile_mode = getattr(model, "_compile_mode", "default")
        # Re-enable torch.compile if gradient checkpointing was requested but couldn't be enabled
        if compile_mode is None and not gc_enabled:
            compile_mode = "default"
            print(
                "Unsloth: Re-enabling torch.compile since gradient checkpointing is not supported"
            )

        # Re-assign the peft model back to the transformer module
        transformer_module.auto_model = peft_model

        # Store compile info for auto-compile at trainer time
        # torch.compile is deferred until training starts so we can check max_steps
        if compile_mode is not None:
            model._compile_mode = compile_mode
            model._compile_threshold = (
                FastSentenceTransformer._estimate_compile_threshold(model)
            )
            # Flag to indicate compile has not been applied yet
            model._compile_pending = True
            print(
                f"Unsloth: torch.compile will be applied automatically if max_steps > {model._compile_threshold}"
            )
        else:
            model._compile_mode = None
            model._compile_pending = False
            print(
                "Unsloth: torch.compile disabled (gradient checkpointing enabled)"
            )

        return model


def _patch_sentence_transformer_trainer():
    """
    Patch SentenceTransformerTrainer to automatically apply torch.compile
    when training steps exceed the breakeven threshold.

    This is called automatically when this module is imported.
    """
    try:
        from sentence_transformers import SentenceTransformerTrainer
    except ImportError:
        return  # sentence_transformers not installed

    if getattr(SentenceTransformerTrainer, "_unsloth_auto_compile_patched", False):
        return  # Already patched

    from functools import wraps

    _original_init = SentenceTransformerTrainer.__init__

    @wraps(_original_init)
    def _patched_init(self, *args, **kwargs):
        # Extract model and training_args
        model = kwargs.get("model") or (args[0] if args else None)
        training_args = kwargs.get("args") or (args[1] if len(args) > 1 else None)

        # Check if model has pending compile
        if (
            model is not None
            and training_args is not None
            and getattr(model, "_compile_pending", False)
        ):
            max_steps = getattr(training_args, "max_steps", -1)
            compile_mode = getattr(model, "_compile_mode", "default")

            # Re-estimate threshold now that training args are available
            batch_size = getattr(training_args, "per_device_train_batch_size", None)
            grad_accum = getattr(training_args, "gradient_accumulation_steps", None)
            max_seq_length = getattr(model, "max_seq_length", None)
            if max_seq_length is None and hasattr(model, "__getitem__"):
                try:
                    max_seq_length = getattr(model[0], "max_seq_length", None)
                except Exception:
                    max_seq_length = None
            if max_seq_length is None:
                tokenizer = getattr(model, "tokenizer", None)
                max_seq_length = (
                    getattr(tokenizer, "model_max_length", None)
                    if tokenizer is not None
                    else None
                )

            threshold = FastSentenceTransformer._estimate_compile_threshold(
                model,
                batch_size = batch_size,
                grad_accum = grad_accum,
                max_seq_length = max_seq_length,
            )
            model._compile_threshold = threshold

            if max_steps > 0 and max_steps >= threshold:
                print(
                    f"Unsloth: Auto-compiling model ({max_steps} steps >= {threshold} threshold)"
                )
                FastSentenceTransformer._apply_torch_compile(model, mode = compile_mode)
                model._compile_pending = False
            elif max_steps > 0:
                print(
                    f"Unsloth: Skipping torch.compile ({max_steps} steps < {threshold} threshold)"
                )
                model._compile_pending = False

        # Call original __init__
        _original_init(self, *args, **kwargs)

        # Disable mixed precision when FORCE_FLOAT32 is active (matches rl.py behavior)
        if os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1":
            if hasattr(self, "args") and self.args is not None:
                if self.args.fp16 or self.args.bf16:
                    print(
                        "Unsloth: Switching to float32 training since model cannot work with float16"
                    )
                    self.args.fp16 = False
                    self.args.bf16 = False
                    if hasattr(self.args, "bf16_full_eval"):
                        self.args.bf16_full_eval = False
                    if hasattr(self.args, "fp16_full_eval"):
                        self.args.fp16_full_eval = False

    SentenceTransformerTrainer.__init__ = _patched_init
    SentenceTransformerTrainer._unsloth_auto_compile_patched = True


# Auto-patch trainer on module import.
# The call sits at module level, so it runs as soon as this module is imported.
# It returns without patching when sentence_transformers cannot be imported or
# when the trainer class already carries the `_unsloth_auto_compile_patched` flag.
_patch_sentence_transformer_trainer()


================================================
FILE: unsloth/models/vision.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from transformers import (
    BitsAndBytesConfig,
    AutoProcessor,
    AutoTokenizer,
    AutoModelForCausalLM,
)

try:
    from transformers import AutoModelForImageTextToText

    AutoModelForVision2Seq = AutoModelForImageTextToText
except:
    from transformers import AutoModelForVision2Seq
from ..kernels import (
    post_patch_loss_function,
)
from ._utils import __version__, importlib_version, _prepare_model_for_qat
from ._utils import *
from .loader_utils import _get_fp8_mode_and_check_settings
from ..save import patch_saving_functions
from ..models.loader_utils import is_distributed
from unsloth_zoo.gradient_checkpointing import (
    unpatch_unsloth_gradient_checkpointing,
    unpatch_unsloth_smart_gradient_checkpointing,
)
import torch.utils.checkpoint as torch_checkpoint
import transformers.modeling_utils as hf_modeling_utils
from peft import LoraConfig, TaskType, get_peft_model as _get_peft_model
from peft import PeftModelForCausalLM
from transformers import set_seed as transformers_set_seed
from unsloth_zoo.peft_utils import (
    get_peft_regex,
    SKIP_QUANTIZATION_MODULES,
    requires_grad_for_gradient_checkpointing,
)
from transformers.models.llama.modeling_llama import logger
from transformers import __version__ as transformers_version
from triton import __version__ as triton_version
from unsloth_zoo.utils import _get_dtype
from unsloth_zoo.hf_utils import (
    dtype_from_config,
    add_dtype_kwargs,
    fix_lora_auto_mapping,
    get_auto_processor,
)
from unsloth_zoo.patching_utils import patch_model_and_tokenizer
from unsloth_zoo.training_utils import prepare_model_for_training

from unsloth_zoo.utils import Version
from transformers import __version__ as transformers_version

import types
import functools
import os
import gc
import math
from typing import Optional, Tuple, List, Union
import re, inspect, sys
import contextlib

try:
    from huggingface_hub.utils import get_token
except:
    # Old HF Hub versions <= 0.0.25
    from huggingface_hub.utils._token import get_token
from ..device_type import (
    is_hip,
    get_device_type,
    DEVICE_TYPE,
    DEVICE_TYPE_TORCH,
    DEVICE_COUNT,
    ALLOW_PREQUANTIZED_MODELS,
)

# Public API of this module: only the base model class is exported.
__all__ = [
    "FastBaseModel",
]

# Cache mapping an architecture name (config.architectures[0]) to the keyword
# its forward() accepts for limiting returned logits: "num_logits_to_keep",
# "logits_to_keep", or None. Filled lazily by unsloth_base_fast_generate.
# NOTE(review): a `global` statement at module scope has no effect.
global NUM_LOGITS_TO_KEEP
NUM_LOGITS_TO_KEEP = dict()

# Model types for which FastBaseModel.from_pretrained accepts
# `fast_inference = True` on a vision-language model; any other VLM raises.
VLLM_SUPPORTED_VLM = [
    "qwen2_5_vl",
    "gemma3",
    "mistral3",
    "qwen3_vl",
    "qwen3_vl_moe",
]
# Model types for which from_pretrained sets `vllm_enable_lora = False`.
VLLM_NON_LORA_VLM = [
    "mllama",
]
# NOTE(review): not referenced in the visible part of this file; presumably
# model types whose inference path is compiled ahead of time - confirm at the
# use site.
PRE_COMPILE_INFERENCE = [
    "gpt_oss",
]

from transformers import GenerationConfig, CompileConfig, AutoConfig

# Prefer the `PreTrainedConfig` spelling when transformers provides it and
# alias it to the `PretrainedConfig` name; otherwise import the latter directly.
try:
    from transformers import PreTrainedConfig

    PretrainedConfig = PreTrainedConfig
except:
    from transformers import PretrainedConfig

# True when the config class docstring mentions `torch_dtype`.
# NOTE(review): `__doc__` is None when Python runs with -OO, in which case this
# membership test raises TypeError.
HAS_TORCH_DTYPE = "torch_dtype" in PretrainedConfig.__doc__

# Shared compile configuration handed to `generate` by
# unsloth_base_fast_generate whenever a cache implementation is selected.
_compile_config = CompileConfig(
    fullgraph = False,
    dynamic = None,
    mode = "reduce-overhead",
)
_compile_config.disable = True  # Must set manually

# `torch.compiler.set_stance` is looked up defensively; None when unavailable.
try:
    torch_compiler_set_stance = torch.compiler.set_stance
except:
    torch_compiler_set_stance = None


def unsloth_base_fast_generate(
    self,
    *args,
    **kwargs,
):
    """Run ``self._old_generate`` with Unsloth's inference-time setup applied.

    The wrapper locates the primary input tensor, switches the model to
    inference mode, fills in defaults (``pad_token_id``, the logits-to-keep
    keyword, cache implementation and compile config), casts ``pixel_values``
    to the inference dtype, and finally calls the original ``generate`` under
    ``torch.inference_mode()`` and autocast.

    Args:
        self: The model whose original ``generate`` is stored as
            ``_old_generate``.
        *args: Positional arguments forwarded unchanged; the first one, when
            present, is treated as the input tensor.
        **kwargs: Keyword arguments forwarded to ``_old_generate`` after the
            adjustments above. A passed ``generation_config`` object is
            modified in place.

    Returns:
        Whatever ``self._old_generate`` returns.

    Raises:
        TypeError: If no input tensor can be found in ``args`` / ``kwargs``.
        AssertionError: If the located input is not exactly a ``torch.Tensor``.
    """
    # Locate the main input tensor: first positional argument, then the known
    # keyword names in priority order, then the first keyword argument.
    if len(args) != 0:
        input_ids = args[0]
    else:
        for input_key in (
            "input_ids",
            "input",
            "input_features",
            "input_embeds",
            "inputs",
            # Hugging Face spells the embeddings keyword `inputs_embeds`.
            "inputs_embeds",
        ):
            if input_key in kwargs:
                input_ids = kwargs[input_key]
                break
        else:
            # `next` with a default avoids leaking StopIteration when the call
            # has no keyword arguments at all.
            first_key = next(iter(kwargs), None)
            if first_key is None or type(kwargs[first_key]) is not torch.Tensor:
                raise TypeError("Unsloth: You need to pass in input_ids to .generate!")
            input_ids = kwargs[first_key]
    assert type(input_ids) is torch.Tensor

    FastBaseModel.for_inference(self)
    dtype = _get_dtype(dtype_from_config(self.config))
    # Handle full float32 cases as config.dtype == torch.float32!
    do_bfloat16_mixed_precision = (
        os.environ.get("UNSLOTH_BFLOAT16_MIXED_PRECISION", "0") == "1"
    )
    if do_bfloat16_mixed_precision:
        dtype = torch.bfloat16

    arch = self.config.architectures[0]

    # Only drop token_type_ids when forward() cannot accept them; removing
    # them unconditionally is WRONG for Gemma 3 (bidirectional attention).
    if hasattr(self, "generate") and hasattr(self, "forward"):
        # did not combine with below since self might not have model
        keys = inspect.signature(self.forward).parameters.keys()
        if "token_type_ids" not in keys:
            kwargs.pop("token_type_ids", None)

    # Find (once per architecture) which keyword limits the returned logits:
    # num_logits_to_keep or logits_to_keep. Some models accept neither.
    if arch not in NUM_LOGITS_TO_KEEP:
        logits_key = None
        m = self
        while hasattr(m, "model"):
            if hasattr(m, "forward"):
                keys = inspect.signature(m.forward).parameters.keys()
                if "num_logits_to_keep" in keys:
                    logits_key = "num_logits_to_keep"
                    break
                elif "logits_to_keep" in keys:
                    logits_key = "logits_to_keep"
                    break
            m = m.model
        NUM_LOGITS_TO_KEEP[arch] = logits_key
    key = NUM_LOGITS_TO_KEEP[arch]
    if key is not None and key not in kwargs:
        kwargs[key] = 1

    # Default pad_token_id to the (first) EOS token id from the config
    model_eos_token_id = getattr(self.config, "eos_token_id", None)
    if model_eos_token_id is not None and hasattr(model_eos_token_id, "__iter__"):
        model_eos_token_id = model_eos_token_id[0]

    kwargs["pad_token_id"] = kwargs.pop("pad_token_id", model_eos_token_id)

    # Cast pixel values for VLMs; best effort, left untouched on failure
    pixel_values = kwargs.get("pixel_values", None)
    if pixel_values is not None:
        try:
            kwargs["pixel_values"] = pixel_values.to(dtype)
        except Exception:
            pass

    # Mixed precision autocast
    if os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1":
        dtype = torch.float16
    autocaster = torch.autocast(device_type = DEVICE_TYPE_TORCH, dtype = dtype)

    # Set compile dynamic shapes: dim 0 static, dim 1 dynamic
    torch._dynamo.mark_static(input_ids, 0)
    torch._dynamo.mark_dynamic(input_ids, 1)
    for shaped_key in ("attention_mask", "token_type_ids"):
        if shaped_key in kwargs:
            torch._dynamo.mark_static(kwargs[shaped_key], 0)
            torch._dynamo.mark_dynamic(kwargs[shaped_key], 1)

    # Fix generation_config
    # Use hybrid if sliding window seen (old transformers), otherwise static
    legacy_transformers = Version(transformers_version) < Version("4.56.0.dev0")
    can_compile_fullgraph = getattr(self, "_can_compile_fullgraph", True)
    supports_static_cache = getattr(
        self, "_supports_static_cache", can_compile_fullgraph
    )
    static_disabled = os.environ.get("UNSLOTH_DISABLE_STATIC_GENERATION", "0") != "0"
    if not supports_static_cache or (static_disabled and legacy_transformers):
        cache_implementation = None
    else:
        swa = getattr(
            getattr(self.config, "text_config", self.config), "sliding_window", None
        )
        has_sliding_window = type(swa) is int and swa != 0
        needs_hybrid = has_sliding_window or can_compile_fullgraph is not True
        if needs_hybrid and legacy_transformers:
            cache_implementation = "hybrid"
        else:
            cache_implementation = "static"
    # [TODO] Unsure why static fails
    if do_bfloat16_mixed_precision:
        cache_implementation = None

    if "generation_config" in kwargs:
        kwargs["generation_config"].cache_implementation = cache_implementation
        if cache_implementation is not None:
            kwargs["generation_config"].compile_config = _compile_config
    else:
        kwargs["cache_implementation"] = cache_implementation
        if cache_implementation is not None:
            kwargs["compile_config"] = _compile_config

    # Delete cached Flex Attention masks / cache objects to reset inference
    _clear_stale_generation_caches(self)

    # DO INFERENCE
    with torch.inference_mode(), autocaster:
        output = self._old_generate(*args, **kwargs)

    # Delete them again so nothing from this call leaks into later use
    _clear_stale_generation_caches(self)

    return output


def _clear_stale_generation_caches(model):
    """Remove per-module ``_flex_attention_cache`` and ``cache_utils`` ``_cache`` attributes."""
    for _, module in model.named_modules():
        if hasattr(module, "_flex_attention_cache"):
            try:
                del module._flex_attention_cache
            except Exception:
                pass
        # Solves AttributeError: 'SlidingWindowLayer' object has no attribute 'max_batch_size'
        if hasattr(module, "_cache") and "cache_utils" in str(module._cache.__class__):
            try:
                del module._cache
            except Exception:
                pass


def _construct_vlm_processor_fallback(
    tokenizer_name, model_type, token, trust_remote_code
):
    """Construct a VLM processor manually when AutoProcessor.from_pretrained fails.

    Some VLMs (e.g., LFM2.5-VL) have tokenizer_class entries that AutoTokenizer
    cannot resolve. This function loads the image processor and tokenizer separately,
    sets required special token attributes, and constructs the processor.

    Args:
        tokenizer_name: Hub repo id or local directory passed to the
            ``from_pretrained`` loaders.
        model_type: Key looked up in ``PROCESSOR_MAPPING_NAMES``; when it has
            no entry, the ``model_type`` of the loaded ``AutoConfig`` is tried.
        token: Access token forwarded to every download / load call.
        trust_remote_code: Forwarded to the ``from_pretrained`` loaders.

    Returns:
        The constructed processor, or ``None`` if any step fails or no
        processor class can be resolved. This function never raises.
    """
    try:
        from transformers import AutoImageProcessor, PreTrainedTokenizerFast, AutoConfig
        from transformers.models.auto.processing_auto import PROCESSOR_MAPPING_NAMES
        import json

        # Load image processor
        image_processor = AutoImageProcessor.from_pretrained(
            tokenizer_name,
            token = token,
            trust_remote_code = trust_remote_code,
        )
        # Load tokenizer via PreTrainedTokenizerFast (bypasses tokenizer_class check)
        tok = PreTrainedTokenizerFast.from_pretrained(
            tokenizer_name,
            padding_side = "left",
            token = token,
            trust_remote_code = trust_remote_code,
        )
        # Read tokenizer_config.json for model-specific special tokens.
        # Best effort: any failure here leaves the tokenizer as loaded.
        try:
            if os.path.isdir(tokenizer_name):
                # Local checkpoint directory: hf_hub_download only accepts
                # repo ids, so read the file straight from disk.
                config_path = os.path.join(tokenizer_name, "tokenizer_config.json")
            else:
                from huggingface_hub import hf_hub_download

                config_path = hf_hub_download(
                    tokenizer_name, "tokenizer_config.json", token = token
                )
            with open(config_path, "r", encoding = "utf-8") as f:
                tok_config = json.load(f)
            # Set model-specific special tokens and their IDs
            for key in (
                "image_token",
                "image_start_token",
                "image_end_token",
                "image_thumbnail",
                "video_token",
            ):
                if key in tok_config and not hasattr(tok, key):
                    setattr(tok, key, tok_config[key])
                    id_key = key + "_id"
                    token_id = tok.convert_tokens_to_ids(tok_config[key])
                    if not hasattr(tok, id_key):
                        setattr(tok, id_key, token_id)
        except Exception:
            pass

        # Find the processor class - try model_type first, then top-level config model_type
        proc_class_name = PROCESSOR_MAPPING_NAMES.get(model_type)
        if proc_class_name is None:
            # model_type might be a sub-model type (e.g. "lfm2" instead of "lfm2_vl").
            # Try the top-level config.model_type which often has the processor mapping.
            try:
                config = AutoConfig.from_pretrained(
                    tokenizer_name,
                    token = token,
                    trust_remote_code = trust_remote_code,
                )
                proc_class_name = PROCESSOR_MAPPING_NAMES.get(config.model_type)
            except Exception:
                pass

        if proc_class_name is None:
            return None

        import transformers

        proc_class = getattr(transformers, proc_class_name, None)
        if proc_class is None:
            return None

        processor = proc_class(image_processor = image_processor, tokenizer = tok)
        # Copy chat_template from tokenizer to processor if needed
        if not getattr(processor, "chat_template", None) and getattr(
            tok, "chat_template", None
        ):
            processor.chat_template = tok.chat_template
        return processor
    except Exception:
        # Deliberate best effort: callers treat None as "fallback unavailable".
        return None


class FastBaseModel:
    @staticmethod
    def from_pretrained(
        model_name = "unsloth/Llama-3.2-1B-Instruct",
        max_seq_length = 2048,
        dtype = None,
        load_in_4bit = True,
        load_in_8bit = False,
        load_in_16bit = False,
        full_finetuning = False,
        token = None,
        device_map = "sequential",
        trust_remote_code = False,
        model_types = None,
        tokenizer_name = None,
        auto_model = AutoModelForVision2Seq,
        use_gradient_checkpointing = "unsloth",
        supports_sdpa = True,
        whisper_language = None,
        whisper_task = None,
        auto_config = None,
        offload_embedding = False,
        float32_mixed_precision = None,  # Forces float32 mixed precision
        # vLLM parameters
        fast_inference = False,
        gpu_memory_utilization = 0.5,
        float8_kv_cache = False,
        random_state = 3407,
        max_lora_rank = 64,
        disable_log_stats = False,
        unsloth_vllm_standby = False,
        load_in_fp8 = False,  # fp8 LoRA (True, False, 'block')
        **kwargs,
    ):
        if unsloth_vllm_standby and os.environ.get("UNSLOTH_VLLM_STANDBY", "0") != "1":
            raise RuntimeError(
                "Unsloth: UNSLOTH_VLLM_STANDBY is True, but UNSLOTH_VLLM_STANDBY is not set to 1!"
            )

        if model_types is None:
            raise RuntimeError(
                "Unsloth: Please use FastModel or FastVisionModel and not use FastBaseModel directly!"
            )
        if os.environ.get("UNSLOTH_MODEL_NAME", "") == "":
            os.environ["UNSLOTH_MODEL_NAME"] = model_name.lower()

        is_vlm = auto_model in [AutoModelForVision2Seq, AutoModelForImageTextToText]
        is_whisper = whisper_language is not None and whisper_task is not None
        auto_processor = AutoProcessor if (is_vlm or is_whisper) else AutoTokenizer

        model_type_arch = model_types[0]
        if model_type_arch == "siglip":
            for model_type_arch in model_types:
                if model_type_arch != "siglip":
                    break

        vllm_enable_lora = True

        if is_vlm and fast_inference:
            if not any(arch in VLLM_SUPPORTED_VLM for arch in model_types):
                raise RuntimeError(
                    f"Unsloth: Fast inference is only supported for Language models and Qwen2.5-VL, Gemma3 among vision models. "
                    f"Found architectures: {', '.join(model_types)}!"
                )

        if any(arch in VLLM_NON_LORA_VLM for arch in model_types):
            # mllama is still only in vllm v0 https://arc.net/l/quote/llwkfgmu
            # https://docs.vllm.ai/en/stable/models/supported_models.html#text-generation_1
            # vLLM V0 does not support LoRA on multi modal models.
            # TODO: Update this once vLLM V1 supports Llama 3.2 aka mllama
            vllm_enable_lora = False

        os.environ["UNSLOTH_USE_NEW_MODEL"] = "1"
        if trust_remote_code:
            print(
                "Unsloth: WARNING `trust_remote_code` is True.\n"
                "Are you certain you want to do remote code execution?"
            )
        token = hf_login(token)
        SUPPORTS_BFLOAT16 = is_bfloat16_supported()

        if DEVICE_TYPE == "cuda":
            gpu_stats = torch.cuda.get_device_properties(0)
            gpu_stats_name = (
                gpu_stats.name + ". " if gpu_stats.name != "" else "NVIDIA GPU Device. "
            )
            gpu_version = torch.version.cuda
            gpu_stats_snippet = f"CUDA: {gpu_stats.major}.{gpu_stats.minor}. CUDA Toolkit: {gpu_version}."
            try:
                vllm_version = f" vLLM: {importlib_version('vllm')}."
            except:
                vllm_version = ""
        elif DEVICE_TYPE == "hip":
            gpu_stats = torch.cuda.get_device_properties(0)
            gpu_stats_name = resolve_hip_gpu_stats_name(gpu_stats)
            gpu_version = torch.version.hip
            gpu_stats_snippet = f"ROCm Toolkit: {gpu_version}."
            try:
                vllm_version = f" vLLM: {importlib_version('vllm')}."
            except:
                vllm_version = ""
        elif DEVICE_TYPE == "xpu":
            gpu_stats = torch.xpu.get_device_properties(0)
            gpu_stats_name = (
                gpu_stats.name + ". " if gpu_stats.name != "" else "Intel XPU Device. "
            )
            gpu_version = torch.version.xpu
            gpu_stats_snippet = f"Intel Toolkit: {gpu_version}."
            # [TODO] After adding vLLM support for XPU, change this
            vllm_version = ""
        else:
            raise ValueError(f"Unsloth: Unsupported device type: {DEVICE_TYPE}")

        max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)

        arch_name = model_type_arch.title()
        arch_name = arch_name.replace("_Vl_", "_VL_").replace("_Moe", "_MoE")
        statistics = (
            f"==((====))==  Unsloth {__version__}: Fast {arch_name} patching. Transformers: {transformers_version}.{vllm_version}\n"
            f"   {chr(92)}{chr(92)}   /|    {gpu_stats_name}Num GPUs = {DEVICE_COUNT}. Max memory: {max_memory} GB. Platform: {platform_system}.\n"
            f"O^O/ {chr(92)}_/ {chr(92)}    Torch: {torch.__version__}. {gpu_stats_snippet} Triton: {triton_version}\n"
            f"{chr(92)}        /    Bfloat16 = {str(SUPPORTS_BFLOAT16).upper()}. FA [Xformers = {xformers_version}. FA2 = {HAS_FLASH_ATTENTION}]\n"
            f' "-____-"     Free license: http://github.com/unslothai/unsloth'
        )

        print(statistics)

        # Warn about fast transfers
        if "HF_HUB_ENABLE_HF_TRANSFER" in os.environ:
            old_hf_transfer = os.environ["HF_HUB_ENABLE_HF_TRANSFER"]
            if old_hf_transfer in ("False", "false"):
                old_hf_transfer = "0"
            if old_hf_transfer in ("True", "true"):
                old_hf_transfer = "1"
        else:
            old_hf_transfer = "0"
        if old_hf_transfer == "1":
            print(
                "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!"
            )
        if old_hf_transfer != "0":
            os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

        # For debugging - we use a download counter to see if environments are not breaking or if HF is down
        get_statistics(kwargs.get("local_files_only", False))

        if dtype is None:
            dtype = torch.float16 if not SUPPORTS_BFLOAT16 else torch.bfloat16
        elif os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1":
            if dtype == torch.float16:
                dtype = torch.bfloat16
        elif dtype == torch.bfloat16 and not SUPPORTS_BFLOAT16:
            logger.warning_once(
                "Device does not support bfloat16. Will change to float16."
            )
            dtype = torch.float16
        assert dtype in (torch.float16, torch.bfloat16, torch.float32)

        bnb_compute_dtype = dtype
        do_forced_float32 = False
        if os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1":
            print(
                f"Unsloth: Using float16 precision for {model_type_arch} won't work! Using float32."
            )
            bnb_compute_dtype = torch.float16
            do_forced_float32 = True

        # Check for custom data-types
        custom_datatype = None
        correct_dtype = None
        if os.environ.get("UNSLOTH_FORCE_CUSTOM_DTYPE", "") != "":
            custom_datatype = os.environ["UNSLOTH_FORCE_CUSTOM_DTYPE"]
            assert custom_datatype.count(";") >= 4
            checker, _dtype, _bnb_compute_dtype, _custom_datatype, execute_code = (
                custom_datatype.split(";", 4)
            )
            # Allow custom dtypes on all runs
            allow_all_runs = checker == "all"
            # Allow only on float16 datatypes
            allow_float16_runs = (
                checker == "float16" or checker == "torch.float16"
            ) and (
                dtype == torch.float16
                or os.environ.get("UNSLOTH_FORCE_FLOAT32", "0") == "1"
            )
            if allow_all_runs or allow_float16_runs:
                if eval(_dtype) is not None:
                    dtype = eval(_dtype)
                if eval(_bnb_compute_dtype) is not None:
                    bnb_compute_dtype = eval(_bnb_compute_dtype)
                correct_dtype = bnb_compute_dtype
                custom_datatype = _custom_datatype
                # Execute code as well
                if len(execute_code.strip()) != 0:
                    exec(execute_code)
            else:
                custom_datatype = None
                correct_dtype = None

        # Stop SDPA for some archs like Pixtral / Mistral3
        flex_attn_impl = None
        if auto_config is None:
            auto_config = AutoConfig.from_pretrained(
                model_name,
                token = token,
                trust_remote_code = trust_remote_code,
            )
        try:
            model_class = auto_model._model_mapping[auto_config.__class__]
        except Exception:
            model_class = None
        flex_attn_impl = prefer_flex_attn_if_supported(model_class, auto_config)

        # Handle FP8 models: get_model_name has already redirected this to BF16 sibling if the model ships with
        # FP8 weights. We just need to update it here for sanity.
        auto_config.model_name = model_name
        # Re-resolve model_class after potential config change
        try:
            model_class = auto_model._model_mapping[auto_config.__class__]
        except Exception:
            model_class = None

        model_type = str(getattr(auto_config, "model_type", "")).lower()
        if model_type.startswith("gemma3n"):
            # Gemma3N variants initialize timm-based vision towers which do
            # not support flex_attention, so default to eager unless overridden.
            default_attn_impl = "eager"
        else:
            default_attn_impl = "flex_attention" if flex_attn_impl else "sdpa"
        if not ("attn_implementation" in kwargs):
            kwargs["attn_implementation"] = default_attn_impl
        if not supports_sdpa and kwargs.get("attn_implementation") == "sdpa":
            if os.environ.get("UNSLOTH_ENABLE_FLEX_ATTENTION", "0") == "0":
                print(
                    f"Unsloth: {model_type_arch.title()} does not support SDPA - switching to fast eager."
                )
            del kwargs["attn_implementation"]

        bnb_config = None
        user_quantization_config = kwargs.get("quantization_config", None)
        if full_finetuning and (load_in_4bit or load_in_8bit):
            print(
                "Unsloth: You selected full finetuning support, but 4bit / 8bit is enabled - disabling LoRA / QLoRA."
            )
            load_in_4bit = False
            load_in_8bit = False
            load_in_16bit = False

        if int(load_in_4bit) + int(load_in_8bit) + int(load_in_16bit) >= 2:
            raise RuntimeError(
                "Unsloth: Can only load in 4bit or 8bit or 16bit, not a combination!"
            )
        _skip_modules = SKIP_QUANTIZATION_MODULES.copy()
        # Nemotron-H uses 'mixer' (not 'mamba') for Mamba layers.
        # Mamba fused kernels pass out_proj.weight directly to F.linear,
        # which fails with quantized Params4bit. Skip out_proj from quantization.
        if any(mt == "nemotron_h" for mt in (model_types or [])):
            _skip_modules.append("out_proj")

        if load_in_4bit:
            bnb_config = BitsAndBytesConfig(
                load_in_4bit = True,
                bnb_4bit_use_double_quant = True,
                bnb_4bit_quant_type = "nf4",
                bnb_4bit_compute_dtype = bnb_compute_dtype,
                llm_int8_skip_modules = _skip_modules,
            )
        elif load_in_8bit:
            bnb_config = BitsAndBytesConfig(
                load_in_8bit = True,
                llm_int8_skip_modules = _skip_modules,
            )
        elif load_in_16bit:
            bnb_config = None
        elif not load_in_4bit and not load_in_8bit and not full_finetuning:
            print(
                "Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA."
            )

        if full_finetuning:
            os.environ["UNSLOTH_ENABLE_FULL_FINETUNING"] = "1"
            if dtype == torch.bfloat16:
                if float32_mixed_precision != True:
                    print(
                        f"Unsloth: Using bfloat16 full finetuning which cuts memory usage by 50%.\n"
                        f"To enable float32 training, use `float32_mixed_precision = True` during FastLanguageModel.from_pretrained"
                    )
                else:
                    print(
                        f"Unsloth: Using full float32 full finetuning. "
                        f"To enable bfloat16 training to reduce VRAM usage by 50% albeit with a slightly higher loss, do:\n"
                        "use `float32_mixed_precision = False` during FastLanguageModel.from_pretrained"
                    )
                    os.environ["UNSLOTH_BFLOAT16_MIXED_PRECISION"] = "1"
            else:
                print(
                    "Unsloth: Float16 full finetuning uses more memory since we upcast weights to float32."
                )
        else:
            os.environ["UNSLOTH_ENABLE_FULL_FINETUNING"] = "0"

        # Fix AttributeError: 'BitsAndBytesConfig' object has no attribute 'get_loading_attributes'
        if bnb_config is not None and not hasattr(bnb_config, "get_loading_attributes"):
            bnb_config.get_loading_attributes = lambda *args, **kwargs: {}

        # Cannot be None, since HF now checks for the config
        if load_in_4bit or load_in_8bit:
            # Ignore load_in_4bit / load_in_8bit for MXFP4 - best to get config file
            if (
                "gpt-oss-20b" in model_name.lower()
                or "gpt-oss-120b" in model_name.lower()
            ):
                pass
            else:
                if user_quantization_config is None:
                    kwargs["quantization_config"] = bnb_config
        else:
            if auto_config is None:
                auto_config = AutoConfig.from_pretrained(
                    model_name,
                    token = token,
                    trust_remote_code = trust_remote_code,
                )
            if hasattr(auto_config, "quantization_config"):
                from transformers.quantizers.auto import (
                    AUTO_QUANTIZATION_CONFIG_MAPPING,
                )

                quantization_config = auto_config.quantization_config
                quant_method = quantization_config["quant_method"]
                # Sometimes bitsandbytes_4bit + bitsandbytes_8bit is provided
                if (
                    quant_method == "bitsandbytes"
                    and "bitsandbytes" not in AUTO_QUANTIZATION_CONFIG_MAPPING
                ):
                    if "bitsandbytes_4bit" not in AUTO_QUANTIZATION_CONFIG_MAPPING:
                        raise KeyError(
                            "Unsloth: AUTO_QUANTIZATION_CONFIG_MAPPING does not have `bitsandbytes_4bit`"
                        )
                    quantizer = AUTO_QUANTIZATION_CONFIG_MAPPING["bitsandbytes_4bit"]
                else:
                    quantizer = AUTO_QUANTIZATION_CONFIG_MAPPING[quant_method]
                quantizer_kwargs = {}
                if quant_method == "compressed-tensors":
                    # Ignore these
                    pass
                else:
                    # We cannot dequantize since gpt-oss-20b MXFP4 will now be gpt-oss-20b-BF16
                    if (
                        load_in_16bit
                        and "dequantize" in inspect.signature(quantizer).parameters
                    ):
                        quantizer_kwargs["dequantize"] = True
                    try:
                        # Sometimes this fails so we wrap it in a try except
                        quantization_config = quantizer.from_dict(
                            quantization_config, **quantizer_kwargs
                        )
                    except:
                        pass
                    if user_quantization_config is None:
                        kwargs["quantization_config"] = quantization_config

        # Check if using forced float32 - we load it in bfloat16, then cast to float16!
        torch_dtype = dtype
        if do_forced_float32:
            torch_dtype = torch.bfloat16

        kwargs = add_dtype_kwargs(torch_dtype, kwargs)

        config_attn_impl = kwargs.get("attn_implementation", None)
        if config_attn_impl is None:
            config_attn_impl = "sdpa" if supports_sdpa else "eager"
        if auto_config is None:
            auto_config = AutoConfig.from_pretrained(
                model_name,
                token = token,
                trust_remote_code = trust_remote_code,
            )
        setattr(auto_config, "_attn_implementation", config_attn_impl)
        if hasattr(auto_config, "attn_implementation"):
            setattr(auto_config, "attn_implementation", config_attn_impl)
        model_config = auto_config

        verify_fp8_support_if_applicable(model_config)

        raise_handler = RaiseUninitialized()
        if not fast_inference:
            # Prevent load_in_fp8 from being forwarded into HF internal model loading
            load_in_fp8 = kwargs.pop("load_in_fp8", None)
            model = auto_model.from_pretrained(
                model_name,
                config = model_config,
                device_map = device_map,
                # torch_dtype           = torch_dtype, # Transformers removed torch_dtype
                # quantization_config   = bnb_config,
                token = token,
                trust_remote_code = trust_remote_code,
                # attn_implementation   = attn_implementation,
                **kwargs,
            )
            if hasattr(model, "generate"):
                model.fast_generate = make_fast_generate_wrapper(model.generate)
                model.fast_generate_batches = error_out_no_vllm
            if offload_embedding:
                if bool(
                    os.environ.get("WSL_DISTRO_NAME") or os.environ.get("WSL_INTEROP")
                ):
                    # WSL doesn't work with offloaded embeddings
                    pass
                elif os.name == "nt":
                    # Windows doesn't work with offloaded embeddings
                    pass
                else:
                    embed_tokens = model.get_input_embeddings()
                    nbytes = embed_tokens.weight.numel() * embed_tokens.weight.itemsize
                    ngb = round(nbytes / 1024 / 1024 / 1024, 2)
                    print(f"Unsloth: Offloading embeddings to RAM to save {ngb} GB.")
                    embed_tokens.to("cpu")

                    # Add hooks to move inputs to CPU and back to CUDA
                    # [TODO] Doesn't seem to work!
                    # def pre_hook(module, args):
                    #     args[0]._old_device = args[0].device
                    #     return (args[0].to("cpu", non_blocking = True))
                    # def post_hook(module, args, output):
                    #     old_device = getattr(args[0], "_old_device", "cuda")
                    #     return output.to(old_device, non_blocking = True)
                    # embed_tokens.register_forward_pre_hook(pre_hook,  prepend = True)
                    # embed_tokens.register_forward_hook    (post_hook, prepend = True)
                    # Must free GPU memory otherwise will not free!
                    torch.cuda.empty_cache()
                    gc.collect()
        else:
            from unsloth_zoo.vllm_utils import (
                load_vllm,
                get_vllm_state_dict,
                convert_vllm_to_huggingface,
                generate_batches,
                get_lora_supported_ranks,
            )

            if full_finetuning:
                max_lora_rank = max(get_lora_supported_ranks())
                raise NotImplementedError(
                    "Unsloth: `fast_inference=True` cannot be used together with `full_finetuning=True`.\n"
                    "Reason: fast_inference is optimized for inference-only workflows and "
                    "does not currently support full fine-tuning.\n"
                    "Workaround: disable fast_inference, or use parameter-efficient fine-tuning "
                    f"(e.g. LoRA with rank r={max_lora_rank})."
                )

            model_config.model_name = model_name

            if fast_inference:
                fast_inference, model_name = fast_inference_setup(
                    model_name, model_config
                )

            fp8_mode = None
            if load_in_fp8 != False:
                fp8_mode = _get_fp8_mode_and_check_settings(
                    load_in_fp8,
                    fast_inference,
                    full_finetuning,
                    load_in_4bit,
                    load_in_8bit,
                    load_in_16bit,
                )

            allowed_args = inspect.getfullargspec(load_vllm).args
            load_vllm_kwargs = dict(
                model_name = model_name,
                config = model_config,
                gpu_memory_utilization = gpu_memory_utilization,
                max_seq_length = max_seq_length,
                dtype = dtype,
                float8_kv_cache = float8_kv_cache,
                enable_lora = vllm_enable_lora,
                max_lora_rank = max_lora_rank,
                disable_log_stats = disable_log_stats,
                use_bitsandbytes = load_in_4bit,
                unsloth_vllm_standby = unsloth_vllm_standby,
                is_vision_model = is_vlm,
                fp8_mode = fp8_mode,
            )
            for allowed_arg in allowed_args:
                if allowed_arg not in load_vllm_kwargs and allowed_arg in kwargs:
                    load_vllm_kwargs[allowed_arg] = kwargs[allowed_arg]

            # Load vLLM first
            llm = load_vllm(**load_vllm_kwargs)

            # Convert to HF format
            _, quant_state_dict = get_vllm_state_dict(
                llm,
                config = model_config,
                is_vision_model = is_vlm,
                load_in_fp8 = load_in_fp8,
            )
            model = convert_vllm_to_huggingface(
                quant_state_dict,
                model_config,
                dtype,
                bnb_config,
                is_vision_model = is_vlm,
            )
            model.vllm_engine = llm
            model.fast_generate = model.vllm_engine.generate
            model.fast_generate_batches = functools.partial(
                generate_batches, model.vllm_engine
            )

        raise_handler.remove()

        # Return old flag
        os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = old_hf_transfer

        # Check float32 norm weights
        if os.environ.get("UNSLOTH_HIGH_PRECISION_LAYERNORM", "0") == "1":
            for jj, (name, module) in enumerate(model.named_modules()):
                if (
                    name.endswith(("norm", "norm1", "norm2", "norm3", "norm4"))
                    or "layernorm" in name
                    or "layer_norm" in name
                ) and hasattr(module, "weight"):
                    module._pre_set_compute_dtype = torch.float32
        # Edit data-types
        if custom_datatype is not None:
            with torch.no_grad():
                for jj, (name, module) in enumerate(model.named_modules()):
                    exec(custom_datatype)
        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            if DEVICE_TYPE in ("cuda", "hip"):
                torch.cuda.empty_cache()
            elif DEVICE_TYPE == "xpu":
                torch.xpu.empty_cache()

        # Counteract saved tokenizers
        tokenizer_name = model_name if tokenizer_name is None else tokenizer_name

        # Fix _Unsloth_Patched_ prefix in local config files from old saves (issue #4085)
        if os.path.isdir(tokenizer_name):
            import json as _json

            for _cfg_name in (
                "processor_config.json",
                "preprocessor_config.json",
                "tokenizer_config.json",
            ):
                _cfg_path = os.path.join(tokenizer_name, _cfg_name)
                if os.path.exists(_cfg_path):
                    try:
                        with open(_cfg_path, "r", encoding = "utf-8") as _f:
                            _cfg = _json.load(_f)
                        if _cfg.get("processor_class", "").startswith(
                            "_Unsloth_Patched_"
                        ):
                            _cfg["processor_class"] = _cfg["processor_class"][
                                len("_Unsloth_Patched_") :
                            ]
                            with open(_cfg_path, "w", encoding = "utf-8") as _f:
                                _json.dump(_cfg, _f, indent = 2, ensure_ascii = False)
                    except Exception:
                        pass

        if (whisper_language and whisper_task) or auto_model.__name__.endswith(
            "ForConditionalGeneration"
        ):
            try:
                tokenizer = auto_processor.from_pretrained(
                    tokenizer_name,
                    padding_side = "left",
                    token = token,
                    language = whisper_language,
                    task = whisper_task,
                    trust_remote_code = trust_remote_code,
                )
            except Exception:
                tokenizer = None
        else:
            try:
                tokenizer = auto_processor.from_pretrained(
                    tokenizer_name,
                    padding_side = "left",
                    token = token,
                    trust_remote_code = trust_remote_code,
                )
            except:
                tokenizer = get_auto_processor(
                    tokenizer_name,
                    padding_side = "left",
                    token = token,
                    trust_remote_code = trust_remote_code,
                )

        # If processor loading failed (e.g., tokenizer class not found),
        # or if AutoProcessor silently degraded to a text-only tokenizer
        # instead of returning a full VLM processor (issue #4085),
        # try constructing the processor manually from separate components.
        _processor_is_degraded = (
            is_vlm
            and tokenizer is not None
            and not hasattr(tokenizer, "image_processor")
        )
        if (tokenizer is None or _processor_is_degraded) and is_vlm:
            _fallback = _construct_vlm_processor_fallback(
                tokenizer_name,
                model_type_arch,
                token,
                trust_remote_code,
            )
            if _fallback is not None:
                tokenizer = _fallback
            if tokenizer is None:
                import sys

                print(
                    f"Unsloth: Warning - VLM processor fallback returned None for model_type={model_type_arch}",
                    file = sys.stderr,
                )
        if hasattr(tokenizer, "tokenizer"):
            __tokenizer = tokenizer.tokenizer
            # Add padding side as well
            __tokenizer.padding_side = "left"
            # Check bos, eos, pad tokens
            if hasattr(__tokenizer, "bos_token"):
                tokenizer.bos_token = __tokenizer.bos_token
                tokenizer.bos_token_id = __tokenizer.bos_token_id
            if hasattr(__tokenizer, "eos_token"):
                tokenizer.eos_token = __tokenizer.eos_token
                tokenizer.eos_token_id = __tokenizer.eos_token_id
            if hasattr(__tokenizer, "pad_token"):
                tokenizer.pad_token = __tokenizer.pad_token
                tokenizer.pad_token_id = __tokenizer.pad_token_id
        # Fix other stuff like BnB compute data types
        model, tokenizer = patch_model_and_tokenizer(
            model,
            tokenizer,
            downcast_rope = False,
            fix_embeddings = False,
            do_forced_float32 = do_forced_float32,
            correct_dtype = correct_dtype,
        )

        try:
            model, tokenizer = patch_tokenizer(model, tokenizer)
        except Exception as _patch_err:
            # Some VLM processors (e.g., ERNIE VL) may fail during tokenizer patching.
            # Try loading tokenizer separately via AutoTokenizer as fallback.
            try:
                from transformers import AutoTokenizer as _AutoTokenizer

                _fallback_tok = _AutoTokenizer.from_pretrained(
                    tokenizer_name,
                    padding_side = "left",
                    token = token,
                    trust_remote_code = trust_remote_code,
                )
                model, _fallback_tok = patch_tokenizer(model, _fallback_tok)
                # Re-attach as processor wrapper if original was a processor
                if hasattr(tokenizer, "image_processor"):
                    tokenizer.tokenizer = _fallback_tok
                else:
                    tokenizer = _fallback_tok
            except Exception:
                # If fallback also fails, raise the original error
                raise _patch_err
        model = post_patch_loss_function(model)

        # Log Unsloth version for future fastpaths for inference
        if hasattr(model, "config"):
            model.config.update({"unsloth_version": __version__})
        patch_saving_functions(model, vision = True)
        if tokenizer is None:
            # Last resort: try loading tokenizer via AutoTokenizer, then PreTrainedTokenizerFast
            try:
                from transformers import AutoTokenizer as _AutoTokenizer

                tokenizer = _AutoTokenizer.from_pretrained(
                    tokenizer_name,
                    padding_side = "left",
                    token = token,
                    trust_remote_code = trust_remote_code,
                )
            except Exception:
                try:
                    from transformers import PreTrainedTokenizerFast

                    tokenizer = PreTrainedTokenizerFast.from_pretrained(
                        tokenizer_name,
                        padding_side = "left",
                        token = token,
                        trust_remote_code = trust_remote_code,
                    )
                except Exception:
                    del model
                    raise RuntimeError(
                        "Unsloth: The tokenizer is weirdly not loaded? Please check if there is one."
                    )
        patch_saving_functions(tokenizer, vision = True)

        # Fix gradient accumulation
        from transformers.trainer import Trainer

        patch_gradient_accumulation_fix(Trainer)

        # Save tokenizer for inference purposes
        tokenizer.padding_side = "left"  # Force inference
        if hasattr(tokenizer, "tokenizer"):
            tokenizer.tokenizer.padding_side = "left"  # Force inference
        m = model
        while hasattr(m, "model"):
            m.max_seq_length = max_seq_length
            m._saved_temp_tokenizer = tokenizer
            # Also set is_loaded_in_8bit to disable incorrect DDP
            m.is_loaded_in_8bit = True if not full_finetuning else False
            m = m.model
        m.max_seq_length = max_seq_length
        # Save to modules as well
        for module in model.modules():
            module.max_seq_length = max_seq_length
        m._saved_temp_tokenizer = tokenizer
        # Also set is_loaded_in_8bit to disable incorrect DDP
        m.is_loaded_in_8bit = True if not full_finetuning else False

        # Patch generate
        if os.environ.get("UNSLOTH_DISABLE_FAST_GENERATION", "0") == "0" and hasattr(
            model, "generate"
        ):
            if model.generate.__name__ != "unsloth_base_fast_generate":
                model._old_generate = model.generate
                unsloth_base_fast_generate.__doc__ = model._old_generate.__doc__
                model.generate = types.MethodType(unsloth_base_fast_generate, model)
        model._unsloth_trust_remote_code = trust_remote_code
        # Post patches
        model = FastBaseModel.post_patch_model(
            model,
            use_gradient_checkpointing = use_gradient_checkpointing,
            trust_remote_code = trust_remote_code,
            model_type = model_type_arch,
            tokenizer = tokenizer,
            float32_mixed_precision = float32_mixed_precision,
        )
        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            if DEVICE_TYPE in ("cuda", "hip"):
                torch.cuda.empty_cache()
            elif DEVICE_TYPE == "xpu":
                torch.xpu.empty_cache()
        return model, tokenizer

    @staticmethod
    def get_peft_model(
        model,
        r = 16,
        target_modules = None,
        lora_alpha = 16,
        lora_dropout = 0.0,
        bias = "none",
        finetune_vision_layers = True,
        finetune_language_layers = True,
        finetune_attention_modules = True,
        finetune_mlp_modules = True,
        layers_to_transform = None,
        layers_pattern = None,
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        max_seq_length = 2048,  # not used anymore
        use_rslora = False,
        modules_to_save = None,
        init_lora_weights = True,
        loftq_config = {},
        task_type = TaskType.CAUSAL_LM,
        temporary_location = "_unsloth_temporary_saved_buffers",
        qat_scheme = None,
        target_parameters = None,  # For MoE expert layers (nn.Parameter)
        ensure_weight_tying = False,  # [TODO] Add `ensure_weight_tying` for `modules_to_save` for vision models
        **kwargs,
    ):
        """
        Wrap `model` with LoRA adapters and re-apply the Unsloth post patches.

        Args:
            model: The model to wrap. Must already carry a `max_seq_length`
                attribute, which is read back and re-applied after wrapping.
            r: LoRA rank. Must be exactly of type `int` and larger than 0.
            target_modules: `None` or `"all-linear"` builds the target via
                `get_peft_regex` from the four `finetune_*` flags;
                `"all-linear"` additionally forces all four flags to True.
                Otherwise it must be a `list`, `tuple` or `str`.
            random_state: Seed handed to `transformers_set_seed`.
            max_seq_length: Ignored; overwritten with `model.max_seq_length`.
            loftq_config: Passed through `validate_loftq_config`.
            qat_scheme: When not None, `_prepare_model_for_qat` is applied to
                the PEFT model.
            target_parameters: When None, filled in by
                `get_moe_target_parameters(model, target_modules)`.
            **kwargs: Merged over the local variables before the `LoraConfig`
                filter, so a key here overrides a same-named argument.

            Every other argument is not read directly in this body; it only
            takes effect if `LoraConfig` accepts a parameter of the same name
            (see the `locals()` filter below).

        Returns:
            The PEFT-wrapped model, or `model` itself (unchanged) when the
            environment variable `UNSLOTH_ENABLE_FULL_FINETUNING` is "1".

        Raises:
            TypeError: If `r` is not an `int` or is not larger than 0.
            RuntimeError: If `model` is already a `PeftModelForCausalLM`, or
                if one of the vLLM (`fast_inference`) restrictions is hit.
            AssertionError: If an explicit `target_modules` is not a
                `list`, `tuple` or `str`.
        """
        if os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1":
            print(
                "Unsloth: Full finetuning is enabled, so .get_peft_model has no effect"
            )
            return model
        transformers_set_seed(random_state)

        # Exact type check on purpose: `type(...) is not int` also rejects bool
        if type(r) is not int:
            raise TypeError(f"Unsloth: Rank of {str(r)} must be an integer.")
        if r <= 0:
            raise TypeError(f"Unsloth: Rank of {str(r)} must be larger than 0.")

        if isinstance(model, PeftModelForCausalLM):
            raise RuntimeError(
                "Unsloth: You already added LoRA adapters to your model!"
            )

        # "all-linear" means everything: override whatever flags were passed
        if target_modules == "all-linear":
            finetune_vision_layers = True
            finetune_language_layers = True
            finetune_attention_modules = True
            finetune_mlp_modules = True
        if target_modules is None or target_modules == "all-linear":
            target_modules = get_peft_regex(
                model,
                finetune_vision_layers = finetune_vision_layers,
                finetune_language_layers = finetune_language_layers,
                finetune_attention_modules = finetune_attention_modules,
                finetune_mlp_modules = finetune_mlp_modules,
            )
        else:
            assert type(target_modules) in (
                list,
                tuple,
                str,
            )

        # Restrictions that only apply when the model carries a vLLM engine
        if hasattr(model, "vllm_engine"):
            if (
                hasattr(model.vllm_engine, "llm_engine")
                and hasattr(model.vllm_engine.llm_engine, "vllm_config")
                and getattr(
                    model.vllm_engine.llm_engine.vllm_config, "lora_config", None
                )
                is None
            ):
                # If vLLM is being used but lora is not enabled, throw an error
                # Ref https://github.com/vllm-project/vllm/blob/51ba839555a5d122eadd91e9c16463ac288f5fa1/vllm/v1/engine/processor.py#L148-L151
                raise RuntimeError("Unsloth: LoRA is not enabled for this model!")
            if finetune_vision_layers:
                # vLLM does not support LoRA on vision layers
                # https://github.com/vllm-project/vllm/blob/main/vllm/lora/models.py#L471-L477
                # TODO: Update this once vLLM V1 supports LoRA on vision layers (possibly not happening)
                raise RuntimeError(
                    "Unsloth: Finetuning vision layers is not supported for fast_inference. Only text layers are supported!"
                )
            if model.config.model_type in VLLM_NON_LORA_VLM:
                # mllama is still only in vllm v0 https://arc.net/l/quote/llwkfgmu
                # https://docs.vllm.ai/en/stable/models/supported_models.html#text-generation_1
                # vLLM V0 does not support LoRA on multi modal models.
                # TODO: Update this once vLLM V1 supports Llama 3.2 aka mllama
                raise RuntimeError(
                    "Unsloth: LoRA finetuning for Llama 3.2 aka mllama models is not supported with fast_inference!"
                )

        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            if DEVICE_TYPE in ("cuda", "hip"):
                torch.cuda.empty_cache()
            elif DEVICE_TYPE == "xpu":
                torch.xpu.empty_cache()
        # The `max_seq_length` argument is discarded in favour of the value
        # already stored on the model
        max_seq_length = model.max_seq_length
        # If we pass loftq_config = None we will get an error
        loftq_config = validate_loftq_config(
            loftq_config, lora_dropout, bias, init_lora_weights, model
        )

        # Auto-detect MoE models and populate target_parameters for expert layers
        if target_parameters is None:
            target_parameters = get_moe_target_parameters(model, target_modules)

        # Get only allowed parameters for LoraConfig
        # NOTE: this snapshots EVERY local name defined up to this point, so
        # adding or renaming a local above can change what LoraConfig receives.
        # Entries from `kwargs` win over same-named locals.
        local_variables = {
            **locals(),
            **kwargs,
        }
        del local_variables["kwargs"]
        allowed_parameters = inspect.signature(LoraConfig).parameters.keys()
        lora_config = LoraConfig(
            **{k: v for k, v in local_variables.items() if k in allowed_parameters},
        )
        model = prepare_model_for_kbit_training(
            model,
            use_gradient_checkpointing = use_gradient_checkpointing,
        )
        model = _get_peft_model(model, lora_config)
        # Apply QAT + LoRA if specified
        if qat_scheme is not None:
            print("Unsloth: Applying QAT to mitigate quantization degradation")
            model = _prepare_model_for_qat(model, qat_scheme)
        # Fix LoraConfig.auto_mapping is None
        fix_lora_auto_mapping(model)
        # Enable gradients on modules which are trainable
        requires_grad_for_gradient_checkpointing(model)
        # Reuse the flag stored on the model when it was loaded (False if absent)
        trust_remote_code = getattr(model, "_unsloth_trust_remote_code", False)
        model = FastBaseModel.post_patch_model(
            model,
            use_gradient_checkpointing = use_gradient_checkpointing,
            trust_remote_code = trust_remote_code,
        )
        model.max_seq_length = max_seq_length
        # Save to modules as well
        for module in model.modules():
            module.max_seq_length = max_seq_length
        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            if DEVICE_TYPE in ("cuda", "hip"):
                torch.cuda.empty_cache()
            elif DEVICE_TYPE == "xpu":
                torch.xpu.empty_cache()
        patch_saving_functions(model, vision = True)
        patch_peft_fast_inference(model)

        # Add for_inference and for_training
        model.for_training = functools.partial(FastBaseModel.for_training, model)
        model.for_inference = functools.partial(FastBaseModel.for_inference, model)
        # Also attach them to every wrapper that still has a `.model` child;
        # the innermost object (the one without `.model`) is not touched here
        m = model
        while hasattr(m, "model"):
            m.for_training = functools.partial(FastBaseModel.for_training, m)
            m.for_inference = functools.partial(FastBaseModel.for_inference, m)
            m = m.model
        return model

    @staticmethod
    def post_patch_model(
        model,
        use_gradient_checkpointing = True,
        trust_remote_code = False,
        model_type = None,
        tokenizer = None,
        float32_mixed_precision = None,
    ):
        """
        Apply the Unsloth training patches to a loaded or PEFT-wrapped model.

        Args:
            model: Model to patch; modified in place and also returned.
            use_gradient_checkpointing: Forwarded to `prepare_model_for_training`.
            trust_remote_code: When exactly equal to False, the patched
                `Trainer._inner_training_loop` is required to be in place.
            model_type: Accepted for interface compatibility; not read here.
            tokenizer: Optional tokenizer / processor. Only used to decide
                whether (and which) embedding padding row is zeroed.
            float32_mixed_precision: Respected if it is a `bool`. Any other
                value selects True, unless full finetuning is enabled and the
                config dtype is bfloat16, in which case False is used.

        Returns:
            The patched model.

        Raises:
            RuntimeError: If `Trainer._inner_training_loop` is not the
                `_fast_inner_training_loop` patch while `trust_remote_code`
                equals False.
        """
        full_finetuning = os.environ.get("UNSLOTH_ENABLE_FULL_FINETUNING", "0") == "1"
        # Stored as `is_loaded_in_8bit` on every wrapper to disable incorrect DDP
        loaded_in_8bit_flag = not full_finetuning

        if type(float32_mixed_precision) is not bool:
            # Nothing was chosen explicitly, so pick the default
            float32_mixed_precision = True
            if (
                _get_dtype(dtype_from_config(model.config)) == torch.bfloat16
                and full_finetuning
            ):
                # Use bfloat16 precision for full finetuning
                float32_mixed_precision = False

        # VLMs can hit DDP "marked ready twice" with re-entrant checkpointing.
        # See: https://github.com/unslothai/unsloth/issues/3713.
        use_reentrant = not is_distributed()
        if not use_reentrant:
            # Under DDP, avoid the offloaded/re-entrant checkpoint patch.
            unpatch_unsloth_gradient_checkpointing()
            unpatch_unsloth_smart_gradient_checkpointing()
            # Force native checkpoint to default to non-reentrant for downstream calls.
            # This function runs more than once per model (after loading and
            # again after adding LoRA), so reuse a wrapper that is already
            # installed instead of stacking a new closure on top of it.
            _current_checkpoint = torch_checkpoint.checkpoint
            if getattr(_current_checkpoint, "_unsloth_forces_non_reentrant", False):
                _nonre_checkpoint = _current_checkpoint
            else:
                _orig_checkpoint = _current_checkpoint

                def _nonre_checkpoint(function, *args, **kwargs):
                    kwargs["use_reentrant"] = False
                    return _orig_checkpoint(function, *args, **kwargs)

                _nonre_checkpoint._unsloth_forces_non_reentrant = True

            torch_checkpoint.checkpoint = _nonre_checkpoint
            hf_modeling_utils.checkpoint = _nonre_checkpoint

        model = prepare_model_for_training(
            model,
            use_gradient_checkpointing = use_gradient_checkpointing,
            use_reentrant = use_reentrant,
            full_finetuning = full_finetuning,
            train_layernorms = full_finetuning,
            train_embedding = full_finetuning,
            train_lm_head = full_finetuning,
            float32_mixed_precision = float32_mixed_precision,
            patch_modules_to_save = True,
        )

        from transformers.trainer import Trainer

        # `== False` is deliberate: a value such as None skips this check
        if (
            Trainer._inner_training_loop.__name__ != "_fast_inner_training_loop"
            and trust_remote_code == False
        ):
            raise RuntimeError("Unsloth: Unsuccessfully patched inner_training_loop")
        patch_saving_functions(model, vision = True)

        # Patch tokenizer to pad to the left on every level of the `.model`
        # chain, including the innermost object
        m = model
        while True:
            if hasattr(m, "_saved_temp_tokenizer"):
                if hasattr(m._saved_temp_tokenizer, "tokenizer"):
                    m._saved_temp_tokenizer.tokenizer.padding_side = "left"
            # Also set is_loaded_in_8bit to disable incorrect DDP
            m.is_loaded_in_8bit = loaded_in_8bit_flag
            if not hasattr(m, "model"):
                break
            m = m.model

        # Clear deleted GPU items
        for _ in range(3):
            gc.collect()
            if DEVICE_TYPE in ("cuda", "hip"):
                torch.cuda.empty_cache()
            elif DEVICE_TYPE == "xpu":
                torch.xpu.empty_cache()
        # Add for_inference and for_training
        model.for_training = functools.partial(FastBaseModel.for_training, model)
        model.for_inference = functools.partial(FastBaseModel.for_inference, model)
        # Wrappers that still have a `.model` child get them too; the innermost
        # object is left alone
        m = model
        while hasattr(m, "model"):
            m.for_training = functools.partial(FastBaseModel.for_training, m)
            m.for_inference = functools.partial(FastBaseModel.for_inference, m)
            m = m.model
        # Set weight[padding_idx] = 0 for embeddings that are NOT tied with the
        # lm_head. When weights are tied, zeroing the padding row also zeros
        # the corresponding lm_head row, forcing logit = 0 for the pad token.
        # Only do this if tokenizer is defined since eos_token == pad_token sometimes!
        pad_token_id = getattr(tokenizer, "pad_token_id", None)
        lm_head_weight = getattr(getattr(model, "lm_head", None), "weight", None)
        if (
            tokenizer is not None
            and getattr(tokenizer, "eos_token_id", None) != pad_token_id
        ):
            with torch.no_grad():
                for _, module in model.named_modules():
                    # Exact type match: subclasses of nn.Embedding are skipped
                    if type(module) is not torch.nn.Embedding:
                        continue
                    if getattr(module, "weight", None) is None:
                        continue
                    padding_idx = getattr(module, "padding_idx", None)
                    if padding_idx is None or padding_idx != pad_token_id:
                        continue
                    if not (padding_idx < module.weight.shape[0]):
                        continue
                    # Skip if tied to lm_head
                    if (
                        lm_head_weight is not None
                        and module.weight.data_ptr() == lm_head_weight.data_ptr()
                    ):
                        continue
                    module.weight[padding_idx] = 0
        return model

    @staticmethod
    def for_inference(model):
        """
        Put `model`, and every object reachable through its `.model` chain,
        into generation mode.

        Gradient checkpointing and the `training` flag are switched off, a
        saved tokenizer (if any) is set to pad on the left, and the
        `_flag_for_generation` marker is set. The `UNSLOTH_RETURN_HIDDEN_STATES`
        and `UNSLOTH_RETURN_LOGITS` environment variables are set to "0" and
        "1" respectively.

        Returns:
            The same `model` object.

        Raises:
            TypeError: If `model` has no `parameters` attribute.
        """
        if not hasattr(model, "parameters"):
            raise TypeError(
                "Unsloth: I think you're passing a tokenizer, not the model to for_inference!"
            )

        def _enter_generation_mode(layer):
            # Only overwrite flags the object already exposes
            for flag_name in ("gradient_checkpointing", "training"):
                if hasattr(layer, flag_name):
                    setattr(layer, flag_name, False)
            # Generation pads on the left
            if hasattr(layer, "_saved_temp_tokenizer"):
                layer._saved_temp_tokenizer.padding_side = "left"
            # Marker that is removed again by for_training
            layer._flag_for_generation = True

        # Visit the outer model first, then each nested `.model`, ending with
        # the innermost object that has no `.model`
        current = model
        while True:
            _enter_generation_mode(current)
            if not hasattr(current, "model"):
                break
            current = current.model
        model.eval()  # to turn off training on modules deeper in

        # Since transformers 4.53, must turn off explicitly
        for submodule in model.modules():
            if hasattr(submodule, "gradient_checkpointing"):
                submodule.gradient_checkpointing = False

        # Also disable training for embeddings for NEFTune
        for getter_name in ("get_input_embeddings", "get_output_embeddings"):
            if not hasattr(model, getter_name):
                continue
            embedding_layer = getattr(model, getter_name)()
            if hasattr(embedding_layer, "training"):
                embedding_layer.training = False

        # Must disable returning hidden states in the case for GRPO
        os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "0"
        # Must enable returning logits
        os.environ["UNSLOTH_RETURN_LOGITS"] = "1"
        # Turn off skip guards and set stance to default
        if torch_compiler_set_stance is not None:
            torch_compiler_set_stance(stance = "default", skip_guard_eval_unsafe = False)
        return model

    @staticmethod
    def for_training(model, use_gradient_checkpointing = True):
        """
        Put `model`, and every object reachable through its `.model` chain,
        back into training mode.

        Args:
            model: The model to switch. Must expose `parameters`.
            use_gradient_checkpointing: Value written to every existing
                `gradient_checkpointing` attribute.

        Returns:
            The same `model` object.

        Raises:
            TypeError: If `model` has no `parameters` attribute.
        """
        if not hasattr(model, "parameters"):
            raise TypeError(
                "Unsloth: I think you're passing a tokenizer, not the model to for_training!"
            )

        # Delete all fast inference loras
        for param in model.parameters():
            if hasattr(param, "_fast_lora"):
                del param._fast_lora

        def _for_training(m):
            if hasattr(m, "gradient_checkpointing"):
                m.gradient_checkpointing = use_gradient_checkpointing
            if hasattr(m, "training"):
                m.training = True
            # Training pads the tokenizer on the right
            if hasattr(m, "_saved_temp_tokenizer"):
                m._saved_temp_tokenizer.padding_side = "right"
            # Remove the generation flag set by for_inference
            if hasattr(m, "_flag_for_generation"):
                try:
                    # Deletion sometimes fails even though hasattr succeeded
                    # (presumably when the attribute is only visible through a
                    # wrapper's attribute forwarding - not confirmed), so this
                    # stays best-effort. Catch Exception rather than using a
                    # bare except so KeyboardInterrupt / SystemExit propagate.
                    del m._flag_for_generation
                except Exception:
                    pass

        m = model
        while hasattr(m, "model"):
            _for_training(m)
            m = m.model
        _for_training(m)
        model.train()  # to turn on training on modules deeper in

        # Since transformers 4.53, must turn on explicitly
        for module in model.modules():
            if hasattr(module, "gradient_checkpointing"):
                module.gradient_checkpointing = use_gradient_checkpointing

        # Also re-enable training for embeddings for NEFTune
        if hasattr(model, "get_input_embeddings"):
            embeddings = model.get_input_embeddings()
            if hasattr(embeddings, "training"):
                embeddings.training = True
        if hasattr(model, "get_output_embeddings"):
            embeddings = model.get_output_embeddings()
            if hasattr(embeddings, "training"):
                embeddings.training = True
        # Can re-enable not returning logits
        os.environ["UNSLOTH_RETURN_LOGITS"] = "0"
        # Turn off skip guards and set stance to default
        if torch_compiler_set_stance is not None:
            torch_compiler_set_stance(stance = "default", skip_guard_eval_unsafe = False)
        return model


================================================
FILE: unsloth/ollama_template_mappers.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Public API of this module: the names exported by a star-import.
__all__ = [
    "OLLAMA_TEMPLATES",
    "OLLAMA_TEMPLATE_TO_MODEL_MAPPER",
    "MODEL_TO_OLLAMA_TEMPLATE_MAPPER",
]

# Registry mapping a chat-template name to its Ollama Modelfile text.
# It starts empty and is filled in by the assignments that follow.
OLLAMA_TEMPLATES = {}

# =========================================== Unsloth

# Modelfile for the "unsloth" chat format: optional system text, then a
# ">>> User: " turn and a ">>> Assistant: " turn that ends with the EOS
# placeholder. Single-brace {__FILE_LOCATION__} / {__EOS_TOKEN__} are
# placeholders kept in the text (presumably substituted by the code that writes
# the Modelfile -- confirm against the caller); double-brace {{ ... }} is
# Ollama's Go-template syntax and must stay as written.
unsloth_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}>>> User: {{ .Prompt }}
{{ end }}>>> Assistant: {{ .Response }}{__EOS_TOKEN__}
"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
SYSTEM """You are a helpful assistant to the user"""
'''

OLLAMA_TEMPLATES["unsloth"] = unsloth_ollama

# =========================================== Zephyr

# Modelfile for the Zephyr format: <|system|>, <|user|> and <|assistant|>
# headers, each turn terminated by the {__EOS_TOKEN__} placeholder, which is
# also the only stop sequence.
zephyr_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|system|>
{{ .System }}{__EOS_TOKEN__}
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}{__EOS_TOKEN__}
{{ end }}<|assistant|>
{{ .Response }}{__EOS_TOKEN__}
"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["zephyr"] = zephyr_ollama

# =========================================== ChatML
# Modelfile for ChatML: every turn is wrapped in <|im_start|>ROLE ... <|im_end|>.
# Both markers are registered as stop sequences; no EOS placeholder is used.
chatml_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["chatml"] = chatml_ollama

# =========================================== Mistral-1
# Ollama from https://www.ollama.com/library/mistral
# Mistral v0.1 https://ollama.com/library/mistral:v0.1/blobs/22e1b2e8dc2f
# Mistral v0.2 https://ollama.com/library/mistral:v0.2/blobs/e6836092461f
# Single-turn "[INST] ... [/INST]" prompt; the system text, when present, is
# inlined before the user prompt. The template has no {{ .Response }} slot and
# sets no sampling parameters, only the two stop sequences.
mistral_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST]"""
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
'''

# mistral:v0.3 https://ollama.com/library/mistral:v0.3/blobs/1ff5b64b61b9
# mistral-large https://ollama.com/library/mistral-large:latest/blobs/96adabcf2c08
# Message-based template with tool support.
# * `len (slice $.Messages $index)` is the number of messages from the current
#   one to the end, so `eq ... 1` means "this is the last message".
# * [AVAILABLE_TOOLS] and the system text are emitted only with a user message
#   that is the last message.
# * Assistant tool calls render as a [TOOL_CALLS] JSON list, tool output as
#   [TOOL_RESULTS].
# * When .Messages is empty the template falls back to the .System/.Prompt form.
mistral_v03_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- if .Messages }}
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and (eq (len (slice $.Messages $index)) 1) $.Tools }}[AVAILABLE_TOOLS] {{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST] {{ if and $.System (eq (len (slice $.Messages $index)) 1) }}{{ $.System }}

{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }}{{ .Content }}
{{- else if .ToolCalls }}[TOOL_CALLS] [
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}]
{{- end }}</s>
{{- else if eq .Role "tool" }}[TOOL_RESULTS] {"content": {{ .Content }}} [/TOOL_RESULTS]
{{- end }}
{{- end }}
{{- else }}[INST] {{ if .System }}{{ .System }}

{{ end }}{{ .Prompt }}[/INST]
{{- end }}{{ .Response }}
{{- if .Response }}</s>
{{- end }}"""
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
PARAMETER stop "</s>"
'''

# Mistral-small https://ollama.com/library/mistral-small:latest/blobs/6db27cd4e277
# Message-based template: system messages become [SYSTEM_PROMPT] blocks, the
# tool list is emitted before a user message that is among the last two
# messages, and assistant content is followed by </s> unless it is the last
# message. Ships a default SYSTEM prompt and temperature 0.15; no stop
# sequences are declared.
mistral_small_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $index, $_ := .Messages }}
{{- if eq .Role "system" }}[SYSTEM_PROMPT]{{ .Content }}[/SYSTEM_PROMPT]
{{- else if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST]{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }}{{ .Content }}
{{- if not (eq (len (slice $.Messages $index)) 1) }}</s>
{{- end }}
{{- else if .ToolCalls }}[TOOL_CALLS][
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS]{"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}"""
PARAMETER temperature 0.15
SYSTEM """You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris. Your knowledge base was last updated on 2023-10-01. When you're not sure about some information, you say that you don't have the information and don't make up anything. If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?")"""
'''

# mistral-small-3.1 https://ollama.com/library/mistral-small3.1:latest/blobs/6db27cd4e277
# Message-based template with [SYSTEM_PROMPT], [AVAILABLE_TOOLS], [TOOL_CALLS]
# and [TOOL_RESULTS] sections. The only PARAMETER is num_ctx 4096, followed by
# a multi-line default SYSTEM prompt.
# NOTE(review): the SYSTEM prompt contains a single-brace `{yesterday}` token.
# It is not one of the {__FILE_LOCATION__}/{__EOS_TOKEN__} placeholders used
# elsewhere in these templates -- confirm whether it is meant to be substituted
# or emitted literally.
mistral_small_31_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $index, $_ := .Messages }}
{{- if eq .Role "system" }}[SYSTEM_PROMPT]{{ .Content }}[/SYSTEM_PROMPT]
{{- else if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST]{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }}{{ .Content }}
{{- if not (eq (len (slice $.Messages $index)) 1) }}</s>
{{- end }}
{{- else if .ToolCalls }}[TOOL_CALLS][
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS]{"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}"""
PARAMETER num_ctx 4096
SYSTEM """You are Mistral Small 3.1, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.
You power an AI assistant called Le Chat.
Your knowledge base was last updated on 2023-10-01.

When you're not sure about some information, you say that you don't have the information and don't make up anything.
If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?").
You are always very attentive to dates, in particular you try to resolve dates (e.g. "yesterday" is {yesterday}) and when asked about information at specific dates, you discard information that is at another date.
You follow these instructions in all languages, and always respond to the user in the language they use or request.
Next sections describe the capabilities that you have.

# WEB BROWSING INSTRUCTIONS

You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.

# MULTI-MODAL INSTRUCTIONS

You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos.
You cannot read nor transcribe audio files or videos."""
'''

# mistral-small-3.2 https://ollama.com/library/mistral-small3.2:latest/blobs/706c4d1164f7
# Message-based template. Unlike a JSON tool-call list, each assistant tool
# call is rendered as "[TOOL_CALLS]<name>[CALL_ID]<index>[ARGS]<arguments>",
# where <index> is the position of the call within .ToolCalls. Sets
# temperature 0.15 and a multi-line default SYSTEM prompt that includes tool
# calling instructions.
mistral_small_32_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $index, $_ := .Messages }}
{{- if eq .Role "system" }}[SYSTEM_PROMPT]{{ .Content }}[/SYSTEM_PROMPT]
{{- else if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $index)) 2) $.Tools }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST]{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }}{{ .Content }}
{{- if not (eq (len (slice $.Messages $index)) 1) }}</s>
{{- end }}
{{- else if .ToolCalls }}
{{- range $i, $_ := .ToolCalls }}[TOOL_CALLS]{{ .Function.Name }}[CALL_ID]{{ $i }}[ARGS]{{ .Function.Arguments }}
{{- end }}</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS]{"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}"""
PARAMETER temperature 0.15
SYSTEM """You are Mistral Small 3.2, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.
You power an AI assistant called Le Chat.
Your knowledge base was last updated on 2023-10-01.

When you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything.
If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?").
You are always very attentive to dates, in particular you try to resolve dates and when asked about information at specific dates, you discard information that is at another date.
You follow these instructions in all languages, and always respond to the user in the language they use or request.
Next sections describe the capabilities that you have.

# WEB BROWSING INSTRUCTIONS

You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.

# MULTI-MODAL INSTRUCTIONS

You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos.
You cannot read nor transcribe audio files or videos.

TOOL CALLING INSTRUCTIONS

You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations:

1. When the request requires up-to-date information.
2. When the request requires specific data that you do not have in your knowledge base.
3. When the request involves actions that you cannot perform without tools.

Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment."""
'''


# https://ollama.com/library/mixtral:latest/blobs/53d74de0d84c
# Single-turn "[INST] ... [/INST]" prompt with the system text inlined before
# the user prompt and an explicit {{ .Response }} slot after [/INST].
mixtral_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] {{ if .System }}{{ .System }} {{ end }}{{ .Prompt }} [/INST] {{ .Response }}"""
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
'''

# https://registry.ollama.ai/library/mistral-nemo:latest/blobs/438402ddac75
# Message-based template: the tool list is emitted before a user message that
# is among the last two messages, and the system text is placed inside the
# [INST] block of a user message that is the last message. Assistant content is
# preceded by a space and followed by </s> unless it is the last message.
mistral_nemo_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{- range $i, $_ := .Messages }}
{{- if eq .Role "user" }}
{{- if and $.Tools (le (len (slice $.Messages $i)) 2) }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST]{{ if and $.System (eq (len (slice $.Messages $i)) 1) }}{{ $.System }}

{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }} {{ .Content }}{{ if not (eq (len (slice $.Messages $i)) 1) }}</s>{{ end }}
{{- else if .ToolCalls }}[TOOL_CALLS][
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS]{"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}"""
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
'''

# https://ollama.com/library/codestral:latest/blobs/51707752a87c
# Template with three modes, checked in order:
#   1. .Suffix set   -> "[SUFFIX]<suffix>[PREFIX] <prompt>" (fill-in-the-middle)
#   2. .Messages set -> [INST]/[/INST] chat turns, system text with the last
#                       user message
#   3. otherwise     -> single .System/.Prompt turn
# [PREFIX], [MIDDLE] and [SUFFIX] are stop sequences in addition to the
# [INST] markers.
codestral_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{- if .Suffix }}[SUFFIX]{{ .Suffix }}[PREFIX] {{ .Prompt }}
{{- else if .Messages }}
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}[INST] {{ if and $.System (eq (len (slice $.Messages $index)) 1) }}{{ $.System }}

{{ end }}{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }} {{ .Content }}</s>
{{- end }}
{{- end }}
{{- else }}[INST] {{ if .System }}{{ .System }}

{{ end }}{{ .Prompt }} [/INST]
{{- end }} {{ .Response }}
{{- if .Response }}</s>
{{- end }}
"""
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
PARAMETER stop "[PREFIX]"
PARAMETER stop "[MIDDLE]"
PARAMETER stop "[SUFFIX]"
'''

# https://ollama.com/library/devstral:latest/blobs/ea9ec42474e0
# Message-based template that makes two passes over .Messages: the first
# records the index of the last user message in $lastUserIndex, the second
# renders the conversation and emits [AVAILABLE_TOOLS] immediately before that
# user message. No PARAMETER lines are set; the rest of the Modelfile is a long
# default SYSTEM prompt.
devstral_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- $lastUserIndex := -1 }}
{{- range $index, $_ := .Messages }}
{{- if eq .Role "user" }}{{ $lastUserIndex = $index }}{{ end }}
{{- end }}
{{- range $index, $_ := .Messages }}
{{- if eq .Role "system" }}[SYSTEM_PROMPT]{{ .Content }}[/SYSTEM_PROMPT]
{{- else if eq .Role "user" }}
{{- if and (eq $lastUserIndex $index) $.Tools }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST]{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if .Content }}{{ .Content }}
{{- if not (eq (len (slice $.Messages $index)) 1) }}</s>
{{- end }}
{{- else if .ToolCalls }}[TOOL_CALLS][
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}]</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS]{"content": {{ .Content }}}[/TOOL_RESULTS]
{{- end }}
{{- end }}"""
SYSTEM """You are Devstral, a helpful agentic model trained by Mistral AI and using the OpenHands scaffold. You can interact with a computer to solve tasks.

<ROLE>
Your primary role is to assist users by executing commands, modifying code, and solving technical problems effectively. You should be thorough, methodical, and prioritize quality over speed.
* If the user asks a question, like "why is X happening", don't try to fix the problem. Just give an answer to the question.
</ROLE>

<EFFICIENCY>
* Each action you take is somewhat expensive. Wherever possible, combine multiple actions into a single action, e.g. combine multiple bash commands into one, using sed and grep to edit/view multiple files at once.
* When exploring the codebase, use efficient tools like find, grep, and git commands with appropriate filters to minimize unnecessary operations.
</EFFICIENCY>

<FILE_SYSTEM_GUIDELINES>
* When a user provides a file path, do NOT assume it's relative to the current working directory. First explore the file system to locate the file before working on it.
* If asked to edit a file, edit the file directly, rather than creating a new file with a different filename.
* For global search-and-replace operations, consider using `sed` instead of opening file editors multiple times.
</FILE_SYSTEM_GUIDELINES>

<CODE_QUALITY>
* Write clean, efficient code with minimal comments. Avoid redundancy in comments: Do not repeat information that can be easily inferred from the code itself.
* When implementing solutions, focus on making the minimal changes needed to solve the problem.
* Before implementing any changes, first thoroughly understand the codebase through exploration.
* If you are adding a lot of code to a function or file, consider splitting the function or file into smaller pieces when appropriate.
</CODE_QUALITY>

<VERSION_CONTROL>
* When configuring git credentials, use "openhands" as the user.name and "openhands@all-hands.dev" as the user.email by default, unless explicitly instructed otherwise.
* Exercise caution with git operations. Do NOT make potentially dangerous changes (e.g., pushing to main, deleting repositories) unless explicitly asked to do so.
* When committing changes, use `git status` to see all modified files, and stage all files necessary for the commit. Use `git commit -a` whenever possible.
* Do NOT commit files that typically shouldn't go into version control (e.g., node_modules/, .env files, build directories, cache files, large binaries) unless explicitly instructed by the user.
* If unsure about committing certain files, check for the presence of .gitignore files or ask the user for clarification.
</VERSION_CONTROL>

<PULL_REQUESTS>
* When creating pull requests, create only ONE per session/issue unless explicitly instructed otherwise.
* When working with an existing PR, update it with new commits rather than creating additional PRs for the same issue.
* When updating a PR, preserve the original PR title and purpose, updating description only when necessary.
</PULL_REQUESTS>

<PROBLEM_SOLVING_WORKFLOW>
1. EXPLORATION: Thoroughly explore relevant files and understand the context before proposing solutions
2. ANALYSIS: Consider multiple approaches and select the most promising one
3. TESTING:
   * For bug fixes: Create tests to verify issues before implementing fixes
   * For new features: Consider test-driven development when appropriate
   * If the repository lacks testing infrastructure and implementing tests would require extensive setup, consult with the user before investing time in building testing infrastructure
   * If the environment is not set up to run tests, consult with the user first before investing time to install all dependencies
4. IMPLEMENTATION: Make focused, minimal changes to address the problem
5. VERIFICATION: If the environment is set up to run tests, test your implementation thoroughly, including edge cases. If the environment is not set up to run tests, consult with the user first before investing time to run tests.
</PROBLEM_SOLVING_WORKFLOW>

<SECURITY>
* Only use GITHUB_TOKEN and other credentials in ways the user has explicitly requested and would expect.
* Use APIs to work with GitHub or other platforms, unless the user asks otherwise or your task requires browsing.
</SECURITY>

<ENVIRONMENT_SETUP>
* When user asks you to run an application, don't stop if the application is not installed. Instead, please install the application and run the command again.
* If you encounter missing dependencies:
  1. First, look around in the repository for existing dependency files (requirements.txt, pyproject.toml, package.json, Gemfile, etc.)
  2. If dependency files exist, use them to install all dependencies at once (e.g., `pip install -r requirements.txt`, `npm install`, etc.)
  3. Only install individual packages directly if no dependency files are found or if only specific packages are needed
* Similarly, if you encounter missing dependencies for essential tools requested by the user, install them when possible.
</ENVIRONMENT_SETUP>

<TROUBLESHOOTING>
* If you've made repeated attempts to solve a problem but tests still fail or the user reports it's still broken:
  1. Step back and reflect on 5-7 different possible sources of the problem
  2. Assess the likelihood of each possible cause
  3. Methodically address the most likely causes, starting with the highest probability
  4. Document your reasoning process
* When you run into any major issue while executing a plan from the user, please don't try to directly work around it. Instead, propose a new plan and confirm with the user before proceeding.
</TROUBLESHOOTING>"""
'''

# https://ollama.com/library/magistral:latest/blobs/35f7a1efc383
# Message-based template with "thinking" support.
# * $last is true for the final message (`len (slice $.Messages $i)` == 1).
# * A <think>...</think> block is rendered for the last assistant message when
#   $.IsThinkSet is set and the message carries .Thinking.
# * When the last message is not from the assistant and thinking was explicitly
#   turned off ($.IsThinkSet and not $.Think), an empty <think></think> block
#   is appended.
# * Tool calls render as "[TOOL_CALLS]<name>[CALL_ID]<index>[ARGS]<arguments>".
# Sets temperature 0.7, top_p 0.95 and a default SYSTEM prompt.
magistral_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1}}
{{- if eq .Role "system" }}[SYSTEM_PROMPT]{{ .Content }}[/SYSTEM_PROMPT]
{{- else if eq .Role "user" }}
{{- if and (le (len (slice $.Messages $i)) 2) $.Tools }}[AVAILABLE_TOOLS]{{ $.Tools }}[/AVAILABLE_TOOLS]
{{- end }}[INST]{{ .Content }}[/INST]
{{- else if eq .Role "assistant" }}
{{- if and $.IsThinkSet (and $last .Thinking) -}}
<think>
{{ .Thinking }}
</think>
{{ end }}
{{- if .Content }}{{ .Content }}
{{- end }}
{{- if .ToolCalls }}{{ range $i, $_ := .ToolCalls }}[TOOL_CALLS]{{ .Function.Name }}[CALL_ID]{{ $i }}[ARGS]{{ .Function.Arguments }}{{ end }}
{{- end }}
{{- if not (eq (len (slice $.Messages $i)) 1) }}</s>
{{- end }}
{{- else if eq .Role "tool" }}[TOOL_RESULTS]0[TOOL_CONTENT]{{ .Content }}[/TOOL_RESULTS]
{{- end }}
{{- if and $last (ne .Role "assistant") }}{{ if and $.IsThinkSet (not $.Think) -}}<think>
</think>
{{ end }}
{{- end }}
{{- end }}"""
PARAMETER temperature 0.7
PARAMETER top_p 0.95
SYSTEM """A user will ask you to solve a task. You should first draft your thinking process (inner monologue) until you have derived the final answer. Afterwards, write a self-contained summary of your thoughts (i.e. your summary should be succinct but contain all the critical steps you needed to reach the conclusion). You should use Markdown and Latex to format your response. Write both your thoughts and summary in the same language as the task posed by the user.

Your thinking process must follow the template below:
<think>
Your thoughts or/and draft, like working through an exercise on scratch paper. Be as casual and as long as you want until you are confident to generate a correct answer.
</think>

Here, provide a concise summary that reflects your reasoning and presents a clear final answer to the user.

Problem:"""
'''

# Register the Mistral AI Modelfiles defined above under their lookup keys.
# A single update() keeps the same keys and insertion order as assigning them
# one at a time.
OLLAMA_TEMPLATES.update(
    {
        "mistral": mistral_ollama,
        "mistral-v03": mistral_v03_ollama,
        "mistral-small": mistral_small_ollama,
        "mistral-small-31": mistral_small_31_ollama,
        "mistral-small-32": mistral_small_32_ollama,
        "mixtral": mixtral_ollama,
        "mistral-nemo": mistral_nemo_ollama,
        "devstral": devstral_ollama,
        "magistral": magistral_ollama,
        "codestral": codestral_ollama,
    }
)


# =========================================== Llama-2
# Ollama from https://www.ollama.com/library/llama2
# Modelfile for the Llama-2 "[INST] <<SYS>> ... <</SYS>>" format. The
# <<SYS>> block is emitted unconditionally (there is no `if .System` guard) and
# the template has no {{ .Response }} slot. Stops on the {__EOS_TOKEN__}
# placeholder.
llama_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """[INST] <<SYS>>{{ .System }}<</SYS>>

{{ .Prompt }} [/INST]"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["llama"] = llama_ollama

# ===========================================  Vicuna
# Ollama from https://www.ollama.com/library/vicuna
# Modelfile for the Vicuna format: a single line made of the optional system
# text, "USER: <prompt>" and "ASSISTANT: <response>", ending with a space and
# the {__EOS_TOKEN__} placeholder.
vicuna_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}{{ .System }} {{ end }}{{ if .Prompt }}USER: {{ .Prompt }} {{ end }}ASSISTANT: {{ .Response }} {__EOS_TOKEN__}"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["vicuna"] = vicuna_ollama

# =========================================== Vicuna Old
# Modelfile for the older Vicuna format using "### Human:" / "### Assistant:"
# turn markers, with a default SYSTEM prompt. Registered under both the
# "vicuna_old" and "vicuna old" keys.
vicuna_old_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}{{ .System }}
{{ end }}{{ if .Prompt }}### Human: {{ .Prompt }}
{{ end }}### Assistant: {{ .Response }}{__EOS_TOKEN__}
"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
SYSTEM """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."""
'''

OLLAMA_TEMPLATES["vicuna_old"] = vicuna_old_ollama
OLLAMA_TEMPLATES["vicuna old"] = OLLAMA_TEMPLATES["vicuna_old"]

# =========================================== Alpaca multi turn
# Modelfile for the Alpaca format: optional system text, an "### Instruction:"
# section for the prompt and a "### Response:" section ending with the
# {__EOS_TOKEN__} placeholder. Ships a default SYSTEM prompt.
alpaca_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}{{ .System }}

{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}{{ end }}

### Response:
{{ .Response }}{__EOS_TOKEN__}

"""
PARAMETER stop "{__EOS_TOKEN__}"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
SYSTEM """Below are some instructions that describe some tasks. Write responses that appropriately complete each request."""
'''

OLLAMA_TEMPLATES["alpaca"] = alpaca_ollama

# =========================================== Gemma
# Ollama from https://www.ollama.com/library/gemma
# Modelfile for the Gemma format: turns are wrapped in
# <start_of_turn>ROLE ... <end_of_turn>. There is no separate system turn; the
# system text, when present, is prepended to the user turn. Sets
# repeat_penalty 1 and penalize_newline false, and stops on both turn markers.
gemma_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """<start_of_turn>user
{{ if .System }}{{ .System }} {{ end }}{{ .Prompt }}<end_of_turn>
<start_of_turn>model
{{ .Response }}<end_of_turn>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<start_of_turn>"
PARAMETER stop "<end_of_turn>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["gemma"] = gemma_ollama

# =========================================== Gemma with ChatML instead
# ChatML turn markers (<|im_start|> / <|im_end|>) combined with the Gemma
# sampling parameters (repeat_penalty 1, penalize_newline false).
gemma_chatml_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>
"""
PARAMETER repeat_penalty 1
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
PARAMETER penalize_newline false
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["gemma_chatml"] = gemma_chatml_ollama

# =========================================== Gemma 2
# Same as Gemma 1, but with sliding window attention!
# https://ollama.com/library/gemma2/blobs/6522ca797f47
# Gemma 2 reuses the Gemma Modelfile verbatim and appends one extra line that
# sets the context window to 4096 tokens.
gemma2_ollama = f"{gemma_ollama}PARAMETER num_ctx 4096\n"
OLLAMA_TEMPLATES.update({"gemma2": gemma2_ollama})

# =========================================== Gemma 2 with ChatML instead
# Gemma 2 (ChatML variant) reuses the Gemma ChatML Modelfile verbatim and
# appends one extra line that sets the context window to 4096 tokens.
gemma2_chatml_ollama = f"{gemma_chatml_ollama}PARAMETER num_ctx 4096\n"
OLLAMA_TEMPLATES.update({"gemma2_chatml": gemma2_chatml_ollama})

# =========================================== Llama-3
# Ollama from https://www.ollama.com/library/llama3
# Modelfile for the Llama-3 format: each turn is
# "<|start_header_id|>ROLE<|end_header_id|>", a blank line, the content, then
# <|eot_id|>. Sets num_keep 24 and stops on the three special markers.
# Registered under both the "llama-3" and "llama3" keys.
llama3_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ .Response }}<|eot_id|>"""
PARAMETER num_keep 24
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["llama-3"] = llama3_ollama
OLLAMA_TEMPLATES["llama3"] = llama3_ollama


# =========================================== Phi-3
# Ollama from https://www.ollama.com/library/phi3
# Modelfile for the Phi-3 format: <|system|>, <|user|> and <|assistant|>
# headers with every turn closed by <|end|>. The same Modelfile is registered
# under the "phi-3", "phi-35" and "phi-3.5" keys.
phi3_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|system|>
{{ .System }}<|end|>
{{ end }}{{ if .Prompt }}<|user|>
{{ .Prompt }}<|end|>
{{ end }}<|assistant|>
{{ .Response }}<|end|>
"""
PARAMETER stop "<|end|>"
PARAMETER stop "<|user|>"
PARAMETER stop "<|assistant|>"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

OLLAMA_TEMPLATES["phi-3"] = phi3_ollama
OLLAMA_TEMPLATES["phi-35"] = OLLAMA_TEMPLATES["phi-3"]
OLLAMA_TEMPLATES["phi-3.5"] = OLLAMA_TEMPLATES["phi-3"]

# =========================================== Llama-3.1
"""
No trimming in Llama 3.1 Instruct!
Also an extra newline for Cutting Knowledge Date
See https://colab.research.google.com/drive/1Xpqq5xpIgO-B00MQ-UccYMwN2J8QFgBM?usp=sharing

Also should be

from datetime import datetime
tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    tokenize = False,
    date_string = datetime.today().strftime("%d %B %Y"),
)
"""

# Ollama from https://ollama.com/library/llama3.1 (needs updating!)
# Modelfile text for Llama-3.1.
# The TEMPLATE has two branches:
#   * `.Messages` set: a system header (only when `.System` or `.Tools` is
#     present) followed by the chat history. When tools are present, the tool
#     list and the JSON function-call instructions are injected into the last
#     user turn. Tool results are rendered under the `ipython` role, and an
#     assistant header is appended after a trailing user/tool message.
#   * otherwise: the single-turn `.System` / `.Prompt` form.
# Stop strings are the header/eot tokens plus <|eom_id|>; sampling uses
# temperature 1.5 and min_p 0.1.
# NOTE(review): "original use question" in the tool prompt reads like a typo
# for "user question"; it is runtime prompt text (presumably copied verbatim
# from the linked Ollama template), so it is left untouched here.
llama31_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .Messages }}
{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{- if .System }}

{{ .System }}
{{- end }}
{{- if .Tools }}

You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original use question.
{{- end }}
{{- end }}<|eot_id|>
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
{{- if and $.Tools $last }}

Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.

Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.

{{ $.Tools }}
{{- end }}

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}

{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}

{{ .Content }}{{ if not $last }}<|eot_id|>{{ end }}
{{- end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- end }}
{{- end }}
{{- else }}
{{- if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}"""
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
PARAMETER stop "<|eom_id|>"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

# https://ollama.com/ajindal/llama3.1-storm:8b/blobs/1970553b62f4
# Modelfile text for Llama-3.1-Storm.
# Same two-branch layout as a Llama-3.1 chat template (`.Messages` history vs.
# single-turn `.System` / `.Prompt`), but the tool prompt lives in the system
# turn: tools are serialised with `json .Tools` inside <tools>...</tools>.
# Only the three header/eot stop strings are set; no sampling parameters.
# NOTE(review): the system prompt asks the model for
# <tool_call>{"tool_name": ..., "tool_arguments": ...}</tool_call>, while past
# assistant tool calls are rendered as bare {"name": ..., "parameters": ...}
# JSON - confirm against the upstream template whether this is intended.
llama_31_storm_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{ if .Messages }}
{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{- if .System }}

{{ .System }}
{{- end }}
{{- if .Tools }}

You are a function calling AI model. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into function. The user may use the terms function calling or tool use interchangeably.

Here are the available functions:
<tools>{{ json .Tools }}</tools>

For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags in the format:
<tool_call>{"tool_name": <function-name>, "tool_arguments": <args-dict>}</tool_call>
{{- end }}
{{- end }}<|eot_id|>
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}

{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}

{{ .Content }}{{ if not $last }}<|eot_id|>{{ end }}
{{- end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
{{ end }}
{{- end }}
{{- end }}
{{- else }}
{{- if .System }}<|start_header_id|>system<|end_header_id|>

{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>

{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}
"""
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
'''

# https://ollama.com/library/nemotron:latest/blobs/4863fe3335f3
# Modelfile text for Llama-3.1-Nemotron.
# The system header is always emitted; when tools are present the function
# calling instructions and each tool are written before `.System`.
# In the message loop, `system` role entries are skipped (empty branch),
# assistant turns render either their content or their tool calls as JSON,
# tool results use the `ipython` role, and any other role is rendered under
# its own name. An assistant header is appended after a trailing non-assistant
# message. Only the three header/eot stop strings are set.
llama_31_nemotron_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """<|start_header_id|>system<|end_header_id|>

{{ if .Tools }}You have access to the following functions. To call a function, please respond with JSON for a function call. Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.

{{ range .Tools }}{{ . }}

{{ end }}
{{- end }}{{ .System }}<|eot_id|>
{{- range $i, $_ := .Messages }}
{{- $isLastMessage := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "system" }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>

{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }} }
{{- end }}
{{- end }}
{{- if not $isLastMessage }}<|eot_id|>
{{- end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>

{{ .Content }}<|eot_id|>
{{- if $isLastMessage }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- else }}<|start_header_id|>{{ .Role }}<|end_header_id|>

{{ .Content }}<|eot_id|>
{{- if $isLastMessage }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- end }}
{{- end }}
"""
PARAMETER stop "<|start_header_id|>"
PARAMETER stop "<|end_header_id|>"
PARAMETER stop "<|eot_id|>"
'''

# https://ollama.com/library/llama3.2-vision:latest/blobs/715415638c895a1f8e8c6
# Modelfile text for Llama-3.2-Vision.
# One generic loop over `.Messages`: every message is written under its own
# role header; non-final messages are closed with <|eot_id|>, and a final
# non-assistant message is closed and followed by an assistant header.
# Sets temperature 0.6 and top_p 0.9; no stop strings are declared here.
llama_32_vision_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $index, $_ := .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>

{{ .Content }}
{{- if gt (len (slice $.Messages $index)) 1 }}<|eot_id|>
{{- else if ne .Role "assistant" }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- end }}"""
PARAMETER temperature 0.6
PARAMETER top_p 0.9
'''

# Llama-3.1 under both key spellings, then the model-specific variants.
OLLAMA_TEMPLATES["llama-3.1"] = OLLAMA_TEMPLATES["llama-31"] = llama31_ollama
OLLAMA_TEMPLATES["llama-31-nemotron"] = llama_31_nemotron_ollama
OLLAMA_TEMPLATES["llama-31-storm"] = llama_31_storm_ollama
OLLAMA_TEMPLATES["llama-32-vision"] = llama_32_vision_ollama

# Llama 3.2 / 3.3 (text) reuse the Llama-3.1 Modelfile unchanged.
for version in ("llama-3.2", "llama-3.3", "llama-32", "llama-33"):
    OLLAMA_TEMPLATES[version] = llama31_ollama

# =========================================== tinyllama
# tinyllama-chat https://ollama.com/library/tinyllama:latest/blobs/af0ddbdaaa26
# Modelfile text for TinyLlama chat.
# The TEMPLATE has no conditionals: the system and user blocks are always
# written (each closed with </s>) and the prompt ends at `<|assistant|>`.
# The role markers and </s> are stop strings, and a default SYSTEM message is
# supplied for the always-present system block.
tinyllama_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """<|system|>
{{ .System }}</s>
<|user|>
{{ .Prompt }}</s>
<|assistant|>"""
PARAMETER stop "<|system|>"
PARAMETER stop "<|user|>"
PARAMETER stop "<|assistant|>"
PARAMETER stop "</s>"
SYSTEM """You are a helpful AI assistant."""
'''

# Register the Modelfile under its lookup key.
OLLAMA_TEMPLATES["tinyllama"] = tinyllama_ollama


# =========================================== Qwen 2/2.5
# Qwen2 https://ollama.com/library/qwen2:latest/blobs/77c91b422cc9
# Qwen2.5 from https://ollama.com/library/qwen2.5/blobs/eb4402837c78
# Modelfile text for Qwen 2 / 2.5 (ChatML markers <|im_start|> / <|im_end|>).
# With `.Messages`: an optional system turn that also carries the tool
# signatures inside <tools>...</tools>, then the history. Assistant turns
# render either content or their tool calls inside <tool_call>...</tool_call>;
# tool results are wrapped in <tool_response> within a user turn; an assistant
# header is appended after a trailing non-assistant message.
# Without `.Messages`: the single-turn `.System` / `.Prompt` form.
# Stops on <|im_end|> and <|endoftext|>, samples with temperature 1.5 and
# min_p 0.1, and ships a default SYSTEM prompt.
qwen25_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|endoftext|>"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
SYSTEM """You are Qwen, created by Alibaba Cloud. You are a helpful assistant."""
'''

# https://ollama.com/library/qwen2.5-coder:latest/blobs/1e65450c3067
# Modelfile text for Qwen2.5-Coder.
# Adds a fill-in-the-middle branch in front of the chat template: when
# `.Suffix` is set the prompt is <|fim_prefix|>prompt<|fim_suffix|>suffix
# <|fim_middle|>. Otherwise it follows the ChatML layout with tool support
# (`.Messages` history, or single-turn `.System` / `.Prompt`).
# Only a default SYSTEM prompt is declared; there are no PARAMETER lines.
qwen_25_coder_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- if .Suffix }}<|fim_prefix|>{{ .Prompt }}<|fim_suffix|>{{ .Suffix }}<|fim_middle|>
{{- else if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools>:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> with NO other text. Do not include any backticks or ```json.
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
SYSTEM """You are Qwen, created by Alibaba Cloud. You are a helpful assistant."""
'''

# https://ollama.com/library/qwen2.5vl:latest/blobs/a242d8dfdc8f
# Modelfile text for Qwen2.5-VL.
# ChatML layout without tool support: an optional system turn, then the
# user/assistant history from `.Messages`, with an assistant header appended
# after a trailing non-assistant message.
# Sets temperature 0.0001 and a default SYSTEM prompt; no stop strings are
# declared here.
qwen_25_vl_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- if .System -}}
<|im_start|>system
{{ .System }}<|im_end|>
{{- end -}}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}
<|im_start|>user
{{ .Content }}<|im_end|>
{{- else if eq .Role "assistant" }}
<|im_start|>assistant
{{ if .Content }}{{ .Content }}{{ if not $last }}<|im_end|>
{{- else -}}<|im_end|>{{- end -}}
{{- end -}}
{{- end -}}
{{- if and (ne .Role "assistant") $last }}
<|im_start|>assistant
{{ end -}}
{{- end }}"""
PARAMETER temperature 0.0001
SYSTEM """You are a helpful assistant."""
'''

# https://ollama.com/library/openthinker:latest/blobs/32695b892af8
# Modelfile text for OpenThinker.
# One generic loop over `.Messages`: each message is written as
# <|im_start|>{role}<|im_sep|> + content; non-final messages are closed with
# <|im_end|>, and a final non-assistant message is closed and followed by the
# assistant header. No PARAMETER or SYSTEM lines are declared.
openthinker_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
<|im_start|>{{ .Role }}<|im_sep|>
{{ .Content }}{{ if not $last }}<|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_end|>
<|im_start|>assistant<|im_sep|>
{{ end }}
{{- end }}"""
'''


# Register the Qwen 2.5 family. Key insertion order is kept as-is:
# "qwen-2" is added last and reuses the Qwen 2.5 Modelfile.
OLLAMA_TEMPLATES["qwen-25"] = OLLAMA_TEMPLATES["qwen-2.5"] = qwen25_ollama
OLLAMA_TEMPLATES["qwen-25-coder"] = qwen_25_coder_ollama
OLLAMA_TEMPLATES["qwen-25-vl"] = qwen_25_vl_ollama
OLLAMA_TEMPLATES["openthinker"] = openthinker_ollama
OLLAMA_TEMPLATES["qwen-2"] = qwen25_ollama

# =========================================== Phi-4
# Single-line Go template for Phi-4: optional system turn, optional user turn,
# then the assistant turn; every turn is <|im_start|><|role|><|im_sep|>text<|im_end|>.
_phi4_ollama_template = "".join(
    (
        "{{ if .System }}<|im_start|><|system|><|im_sep|>{{ .System }}<|im_end|>{{ end }}",
        "{{ if .Prompt }}<|im_start|><|user|><|im_sep|>{{ .Prompt }}<|im_end|>{{ end }}",
        "<|im_start|><|assistant|><|im_sep|>{{ .Response }}<|im_end|>",
    )
)

# Note: the template published at https://www.ollama.com/library/phi4 differs
# from this one.
# The Modelfile is assembled line by line; the leading and trailing empty
# entries reproduce the newline at the start and end of the text.
phi_4_ollama = "\n".join(
    [
        "",
        "FROM {__FILE_LOCATION__}",
        'TEMPLATE """' + _phi4_ollama_template + '"""',
        'PARAMETER stop "<|im_end|>"',
        'PARAMETER stop "<|im_start|>"',
        'PARAMETER stop "<|im_sep|>"',
        "PARAMETER temperature 1.5",
        "PARAMETER min_p 0.1",
        "",
    ]
)

# https://ollama.com/library/phi4-reasoning:latest/blobs/32695b892af8
# Modelfile text for Phi-4-reasoning.
# One generic loop over `.Messages`: each message is written as
# <|im_start|>{role}<|im_sep|> + content; non-final messages are closed with
# <|im_end|>, and a final non-assistant message is closed and followed by the
# assistant header. <|im_start|>, <|im_end|> and <|im_sep|> are stop strings.
phi_4_reasoning_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
<|im_start|>{{ .Role }}<|im_sep|>
{{ .Content }}{{ if not $last }}<|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_end|>
<|im_start|>assistant<|im_sep|>
{{ end }}
{{- end }}"""
PARAMETER stop "<|im_start|>"
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|im_sep|>"
'''

# https://ollama.com/library/phi4-mini:latest/blobs/813f53fdc6e5
# Modelfile text for Phi-4-mini.
# A system turn is written when `.System` or `.Tools` is present; tools are
# embedded as <|tool|>...<|/tool|> (with a fallback system sentence when no
# system prompt was given). Every non-system message is rendered as
# <|role|> + content; assistant tool calls are listed inside
# <|tool_call|>[...]<|/tool_call|>. A final non-assistant message is closed
# with <|end|> and followed by <|assistant|>.
# Fix relative to the previous text: each rendered tool-call object
# {"name":...,"arguments":...} was missing its closing brace, which produced
# malformed JSON in the prompt. The `}` after `.Function.Arguments` closes it.
# `{__FILE_LOCATION__}` is a placeholder on the FROM line; it is presumably
# substituted by the code that consumes OLLAMA_TEMPLATES (not visible here).
phi_4_mini_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- if or .System .Tools }}<|system|>{{ if .System }}{{ .System }}{{ end }}
{{- if .Tools }}{{ if not .System }}You are a helpful assistant with some tools.{{ end }}<|tool|>{{ .Tools }}<|/tool|><|end|>
{{- end }}
{{- end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if ne .Role "system" }}<|{{ .Role }}|>{{ .Content }}
{{- if .ToolCalls }}<|tool_call|>[{{ range .ToolCalls }}{"name":"{{ .Function.Name }}","arguments":{{ .Function.Arguments }}}{{ end }}]<|/tool_call|>
{{- end }}
{{- if not $last }}<|end|>
{{- end }}
{{- if and (ne .Role "assistant") $last }}<|end|><|assistant|>{{ end }}
{{- end }}
{{- end }}"""
'''

# https://ollama.com/library/phi4-mini-reasoning:latest/blobs/c895a1f8e8c6
# Modelfile text for Phi-4-mini-reasoning.
# An optional <|system|> prefix is followed by every non-system message
# rendered as <|role|> + content; non-final messages are closed with <|end|>,
# and a final non-assistant message is closed and followed by <|assistant|>.
# A default SYSTEM prompt is declared; there are no PARAMETER lines.
phi_4_mini_reasoning_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{- if .System }}<|system|>{{ .System }}
{{- end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if ne .Role "system" }}<|{{ .Role }}|>{{ .Content }}
{{- if not $last }}<|end|>
{{- end }}
{{- if and (ne .Role "assistant") $last }}<|end|><|assistant|>{{ end }}
{{- end }}
{{- end }}"""
SYSTEM """Your name is Phi, an AI math expert developed by Microsoft."""
'''
# Register the four Phi-4 variants, each under its own lookup key.
OLLAMA_TEMPLATES["phi-4"] = phi_4_ollama
OLLAMA_TEMPLATES["phi-4-reasoning"] = phi_4_reasoning_ollama
OLLAMA_TEMPLATES["phi-4-mini"] = phi_4_mini_ollama
OLLAMA_TEMPLATES["phi-4-mini-reasoning"] = phi_4_mini_reasoning_ollama


# =========================================== Gemma-3
# Ollama from https://ollama.com/library/gemma3/blobs/e0a42594d802
# Modelfile text for Gemma-3.
# Loops over `.Messages`: both `user` and `system` messages are rendered as
# <start_of_turn>user turns, assistant messages as <start_of_turn>model turns.
# A trailing user/system message is followed by an open model turn.
# Stops on <end_of_turn> and <eos>; sampling is temperature 1.0, min_p 0.0,
# top_k 64, top_p 0.95, with num_predict 32768.
gemma3_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if or (eq .Role "user") (eq .Role "system") }}<start_of_turn>user
{{ .Content }}<end_of_turn>
{{ if $last }}<start_of_turn>model
{{ end }}
{{- else if eq .Role "assistant" }}<start_of_turn>model
{{ .Content }}{{ if not $last }}<end_of_turn>
{{ end }}
{{- end }}
{{- end }}"""
PARAMETER stop "<end_of_turn>"
PARAMETER stop "<eos>"
PARAMETER temperature 1.0
PARAMETER min_p 0.0
PARAMETER top_k 64
PARAMETER top_p 0.95
PARAMETER num_predict 32768
'''

# https://ollama.com/library/gemma3:270m/blobs/4b19ac7dd2fb
# Modelfile text for Gemma-3 270M.
# Like the Gemma-3 turn layout, but the system prompt (`$.System`) is injected
# once, at the top of the first user turn, guarded by `$systemPromptAdded`.
# Only `user` and `assistant` roles are rendered by the loop.
# Stops on <end_of_turn>; sets top_k 64 and top_p 0.95.
gemma3_270m_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- $systemPromptAdded := false }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<start_of_turn>user
{{- if (and (not $systemPromptAdded) $.System) }}
{{- $systemPromptAdded = true }}
{{ $.System }}
{{ end }}
{{ .Content }}<end_of_turn>
{{ if $last }}<start_of_turn>model
{{ end }}
{{- else if eq .Role "assistant" }}<start_of_turn>model
{{ .Content }}{{ if not $last }}<end_of_turn>
{{ end }}
{{- end }}
{{- end }}
"""
PARAMETER stop "<end_of_turn>"
PARAMETER top_k 64
PARAMETER top_p 0.95
'''

# Gemma-3 under both key spellings, plus the dedicated 270M variant.
OLLAMA_TEMPLATES["gemma-3"] = OLLAMA_TEMPLATES["gemma3"] = gemma3_ollama
OLLAMA_TEMPLATES["gemma3-270m"] = gemma3_270m_ollama


# =========================================== Qwen-3
# Ollama template for Qwen-3 (see https://ollama.com/library/qwen3/blobs/eb4402837c78)
# Modelfile text registered for the "qwen-3" / "qwen3" keys.
# ChatML layout with tool support: with `.Messages`, an optional system turn
# carrying the tool signatures, then the history (tool results wrapped in
# <tool_response> inside a user turn); otherwise the single-turn `.System` /
# `.Prompt` form.
# Stops on <|im_end|> and <|im_start|>; sampling is temperature 0.6,
# min_p 0.0, top_k 20, top_p 0.95, repeat_penalty 1.
# NOTE(review): the TEMPLATE body appears to be the same text as
# qwen25_ollama's; only the PARAMETER/SYSTEM lines differ - confirm.
# NOTE: the name `qwen3_ollama` is assigned a second time further down this
# file, so after import the module-level name refers to that later string.
qwen3_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
PARAMETER stop "<|im_end|>"
PARAMETER stop "<|im_start|>"
PARAMETER temperature 0.6
PARAMETER min_p 0.0
PARAMETER top_k 20
PARAMETER top_p 0.95
PARAMETER repeat_penalty 1
'''

# Register the Qwen-3 Modelfile under both key spellings, and record the
# end-of-turn marker used by the Qwen-3 chat format.
OLLAMA_TEMPLATES["qwen-3"] = OLLAMA_TEMPLATES["qwen3"] = qwen3_ollama
qwen3_template_eos_token = "<|im_end|>"


# =========================================== Gemma-3n
# Ollama from https://ollama.com/library/gemma3n/blobs/e0a42594d802
# Modelfile text for Gemma-3n.
# Loops over `.Messages`: `user` and `system` messages are rendered as
# <start_of_turn>user turns, assistant messages as <start_of_turn>model turns,
# and a trailing user/system message is followed by an open model turn.
# No PARAMETER lines (stop strings or sampling settings) are declared here.
gemma3n_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if or (eq .Role "user") (eq .Role "system") }}<start_of_turn>user
{{ .Content }}<end_of_turn>
{{ if $last }}<start_of_turn>model
{{ end }}
{{- else if eq .Role "assistant" }}<start_of_turn>model
{{ .Content }}{{ if not $last }}<end_of_turn>
{{ end }}
{{- end }}
{{- end }}"""
'''

# Both spellings of the key resolve to the same Gemma-3n Modelfile string.
OLLAMA_TEMPLATES["gemma-3n"] = OLLAMA_TEMPLATES["gemma3n"] = gemma3n_ollama

# =========================================== GPT-OSS

# Ollama from https://ollama.com/library/gpt-oss:latest/blobs/fa6710a93d78
# Modelfile text for GPT-OSS.
# Layout of the TEMPLATE, top to bottom:
#   * system message: fixed identity text, knowledge cutoff, `currentDate`,
#     and a "Reasoning:" line (`.ThinkLevel` when set, otherwise "medium"
#     unless thinking was explicitly turned off).
#   * built-in tools (browser.search / browser.open / browser.find / python)
#     are described inside the system message; any other tool sets
#     `$hasNonBuiltinTools`.
#   * developer message (only when there are non-built-in tools or a system
#     prompt): the other tools are declared as TypeScript-like signatures in
#     `namespace functions`, and `.System` is written under "# Instructions".
#   * a first pass over `.Messages` finds the last user index and whether the
#     final assistant message is being prefilled (content, or thinking only).
#   * a second pass renders the messages: `system` role entries are skipped;
#     tool results are addressed `to=assistant` (prefixed with `functions.`
#     for non-built-in tools); assistant thinking goes to the `analysis`
#     channel and is only shown for messages after the last user message;
#     assistant content goes to the `final` channel; tool calls go to
#     `analysis` (built-in) or `commentary` (other) and end with <|call|>.
#   * when a prefill flag is set, the <|end|> after the corresponding assistant
#     text and the trailing `<|start|>assistant` prompt are omitted.
# Sampling is temperature 1.0, top_k 0, top_p 1.0; no stop strings are
# declared here.
gptoss_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """<|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
Knowledge cutoff: 2024-06
Current date: {{ currentDate }}
{{- if and .IsThinkSet .Think (ne .ThinkLevel "") }}

Reasoning: {{ .ThinkLevel }}
{{- else if or (not .IsThinkSet) (and .IsThinkSet .Think) }}

Reasoning: medium
{{- end }}

{{- $hasNonBuiltinTools := false }}
{{- if .Tools -}}
{{- $hasBrowserSearch := false }}
{{- $hasBrowserOpen := false }}
{{- $hasBrowserFind := false }}
{{- $hasPython := false }}
  {{- range .Tools }}
    {{- if eq .Function.Name "browser.search" -}}{{- $hasBrowserSearch = true -}}
    {{- else if eq .Function.Name "browser.open" -}}{{- $hasBrowserOpen = true -}}
    {{- else if eq .Function.Name "browser.find" -}}{{- $hasBrowserFind = true -}}
    {{- else if eq .Function.Name "python" -}}{{- $hasPython = true -}}
    {{- else }}{{ $hasNonBuiltinTools = true -}}
    {{- end }}
  {{- end }}
{{- if or $hasBrowserSearch $hasBrowserOpen $hasBrowserFind $hasPython }}

# Tools
{{- if or $hasBrowserSearch $hasBrowserOpen $hasBrowserFind }}

## browser

// Tool for browsing.
// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.
// Cite information from the tool using the following format:
// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.
// Do not quote more than 10 words directly from the tool output.
// sources=web (default: web)
namespace browser {
{{- if $hasBrowserSearch }}

// Searches for information related to `query` and displays `topn` results.
type search = (_: {
query: string,
topn?: number, // default: 10
source?: string,
}) => any;
{{- end }}
{{- if $hasBrowserOpen }}

// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.
// Valid link ids are displayed with the formatting: `【{id}†.*】`.
// If `cursor` is not provided, the most recent page is implied.
// If `id` is a string, it is treated as a fully qualified URL associated with `source`.
// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.
// Use this function without `id` to scroll to a new location of an opened page.
type open = (_: {
id?: number | string, // default: -1
cursor?: number, // default: -1
loc?: number, // default: -1
num_lines?: number, // default: -1
view_source?: boolean, // default: false
source?: string,
}) => any;
{{- end }}
{{- if $hasBrowserFind }}

// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.
type find = (_: {
pattern: string,
cursor?: number, // default: -1
}) => any;
{{- end }}

} // namespace browser
{{- end }}{{/* end if has browser tools */}}
{{- if $hasPython }}

## python

Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).

When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.
{{- end }}{{/* end if hasPython */}}
{{- end }}{{/* end if has any built-in tools */}}
{{- end }}{{/* end if .Tools */}}

# Valid channels: analysis, commentary, final. Channel must be included for every message.{{ if $hasNonBuiltinTools }}
Calls to these tools must go to the commentary channel: 'functions'.
{{- end -}}<|end|>{{/* end of system */ -}}
{{- if or $hasNonBuiltinTools .System -}}
<|start|>developer<|message|>{{- if $hasNonBuiltinTools }}# Tools

## functions

namespace functions {
{{- range .Tools }}
{{- if not (or (eq .Function.Name "browser.search") (eq .Function.Name "browser.open") (eq .Function.Name "browser.find") (eq .Function.Name "python")) }}
{{if .Function.Description }}
// {{ .Function.Description }}
{{- end }}
{{- if and .Function.Parameters.Properties (gt (len .Function.Parameters.Properties) 0) }}
type {{ .Function.Name }} = (_: {
{{- range $name, $prop := .Function.Parameters.Properties }}
{{- if $prop.Description }}
  // {{ $prop.Description }}
{{- end }}
  {{ $name }}: {{ if gt (len $prop.Type) 1 }}{{ range $i, $t := $prop.Type }}{{ if $i }} | {{ end }}{{ $t }}{{ end }}{{ else }}{{ index $prop.Type 0 }}{{ end }},
{{- end }}
}) => any;
{{- else }}
type {{ .Function.Name }} = () => any;
{{- end }}
{{- end }}{{/* end if not browser tool */}}
{{- end }}{{/* end of range .Tools */}}

} // namespace functions
{{- end }}{{/* end if hasNonBuiltinTools */}}
{{- if .System}}

# Instructions

{{ .System }}
{{- end -}}
<|end|>
{{- end -}}
{{- /* Find the index of the last user message */ -}}
{{- $lastUserIdx := -1 }}
{{- $prefillingContent := false }}
{{- $prefillingThinkingOnly := false }}
{{- range $i, $msg := .Messages }}
  {{- $last := eq (len (slice $.Messages $i)) 1 -}}
  {{- if eq $msg.Role "user" }}
    {{- $lastUserIdx = $i }}
  {{- end -}}
  {{- if and $last (eq $msg.Role "assistant") (gt (len $msg.Content) 0) }}
    {{- $prefillingContent = true }}
  {{- else if and $last (eq $msg.Role "assistant") (gt (len $msg.Thinking) 0) }}
    {{- $prefillingThinkingOnly = true }}
  {{- end }}
{{- end -}}
{{- /* Now render messages */ -}}
{{- range $i, $msg := .Messages }}
  {{- $last := eq (len (slice $.Messages $i)) 1 -}}
  {{- if (ne $msg.Role "system") -}}
    {{- if eq $msg.Role "tool" -}}
      {{- if or (eq $msg.ToolName "python") (eq $msg.ToolName "browser.search") (eq $msg.ToolName "browser.open") (eq $msg.ToolName "browser.find") -}}
        <|start|>{{ $msg.ToolName }} to=assistant<|message|>{{ $msg.Content }}<|end|>
      {{- else -}}
        <|start|>functions.{{ $msg.ToolName }} to=assistant<|message|>{{ $msg.Content }}<|end|>
      {{- end -}}
    {{- else if eq $msg.Role "assistant" -}}
      {{- if and $msg.Thinking (gt $i $lastUserIdx) -}}{{- /* Show thinking only after last user message */ -}}
      <|start|>assistant<|channel|>analysis<|message|>{{ $msg.Thinking }}{{- if not $prefillingThinkingOnly -}}<|end|>{{- end -}}
      {{- end -}}
      {{- if gt (len $msg.Content) 0 -}}
        <|start|>assistant<|channel|>final<|message|>{{ $msg.Content }}{{- if not $prefillingContent -}}<|end|>{{- end -}}
      {{- end -}}
      {{- if gt (len $msg.ToolCalls) 0 -}}
        {{- range $j, $toolCall := $msg.ToolCalls -}}
          {{- $isBuiltin := or (eq $toolCall.Function.Name "python") (eq $toolCall.Function.Name "browser.search") (eq $toolCall.Function.Name "browser.open") (eq $toolCall.Function.Name "browser.find") -}}
          <|start|>assistant<|channel|>{{ if $isBuiltin }}analysis{{ else }}commentary{{ end }} to={{ if not $isBuiltin}}functions.{{end}}{{ $toolCall.Function.Name }} <|constrain|>json<|message|>{{ $toolCall.Function.Arguments }}<|call|>
        {{- end -}}
      {{- end -}}
    {{- else if eq $msg.Role "user" -}}
      <|start|>{{ $msg.Role }}<|message|>{{ $msg.Content }}<|end|>
    {{- end }}
  {{- else }}
  {{- end }}
{{- end -}}
{{- if not (or $prefillingContent $prefillingThinkingOnly) -}}
<|start|>assistant
{{- end -}}"""
PARAMETER temperature 1.0
PARAMETER top_k 0
PARAMETER top_p 1.0
'''

# Both spellings of the key resolve to the same GPT-OSS Modelfile string.
OLLAMA_TEMPLATES["gpt-oss"] = OLLAMA_TEMPLATES["gptoss"] = gptoss_ollama


# =========================================== Qwen3

# Ollama from https://ollama.com/library/qwen3/blobs/53e4ea15e8f5
# Thinking-aware Qwen3 Modelfile text, registered below for the
# "qwen3-instruct" and "qwen3-thinking" keys.
# NOTE: this rebinds the module-level name `qwen3_ollama`, which was already
# assigned earlier in this file. The "qwen-3" / "qwen3" entries were stored
# before this point and therefore keep the earlier string.
# The TEMPLATE first records the index of the last user message, then writes
# an optional system turn (system prompt + tool signatures) and the history.
# An assistant message's `.Thinking` is emitted inside <think>...</think> only
# when `$.IsThinkSet` is true and the message is the last one or comes after
# the last user message. Tool results are wrapped in <tool_response> inside a
# user turn. No PARAMETER or SYSTEM lines are declared.
qwen3_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """
{{- $lastUserIdx := -1 -}}
{{- range $idx, $msg := .Messages -}}
{{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}}
{{- end }}
{{- if or .System .Tools }}<|im_start|>system
{{ if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end -}}
<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}}
<think>{{ .Thinking }}</think>
{{ end -}}
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
"""
'''

# The instruct and thinking variants share the thinking-aware Qwen3 Modelfile.
OLLAMA_TEMPLATES["qwen3-instruct"] = OLLAMA_TEMPLATES["qwen3-thinking"] = qwen3_ollama


# =========================================== Starling-LM


# Ollama from https://ollama.com/library/starling-lm:7b/blobs/4b21bfc435b4
# Modelfile text for Starling-LM.
# Single-turn TEMPLATE built on `.System` / `.Prompt` / `.Response`; turns are
# introduced by plain-text "GPT4 Correct <Role>:" prefixes and closed with
# <|end_of_turn|>. The end-of-turn token and all three role prefixes are stop
# strings; sampling uses temperature 1.5 and min_p 0.1.
starling_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}GPT4 Correct System: {{ .System }}<|end_of_turn|>
{{ end }}{{ if .Prompt }}GPT4 Correct User: {{ .Prompt }}<|end_of_turn|>
{{ end }}GPT4 Correct Assistant: {{ .Response }}<|end_of_turn|>"""
PARAMETER stop "<|end_of_turn|>"
PARAMETER stop "GPT4 Correct User:"
PARAMETER stop "GPT4 Correct Assistant:"
PARAMETER stop "GPT4 Correct System:"
PARAMETER temperature 1.5
PARAMETER min_p 0.1
'''

# Register the Modelfile under its lookup key.
OLLAMA_TEMPLATES["starling"] = starling_ollama


# =========================================== Yi-chat


# Ollama from https://ollama.com/library/yi:34b-chat/blobs/62fbfd9ed093
# Modelfile text for Yi chat.
# Single-turn ChatML TEMPLATE built on `.System` / `.Prompt` / `.Response`,
# using <|im_start|>role ... <|im_end|> turns.
# No PARAMETER lines (stop strings or sampling settings) are declared here.
yi_chat_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{ if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ .Response }}<|im_end|>"""
'''

# Register the Modelfile under its lookup key.
OLLAMA_TEMPLATES["yi-chat"] = yi_chat_ollama

# =========================================== Granite

# Ollama from https://ollama.com/library/granite3.2:latest/blobs/3e7ca51acd6e
granite_32_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- /*

------ MESSAGE PARSING ------

*/}}
{{- /*
Declare the prompt structure variables to be filled in from messages
*/}}
{{- $system := "" }}
{{- $documents := "" }}
{{- $documentCounter := 0 }}
{{- $thinking := false }}
{{- $citations := false }}
{{- $hallucinations := false }}
{{- $length := "" }}

{{- /*
Loop over messages and look for a user-provided system message and documents
*/ -}}
{{- range .Messages }}

    {{- /* User defined system prompt(s) */}}
    {{- if (eq .Role "system")}}
        {{- if (ne $system "") }}
            {{- $system = print $system " " }}
        {{- end}}
        {{- $system = print $system .Content }}
    {{- end}}

    {{- /*
    NOTE: Since Ollama collates consecutive roles, for control and documents, we
        work around this by allowing the role to contain a qualifier after the
        role string.
    */ -}}

    {{- /* Role specified thinking */ -}}
    {{- if (and (ge (len .Role) 7) (eq (slice .Role 0 7) "control")) }}
        {{- if (eq .Content "thinking")}}{{- $thinking = true }}{{- end}}
        {{- if (eq .Content "citations")}}{{- $citations = true }}{{- end}}
        {{- if (eq .Content "hallucinations")}}{{- $hallucinations = true }}{{- end}}
        {{- if (and (ge (len .Content) 7) (eq (slice .Content 0 7) "length "))}}
            {{- $length = print ` {"length": "` (slice .Content 7) `"}` }}
        {{- end}}
    {{- end}}

    {{- /* Role specified document */ -}}
    {{- if (and (ge (len .Role) 8) (eq (slice .Role 0 8) "document")) }}
        {{- if (ne $documentCounter 0)}}
            {{- $documents = print $documents " "}}
        {{- end}}
        {{- $identifier := $documentCounter}}
        {{- if (ge (len .Role) 9) }}
            {{- $identifier = (slice .Role 8)}}
        {{- end}}
        {{- $documents = print $documents "Document " $identifier "" .Content}}
        {{- $documentCounter = len (printf "a%*s" $documentCounter "")}}
    {{- end}}
{{- end}}

{{- /*
If no user message provided, build the default system message
*/ -}}
{{- if eq $system "" }}
    {{- $system = "Knowledge Cutoff Date: April 2024.You are Granite, developed by IBM."}}

    {{- /* Add Tools prompt */}}
    {{- if .Tools }}
        {{- $system = print $system " You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." }}
    {{- end}}

    {{- /* Add documents prompt */}}
    {{- if $documents }}
        {{- if .Tools }}
            {{- $system = print $system " "}}
        {{- else }}
            {{- $system = print $system " "}}
        {{- end}}
        {{- $system = print $system "Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." }}
        {{- if $citations}}
            {{- $system = print $system " In your response, use the symbols <co> and </co> to indicate when a fact comes from a document in the search result, e.g <co>0</co> for a fact from document 0. Afterwards, list all the citations with their corresponding documents in an ordered list."}}
        {{- end}}
        {{- if $hallucinations}}
            {{- $system = print $system "Finally, after the response is written, include a numbered list of sentences from the response that are potentially hallucinated and not based in the documents."}}
        {{- end}}
    {{- end}}

    {{- /* Prompt without tools or documents */}}
    {{- if (and (not .Tools) (not $documents)) }}
        {{- $system = print $system " You are a helpful AI assistant."}}
        {{- if $thinking}}
            {{- $system = print $system "Respond to every user query in a comprehensive and detailed way. You can write down your thought process before responding. Write your thoughts after 'Here is my thought process:' and write your response after 'Here is my response:' for each user query."}}
        {{- end}}
    {{- end}}

    {{- /* Add thinking prompt if no tools or documents */}}
    {{- if (and $thinking (not .Tools) (not $documents)) }}
        {{- $system = print $system " You are a helpful AI assistant.Respond to every user query in a comprehensive and detailed way. You can write down your thought process before responding. Write your thoughts after 'Here is my thought process:' and write your response after 'Here is my response:' for each user query."}}
    {{- end}}

{{- end}}
{{- /*

------ TEMPLATE EXPANSION ------

*/}}
{{- /* System Prompt */ -}}
<|start_of_role|>system<|end_of_role|>{{- $system }}<|end_of_text|>

{{- /* Tools */ -}}
{{- if .Tools }}
<|start_of_role|>tools<|end_of_role|>[
{{- range $index, $_ := .Tools }}
{{ . }}
{{- if and (ne (len (slice $.Tools $index)) 1) (gt (len $.Tools) 1) }},
{{- end}}
{{- end }}
]
{{- end}}

{{- /* Documents */ -}}
{{- if $documents }}
<|start_of_role|>documents<|end_of_role|>
{{ $documents }}<|end_of_text|>
{{- end}}

{{- /* Standard Messages */}}
{{- range $index, $_ := .Messages }}
{{- if (and
    (ne .Role "system")
    (or (lt (len .Role) 7) (ne (slice .Role 0 7) "control"))
    (or (lt (len .Role) 8) (ne (slice .Role 0 8) "document"))
)}}
<|start_of_role|>
{{- if eq .Role "tool" }}tool_response
{{- else }}{{ .Role }}
{{- end }}<|end_of_role|>
{{- if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<|tool_call|>
{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{- end }}
{{- end }}
{{- if eq (len (slice $.Messages $index)) 1 }}
{{- if eq .Role "assistant" }}
{{- else }}<|end_of_text|>
<|start_of_role|>assistant<|end_of_role|>
{{- end -}}
{{- else }}<|end_of_text|>
{{- end }}
{{- end }}
{{- end }}
"""
'''

# granite-3.2-vision https://ollama.com/library/granite3.2-vision:latest/blobs/579046ba1157
# Ollama Modelfile for Granite 3.2 Vision, kept as one literal string.
# - `{__FILE_LOCATION__}` is a placeholder for the model file path
#   (presumably substituted by the caller before the Modelfile is written --
#   confirm at the use site).
# - TEMPLATE is Go-template text: an optional tools block, then the system
#   prompt (the first message if its role is "system", otherwise the default
#   text), then one section per message. After the last message an
#   <|assistant|> generation prompt is appended unless that message is itself
#   an assistant turn.
# - The SYSTEM line repeats the same default prompt used inside TEMPLATE.
granite_32_vision_ollama = '''
FROM {__FILE_LOCATION__}
TEMPLATE """{{- /* Tools */ -}}
{{- if .Tools -}}
<|start_of_role|>available_tools<|end_of_role|>
{{- range $index, $_ := .Tools }}
{{- $last := eq (len (slice $.Tools $index)) 1 }}
{{ . }}
{{- if not $last }}
{{ end}}
{{- end -}}
<|end_of_text|>
{{ end }}

{{- /* System Prompt */ -}}
{{- if and (gt (len .Messages) 0) (eq (index .Messages 0).Role "system") -}}
<|system|>
{{(index .Messages 0).Content}}
{{- else -}}
<|system|>
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
{{- end }}

{{- /*Main message loop*/ -}}
{{- range $index, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $index)) 1 }}
{{- if eq .Role "system" }}

{{- else if eq .Role "user" }}
<|user|>
{{.Content}}

{{- else if eq .Role "assistant" }}
<|assistant|>
{{- if .Content }}
{{.Content}}
<|end_of_text|>
{{ end }}

{{- else if eq .Role "assistant_tool_call" }}
<|start_of_role|>assistant<|end_of_role|><|tool_call|>{{.Content}}<|end_of_text|>

{{- else if eq .Role "tool_response" }}
<|start_of_role|>tool_response<|end_of_role|>{{.Content}}<|end_of_text|>
{{- end }}

{{- /* Add generation prompt */ -}}
{{ if $last }}
{{- if eq .Role "assistant" }}
{{- else }}
<|assistant|>
{{- end }}
{{- end }}
{{- end }}"""
PARAMETER num_ctx 16384
PARAMETER temperature 0
SYSTEM """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
'''

# Register both Granite 3.2 Modelfile templates under their lookup names.
OLLAMA_TEMPLATES["granite-32"] = granite_32_ollama
OLLAMA_TEMPLATES["granite-32-vision"] = granite_32_vision_ollama


# Template name -> tuple of Hugging Face repo ids that should use that
# template. The keys are presumably the same names used in OLLAMA_TEMPLATES
# (this holds for the two granite entries registered just above; confirm for
# the rest). The reverse lookup MODEL_TO_OLLAMA_TEMPLATE_MAPPER is derived
# from this table right after it, so a repo id listed under two different
# keys would resolve to whichever key comes last.
OLLAMA_TEMPLATE_TO_MODEL_MAPPER = {
    "phi-3.5": (
        "unsloth/Phi-3.5-mini-instruct-bnb-4bit",
        "unsloth/Phi-3.5-mini-instruct",
        "microsoft/Phi-3.5-mini-instruct",
    ),
    "phi-3": (
        "unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
        "unsloth/Phi-3-mini-4k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
        "unsloth/Phi-3-medium-4k-instruct-bnb-4bit",
        "unsloth/Phi-3-medium-4k-instruct",
        "microsoft/Phi-3-medium-4k-instruct",
        "unsloth/Phi-3-mini-4k-instruct-v0-bnb-4bit",
        "unsloth/Phi-3-mini-4k-instruct-v0",
    ),
    "phi-4": (
        "unsloth/phi-4-unsloth-bnb-4bit",
        "unsloth/phi-4",
        "microsoft/phi-4",
        "unsloth/phi-4-bnb-4bit",
    ),
    "phi-4-reasoning": (
        "unsloth/phi-4-reasoning-unsloth-bnb-4bit",
        "unsloth/phi-4-reasoning",
        "microsoft/Phi-4-reasoning",
        "unsloth/phi-4-reasoning-bnb-4bit",
        "unsloth/phi-4-reasoning-plus-unsloth-bnb-4bit",
        "unsloth/phi-4-reasoning-plus",
        "microsoft/Phi-4-reasoning-plus",
        "unsloth/phi-4-reasoning-plus-bnb-4bit",
    ),
    "phi-4-mini": (
        "unsloth/Phi-4-mini-instruct-unsloth-bnb-4bit",
        "unsloth/Phi-4-mini-instruct",
        "microsoft/Phi-4-mini-instruct",
        "unsloth/Phi-4-mini-instruct-bnb-4bit",
    ),
    "phi-4-mini-reasoning": (
        "unsloth/phi-4-mini-reasoning-unsloth-bnb-4bit",
        "unsloth/phi-4-mini-reasoning",
        "microsoft/Phi-4-mini-reasoning",
        "unsloth/phi-4-mini-reasoning-bnb-4bit",
    ),
    "mistral": (
        "unsloth/mistral-7b-instruct-v0.1-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.1",
        "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.2",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ),
    "mistral-v03": (
        "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
        "unsloth/mistral-7b-instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "unsloth/Mistral-Large-Instruct-2407-bnb-4bit",
        "mistralai/Mistral-Large-Instruct-2407",
    ),
    "mistral-small": (
        "unsloth/Mistral-Small-Instruct-2409-bnb-4bit",
        "unsloth/Mistral-Small-Instruct-2409",
        "mistralai/Mistral-Small-Instruct-2409",
        "unsloth/Mistral-Small-24B-Instruct-2501-unsloth-bnb-4bit",
        "unsloth/Mistral-Small-24B-Instruct-2501",
        "mistralai/Mistral-Small-24B-Instruct-2501",
        "unsloth/Mistral-Small-24B-Instruct-2501-bnb-4bit",
    ),
    "mistral-small-31": (
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503-unsloth-bnb-4bit",
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503",
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "unsloth/Mistral-Small-3.1-24B-Instruct-2503-bnb-4bit",
    ),
    "mistral-small-32": (
        "unsloth/Mistral-Small-3.2-24B-Instruct-2506-unsloth-bnb-4bit",
        "unsloth/Mistral-Small-3.2-24B-Instruct-2506",
        "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
        "unsloth/Mistral-Small-3.2-24B-Instruct-2506-bnb-4bit",
    ),
    "mixtral": (
        "unsloth/Mixtral-8x7B-Instruct-v0.1-unsloth-bnb-4bit",
        "unsloth/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "unsloth/Mixtral-8x7B-Instruct-v0.1-bnb-4bit",
    ),
    "mistral-nemo": (
        "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
        "unsloth/Mistral-Nemo-Instruct-2407",
        "mistralai/Mistral-Nemo-Instruct-2407",
    ),
    "codestral": (
        "mistralai/Codestral-22B-v0.1",
        "mistral-community/Codestral-22B-v0.1",
    ),
    "devstral": (
        "unsloth/Devstral-Small-2505-unsloth-bnb-4bit",
        "unsloth/Devstral-Small-2505",
        "mistralai/Devstral-Small-2505",
        "unsloth/Devstral-Small-2505-bnb-4bit",
        "unsloth/Devstral-Small-2507-unsloth-bnb-4bit",
        "unsloth/Devstral-Small-2507",
        "mistralai/Devstral-Small-2507",
        "unsloth/Devstral-Small-2507-bnb-4bit",
    ),
    "magistral": (
        "unsloth/Magistral-Small-2506-unsloth-bnb-4bit",
        "unsloth/Magistral-Small-2506",
        "mistralai/Magistral-Small-2506",
        "unsloth/Magistral-Small-2506-bnb-4bit",
        "unsloth/Magistral-Small-2507-unsloth-bnb-4bit",
        "unsloth/Magistral-Small-2507",
        "mistralai/Magistral-Small-2507",
        "unsloth/Magistral-Small-2507-bnb-4bit",
        "unsloth/Magistral-Small-2509-unsloth-bnb-4bit",
        "unsloth/Magistral-Small-2509",
        "mistralai/Magistral-Small-2509",
        "unsloth/Magistral-Small-2509-bnb-4bit",
    ),
    "tinyllama": (
        "unsloth/tinyllama-chat-bnb-4bit",
        "unsloth/tinyllama-chat",
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    ),
    "llama": (
        "unsloth/llama-2-7b-bnb-4bit",
        "unsloth/llama-2-7b",
        "meta-llama/Llama-2-7b-hf",
        "unsloth/llama-2-13b-bnb-4bit",
        "unsloth/llama-2-13b",
        "meta-llama/Llama-2-13b-hf",
        "unsloth/llama-2-7b-chat-bnb-4bit",
        "unsloth/llama-2-7b-chat",
        "meta-llama/Llama-2-7b-chat-hf",
    ),
    "llama3": (
        "unsloth/llama-3-8b-Instruct-bnb-4bit",
        "unsloth/llama-3-8b-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "unsloth/llama-3-70b-Instruct-bnb-4bit",
        "meta-llama/Meta-Llama-3-70B-Instruct",
    ),
    "llama-3.1": (
        "unsloth/Meta-Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        "unsloth/Meta-Llama-3.1-8B-Instruct",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        "unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.1-8B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
        "unsloth/Llama-3.1-8B-Instruct-bnb-4bit",
        "unsloth/Meta-Llama-3.1-405B-Instruct-bnb-4bit",
        "meta-llama/Meta-Llama-3.1-405B-Instruct",
        "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
        "unsloth/Meta-Llama-3.1-70B-Instruct",
        "meta-llama/Meta-Llama-3.1-70B-Instruct",
        "unsloth/Hermes-3-Llama-3.1-8B-bnb-4bit",
        "unsloth/Hermes-3-Llama-3.1-8B",
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "unsloth/Hermes-3-Llama-3.1-70B-bnb-4bit",
        "unsloth/Hermes-3-Llama-3.1-70B",
        "NousResearch/Hermes-3-Llama-3.1-70B",
        "unsloth/Hermes-3-Llama-3.1-405B-bnb-4bit",
        "NousResearch/Hermes-3-Llama-3.1-405B",
        "unsloth/Llama-3.1-Tulu-3-8B-bnb-4bit",
        "unsloth/Llama-3.1-Tulu-3-8B",
        "allenai/Llama-3.1-Tulu-3-8B",
        "unsloth/Llama-3.1-Tulu-3-70B-bnb-4bit",
        "unsloth/Llama-3.1-Tulu-3-70B",
        "allenai/Llama-3.1-Tulu-3-70B",
    ),
    "llama-31-storm": (
        "unsloth/Llama-3.1-Storm-8B-bnb-4bit",
        "unsloth/Llama-3.1-Storm-8B",
        "akjindal53244/Llama-3.1-Storm-8B",
    ),
    "llama-31-nemotron": (
        "unsloth/Llama-3.1-Nemotron-70B-Instruct-bnb-4bit",
        "unsloth/Llama-3.1-Nemotron-70B-Instruct",
        "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
    ),
    "llama-3.2": (
        "unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    ),
    "llama-32-vision": (
        "unsloth/Llama-3.2-11B-Vision-Instruct-unsloth-bnb-4bit",
        "unsloth/Llama-3.2-11B-Vision-Instruct",
        "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit",
        "unsloth/Llama-3.2-90B-Vision-Instruct",
        "meta-llama/Llama-3.2-90B-Vision-Instruct",
    ),
    "llama-3.3": (
        "unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
        "unsloth/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.3-70B-Instruct",
    ),
    "gemma": (
        "unsloth/gemma-7b-it-bnb-4bit",
        "unsloth/gemma-7b-it",
        "google/gemma-7b-it",
        "google/gemma-2b-it",
        "unsloth/gemma-1.1-2b-it-bnb-4bit",
        "unsloth/gemma-1.1-2b-it",
        "google/gemma-1.1-2b-it",
        "unsloth/gemma-1.1-7b-it-bnb-4bit",
        "unsloth/gemma-1.1-7b-it",
        "google/gemma-1.1-7b-it",
    ),
    "gemma2": (
        "unsloth/gemma-2-9b-it-bnb-4bit",
        "unsloth/gemma-2-9b-it",
        "google/gemma-2-9b-it",
        "unsloth/gemma-2-27b-it-bnb-4bit",
        "unsloth/gemma-2-27b-it",
        "google/gemma-2-27b-it",
        "unsloth/gemma-2-2b-it-bnb-4bit",
        "unsloth/gemma-2-2b-it",
        "google/gemma-2-2b-it",
    ),
    "gemma-3": (
        "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-1b-it",
        "google/gemma-3-1b-it",
        "unsloth/gemma-3-1b-it-bnb-4bit",
        "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-4b-it",
        "google/gemma-3-4b-it",
        "unsloth/gemma-3-4b-it-bnb-4bit",
        "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-12b-it",
        "google/gemma-3-12b-it",
        "unsloth/gemma-3-12b-it-bnb-4bit",
        "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-27b-it",
        "google/gemma-3-27b-it",
        "unsloth/gemma-3-27b-it-bnb-4bit",
        "unsloth/medgemma-4b-it-unsloth-bnb-4bit",
        "unsloth/medgemma-4b-it",
        "google/medgemma-4b-it",
        "unsloth/medgemma-4b-it-bnb-4bit",
        "unsloth/medgemma-27b-text-it-unsloth-bnb-4bit",
        "unsloth/medgemma-27b-text-it",
        "google/medgemma-27b-text-it",
        "unsloth/medgemma-27b-text-it-bnb-4bit",
    ),
    # NOTE(review): in each group of four below, the last entry repeats the
    # first ("...-unsloth-bnb-4bit"). Other families end their groups with a
    # plain "-bnb-4bit" repo; confirm whether that was intended here.
    "gemma3n": (
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
        "unsloth/gemma-3n-E4B-it",
        "google/gemma-3n-E4B-it",
        "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
        "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
        "unsloth/gemma-3n-E2B-it",
        "google/gemma-3n-E2B-it",
        "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    ),
    "gemma3-270m": (
        "unsloth/gemma-3-270m-it-unsloth-bnb-4bit",
        "unsloth/gemma-3-270m-it",
        "google/gemma-3-270m-it",
        "unsloth/gemma-3-270m-it-bnb-4bit",
    ),
    "qwen-25": (
        "unsloth/Qwen2.5-0.5B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "unsloth/Qwen2.5-0.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-1.5B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-1.5B-Instruct",
        "Qwen/Qwen2.5-1.5B-Instruct",
        "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-3B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "unsloth/Qwen2.5-3B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-7B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct",
        "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-14B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "unsloth/Qwen2.5-14B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-32B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "unsloth/Qwen2.5-72B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
        "unsloth/Qwen2.5-Math-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Math-1.5B-Instruct",
        "Qwen/Qwen2.5-Math-1.5B-Instruct",
        "unsloth/Qwen2.5-Math-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Math-7B-Instruct",
        "Qwen/Qwen2.5-Math-7B-Instruct",
        "unsloth/Qwen2.5-Math-72B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Math-72B-Instruct",
        "Qwen/Qwen2.5-Math-72B-Instruct",
    ),
    "qwen-25-coder": (
        "unsloth/Qwen2.5-Coder-0.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-0.5B-Instruct",
        "unsloth/Qwen2.5-Coder-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-1.5B-Instruct",
        "Qwen/Qwen2.5-Coder-1.5B-Instruct",
        "unsloth/Qwen2.5-Coder-3B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-3B-Instruct",
        "Qwen/Qwen2.5-Coder-3B-Instruct",
        "unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-7B-Instruct",
        "Qwen/Qwen2.5-Coder-7B-Instruct",
        "unsloth/Qwen2.5-Coder-14B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-14B-Instruct",
        "Qwen/Qwen2.5-Coder-14B-Instruct",
        "unsloth/Qwen2.5-Coder-32B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-Coder-32B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ),
    "qwen-25-vl": (
        "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-3B-Instruct",
        "Qwen/Qwen2.5-VL-3B-Instruct",
        "unsloth/Qwen2.5-VL-3B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-VL-32B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "unsloth/Qwen2.5-VL-32B-Instruct-bnb-4bit",
        "unsloth/Qwen2.5-VL-72B-Instruct-unsloth-bnb-4bit",
        "unsloth/Qwen2.5-VL-72B-Instruct",
        "Qwen/Qwen2.5-VL-72B-Instruct",
        "unsloth/Qwen2.5-VL-72B-Instruct-bnb-4bit",
    ),
    "openthinker": (
        "unsloth/OpenThinker-7B-unsloth-bnb-4bit",
        "unsloth/OpenThinker-7B",
        "open-thoughts/OpenThinker-7B",
        "unsloth/OpenThinker-7B-bnb-4bit",
    ),
    "qwen-2": (
        "unsloth/Qwen2-0.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2-0.5B-Instruct",
        "Qwen/Qwen2-0.5B-Instruct",
        "unsloth/Qwen2-1.5B-Instruct-bnb-4bit",
        "unsloth/Qwen2-1.5B-Instruct",
        "Qwen/Qwen2-1.5B-Instruct",
        "unsloth/Qwen2-7B-Instruct-bnb-4bit",
        "unsloth/Qwen2-7B-Instruct",
        "Qwen/Qwen2-7B-Instruct",
        "unsloth/Qwen2-70B-Instruct-bnb-4bit",
        "Qwen/Qwen2-70B-Instruct",
    ),
    "qwen3": (
        "unsloth/Qwen3-0.6B-unsloth-bnb-4bit",
        "unsloth/Qwen3-0.6B",
        "Qwen/Qwen3-0.6B",
        "unsloth/Qwen3-0.6B-bnb-4bit",
        "unsloth/Qwen3-1.7B-unsloth-bnb-4bit",
        "unsloth/Qwen3-1.7B",
        "Qwen/Qwen3-1.7B",
        "unsloth/Qwen3-1.7B-bnb-4bit",
        "unsloth/Qwen3-4B-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B",
        "Qwen/Qwen3-4B",
        "unsloth/Qwen3-4B-bnb-4bit",
        "unsloth/Qwen3-8B-unsloth-bnb-4bit",
        "unsloth/Qwen3-8B",
        "Qwen/Qwen3-8B",
        "unsloth/Qwen3-8B-bnb-4bit",
        "unsloth/Qwen3-14B-unsloth-bnb-4bit",
        "unsloth/Qwen3-14B",
        "Qwen/Qwen3-14B",
        "unsloth/Qwen3-14B-bnb-4bit",
        "unsloth/Qwen3-32B-unsloth-bnb-4bit",
        "unsloth/Qwen3-32B",
        "Qwen/Qwen3-32B",
        "unsloth/Qwen3-32B-bnb-4bit",
        "unsloth/Qwen3-30B-A3B-unsloth-bnb-4bit",
        "unsloth/Qwen3-30B-A3B",
        "Qwen/Qwen3-30B-A3B",
        "unsloth/Qwen3-30B-A3B-bnb-4bit",
    ),
    # NOTE(review): the last four entries of this tuple repeat the first four
    # (the Qwen3-4B-Instruct-2507 group). The repetition does not change the
    # reverse map; confirm whether another model was meant to be listed.
    "qwen3-instruct": (
        "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B-Instruct-2507",
        "Qwen/Qwen3-4B-Instruct-2507",
        "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit",
        "unsloth/Qwen3-30B-A3B-Instruct-2507",
        "Qwen/Qwen3-30B-A3B-Instruct-2507",
        "unsloth/Qwen3-Coder-30B-A3B-Instruct",
        "Qwen/Qwen3-Coder-30B-A3B-Instruct",
        "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B-Instruct-2507",
        "Qwen/Qwen3-4B-Instruct-2507",
        "unsloth/Qwen3-4B-Instruct-2507-bnb-4bit",
    ),
    "qwen3-thinking": (
        "unsloth/QwQ-32B-Preview-bnb-4bit",
        "unsloth/QwQ-32B-Preview",
        "Qwen/QwQ-32B-Preview",
        "unsloth/QwQ-32B-unsloth-bnb-4bit",
        "unsloth/QwQ-32B",
        "Qwen/QwQ-32B",
        "unsloth/QwQ-32B-bnb-4bit",
        "unsloth/Qwen3-4B-Thinking-2507-unsloth-bnb-4bit",
        "unsloth/Qwen3-4B-Thinking-2507",
        "Qwen/Qwen3-4B-Thinking-2507",
        "unsloth/Qwen3-4B-Thinking-2507-bnb-4bit",
        "unsloth/Qwen3-30B-A3B-Thinking-2507",
        "Qwen/Qwen3-30B-A3B-Thinking-2507",
    ),
    "zephyr": (
        "unsloth/zephyr-sft-bnb-4bit",
        "unsloth/zephyr-sft",
        "HuggingFaceH4/mistral-7b-sft-beta",
    ),
    "chatml": (
        "unsloth/Hermes-2-Pro-Mistral-7B-bnb-4bit",
        "unsloth/Hermes-2-Pro-Mistral-7B",
        "NousResearch/Hermes-2-Pro-Mistral-7B",
        "unsloth/OpenHermes-2.5-Mistral-7B-bnb-4bit",
        "unsloth/OpenHermes-2.5-Mistral-7B",
        "teknium/OpenHermes-2.5-Mistral-7B",
    ),
    # NOTE(review): in each group of four below, the last entry repeats the
    # first ("...-unsloth-bnb-4bit"); confirm whether a plain "-bnb-4bit"
    # repo was intended.
    "gpt-oss": (
        "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-20b",
        "openai/gpt-oss-20b",
        "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
        "unsloth/gpt-oss-120b",
        "openai/gpt-oss-120b",
        "unsloth/gpt-oss-120b-unsloth-bnb-4bit",
    ),
    "starling": (
        "unsloth/Starling-LM-7B-beta-bnb-4bit",
        "unsloth/Starling-LM-7B-beta",
        "Nexusflow/Starling-LM-7B-beta",
    ),
    "yi-chat": (
        "unsloth/yi-34b-chat-bnb-4bit",
        "01-ai/Yi-6B-Chat",
        "01-ai/Yi-34B-Chat",
    ),
    "granite-32": (
        "unsloth/granite-3.2-2b-instruct-unsloth-bnb-4bit",
        "unsloth/granite-3.2-2b-instruct",
        "ibm-granite/granite-3.2-2b-instruct",
        "unsloth/granite-3.2-2b-instruct-bnb-4bit",
        "unsloth/granite-3.2-8b-instruct-unsloth-bnb-4bit",
        "unsloth/granite-3.2-8b-instruct",
        "ibm-granite/granite-3.2-8b-instruct",
        "unsloth/granite-3.2-8b-instruct-bnb-4bit",
    ),
    "granite-32-vision": (
        "unsloth/granite-vision-3.2-2b-unsloth-bnb-4bit",
        "unsloth/granite-vision-3.2-2b",
        "ibm-granite/granite-vision-3.2-2b",
        "unsloth/granite-vision-3.2-2b-bnb-4bit",
    ),
}

# Reverse lookup: model repo id -> Ollama template name. Every id is stored
# twice, once as written and once lowercased, so lookups work either way.
MODEL_TO_OLLAMA_TEMPLATE_MAPPER = {}

for key, values in OLLAMA_TEMPLATE_TO_MODEL_MAPPER.items():
    # First pass: ids exactly as listed, mapped to the template name.
    MODEL_TO_OLLAMA_TEMPLATE_MAPPER.update(dict.fromkeys(values, key))

    # Second pass: lowercased ids, mapped to the lowercased template name.
    # Kept after the first pass so that an id which is already lowercase
    # ends up pointing at the lowercased name.
    lowered_key = key.lower()
    MODEL_TO_OLLAMA_TEMPLATE_MAPPER.update(
        dict.fromkeys(map(str.lower, values), lowered_key)
    )


================================================
FILE: unsloth/registry/REGISTRY.md
================================================
## Model Registry

### Structure
```
unsloth
    -registry
        __init__.py
        registry.py
        _llama.py
        _mistral.py
        _phi.py
        ...
```

Each model is registered in a separate file within the `registry` module (e.g. `registry/_llama.py`).

Within each model registration file, a high-level `ModelMeta` is created for each model version, with the following structure:
```python
@dataclass
class ModelMeta:
    org: str
    base_name: str
    model_version: str
    model_info_cls: type[ModelInfo]
    model_sizes: list[str] = field(default_factory=list)
    instruct_tags: list[str] = field(default_factory=list)
    quant_types: list[QuantType] | dict[str, list[QuantType]] = field(default_factory=list)
    is_multimodal: bool = False
```

Each model then instantiates a global `ModelMeta` for its specific model version, defining how the model path (e.g. `unsloth/Llama-3.1-8B-Instruct`) is constructed since each model type has a different naming convention.
```python
LlamaMeta_3_1 = ModelMeta(
    org="meta-llama",
    base_name="Llama",
    instruct_tags=[None, "Instruct"],
    model_version="3.1",
    model_sizes=["8"],
    model_info_cls=LlamaModelInfo,
    is_multimodal=False,
    quant_types=[QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)
```

`LlamaModelInfo` is a subclass of `ModelInfo` that defines the model path for each model size and quant type.
```python
class LlamaModelInfo(ModelInfo):
    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        key = f"{base_name}-{version}-{size}B"
        return super().construct_model_name(base_name, version, size, quant_type, instruct_tag, key)
```

Once these constructs are defined, the model is registered by writing a `register_xx_models` function.
```python
def register_llama_3_1_models(include_original_model: bool = False):
    global _IS_LLAMA_3_1_REGISTERED
    if _IS_LLAMA_3_1_REGISTERED:
        return
    _register_models(LlamaMeta_3_1, include_original_model=include_original_model)
    _IS_LLAMA_3_1_REGISTERED = True
```

`_register_models` is a helper function that registers the model with the registry. The global `_IS_XX_REGISTERED` flag is used to prevent duplicate registration.

Once a model is registered, `registry.registry.MODEL_REGISTRY` is updated with the model info and can be searched with `registry.search_models`.

### Tests

The `tests/test_model_registry.py` file contains tests for the model registry.

Also, each model registration file is an executable module that checks that all registered models are available on `huggingface_hub`. For example:
```bash
python -m unsloth.registry._llama
```

Prints the following (abridged) output:
```bash
✓ unsloth/Llama-3.1-8B
✓ unsloth/Llama-3.1-8B-bnb-4bit
✓ unsloth/Llama-3.1-8B-unsloth-bnb-4bit
✓ meta-llama/Llama-3.1-8B
✓ unsloth/Llama-3.1-8B-Instruct
✓ unsloth/Llama-3.1-8B-Instruct-bnb-4bit
✓ unsloth/Llama-3.1-8B-Instruct-unsloth-bnb-4bit
✓ meta-llama/Llama-3.1-8B-Instruct
✓ unsloth/Llama-3.2-1B
✓ unsloth/Llama-3.2-1B-bnb-4bit
✓ unsloth/Llama-3.2-1B-unsloth-bnb-4bit
✓ meta-llama/Llama-3.2-1B
...
```

### TODO
- Model Collections
    - [x] Gemma3
    - [ ] Llama3.1
    - [x] Llama3.2
    - [x] MistralSmall
    - [x] Qwen2.5
    - [x] Qwen2.5-VL
    - [ ] Qwen2.5 Coder
    - [x] QwenQwQ-32B
    - [x] Deepseek v3
    - [x] Deepseek R1
    - [x] Phi-4
    - [ ] Unsloth 4-bit Dynamic Quants
    - [ ] Vision/multimodal models
- Sync model uploads with registry
- Add utility methods for tracking model stats

================================================
FILE: unsloth/registry/__init__.py
================================================
from ._deepseek import register_deepseek_models as _register_deepseek_models
from ._gemma import register_gemma_models as _register_gemma_models
from ._llama import register_llama_models as _register_llama_models
from ._mistral import register_mistral_models as _register_mistral_models
from ._phi import register_phi_models as _register_phi_models
from ._qwen import register_qwen_models as _register_qwen_models
from .registry import MODEL_REGISTRY, ModelInfo, QuantType

# Set to True by register_models() after every family has been registered.
_ARE_MODELS_REGISTERED = False


def register_models():
    """Run every per-family registration function once.

    The module-level ``_ARE_MODELS_REGISTERED`` flag is checked first; when it
    is already set the call returns immediately. The flag is only set after
    all registrars have returned, so a failure part-way through leaves it
    False.
    """
    global _ARE_MODELS_REGISTERED

    if _ARE_MODELS_REGISTERED:
        return

    family_registrars = (
        _register_deepseek_models,
        _register_gemma_models,
        _register_llama_models,
        _register_mistral_models,
        _register_phi_models,
        _register_qwen_models,
    )
    for registrar in family_registrars:
        registrar()

    _ARE_MODELS_REGISTERED = True


def search_models(
    org: str = None,
    base_name: str = None,
    version: str = None,
    size: str = None,
    quant_types: list[QuantType] = None,
    search_pattern: str = None,
) -> list[ModelInfo]:
    """
    Get model info from the registry.

    Models are registered on first use (via ``register_models``) if that has
    not happened yet. Each argument is an optional filter; a filter that is
    ``None`` or otherwise falsy (e.g. ``""`` or ``[]``) is ignored, and all
    supplied filters must match.

    See registry.ModelInfo for more fields.

    Args:
        org: Keep models whose ``org`` equals this value.
        base_name: Keep models whose ``base_name`` equals this value.
        version: Keep models whose ``version`` equals this value.
        size: Keep models whose ``size`` equals this value.
        quant_types: Keep models whose ``quant_type`` equals any of these.
        search_pattern: Keep models whose ``model_path`` contains this text.
            This is a plain substring test, not a regular expression. The
            model path is the model_id on huggingface hub.

    Returns:
        A new list of the matching ``ModelInfo`` objects. With no filters it
        holds every registered model.
    """
    if not _ARE_MODELS_REGISTERED:
        register_models()

    # Copy into a list up front: without any filter the original code handed
    # back the live ``dict_values`` view of MODEL_REGISTRY, which is not a
    # list (no indexing) and keeps changing as the registry changes.
    model_infos = list(MODEL_REGISTRY.values())
    if org:
        model_infos = [
            model_info for model_info in model_infos if model_info.org == org
        ]
    if base_name:
        model_infos = [
            model_info
            for model_info in model_infos
            if model_info.base_name == base_name
        ]
    if version:
        model_infos = [
            model_info for model_info in model_infos if model_info.version == version
        ]
    if size:
        model_infos = [
            model_info for model_info in model_infos if model_info.size == size
        ]
    if quant_types:
        model_infos = [
            model_info
            for model_info in model_infos
            if any(model_info.quant_type == quant_type for quant_type in quant_types)
        ]
    if search_pattern:
        model_infos = [
            model_info
            for model_info in model_infos
            if search_pattern in model_info.model_path
        ]

    return model_infos


================================================
FILE: unsloth/registry/_deepseek.py
================================================
from unsloth.registry.registry import ModelInfo, ModelMeta, QuantType, _register_models

# One guard flag per DeepSeek model family. Each register_deepseek_*_models
# function in this module checks its flag, registers its ModelMeta, and then
# sets the flag so that later calls return without registering again.
_IS_DEEPSEEK_V3_REGISTERED = False
_IS_DEEPSEEK_V3_0324_REGISTERED = False
_IS_DEEPSEEK_R1_REGISTERED = False
_IS_DEEPSEEK_R1_ZERO_REGISTERED = False
_IS_DEEPSEEK_R1_DISTILL_LLAMA_REGISTERED = False
_IS_DEEPSEEK_R1_DISTILL_QWEN_REGISTERED = False


class DeepseekV3ModelInfo(ModelInfo):
    """ModelInfo variant whose name key is ``<base_name>-V<version>``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the model name for a DeepSeek V3 style checkpoint.

        The key handed to the parent implementation is
        ``<base_name>-V<version>``; ``size`` is forwarded unchanged but is not
        part of that key.
        """
        versioned_key = "{}-V{}".format(base_name, version)
        parent = super()
        return parent.construct_model_name(
            base_name, version, size, quant_type, instruct_tag, versioned_key
        )


class DeepseekR1ModelInfo(ModelInfo):
    """ModelInfo variant whose name key is ``<base_name>[-<version>][-<size>B]``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the model name for a DeepSeek R1 style checkpoint.

        The key starts as ``base_name``. ``-<version>`` is appended only when
        ``version`` is truthy, and ``-<size>B`` only when ``size`` is truthy.
        The finished key is passed to the parent implementation together with
        the original arguments.
        """
        name_key = base_name
        if version:
            name_key = "{}-{}".format(name_key, version)
        if size:
            name_key = "{}-{}B".format(name_key, size)
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, name_key
        )


# Deepseek V3 Model Meta
# DeepseekV3ModelInfo builds the key "DeepSeek-V3" from base_name and
# model_version; the single empty entry in model_sizes is not part of that key.
DeepseekV3Meta = ModelMeta(
    org = "deepseek-ai",
    base_name = "DeepSeek",
    instruct_tags = [None],
    model_version = "3",
    model_sizes = [""],
    model_info_cls = DeepseekV3ModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BF16],
)

# Deepseek V3-0324 Model Meta
# Same naming class as V3; the version string "3-0324" gives the key
# "DeepSeek-V3-0324". Registered with the NONE and GGUF quant types.
DeepseekV3_0324Meta = ModelMeta(
    org = "deepseek-ai",
    base_name = "DeepSeek",
    instruct_tags = [None],
    model_version = "3-0324",
    model_sizes = [""],
    model_info_cls = DeepseekV3ModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.GGUF],
)

# Deepseek R1 Model Meta
# model_version and the only model_sizes entry are both empty strings, so
# DeepseekR1ModelInfo uses the bare base name "DeepSeek-R1" as the key.
DeepseekR1Meta = ModelMeta(
    org = "deepseek-ai",
    base_name = "DeepSeek-R1",
    instruct_tags = [None],
    model_version = "",
    model_sizes = [""],
    model_info_cls = DeepseekR1ModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BF16, QuantType.GGUF],
)

# Deepseek R1 Zero Model Meta
# The version "Zero" is appended by DeepseekR1ModelInfo, giving the key
# "DeepSeek-R1-Zero"; the empty size adds no suffix.
DeepseekR1ZeroMeta = ModelMeta(
    org = "deepseek-ai",
    base_name = "DeepSeek-R1",
    instruct_tags = [None],
    model_version = "Zero",
    model_sizes = [""],
    model_info_cls = DeepseekR1ModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.GGUF],
)

# Deepseek R1 Distill Llama Model Meta
# Keys take the form "DeepSeek-R1-Distill-Llama-<size>B". quant_types is a
# dict here, listing the quant types separately for each model size.
DeepseekR1DistillLlamaMeta = ModelMeta(
    org = "deepseek-ai",
    base_name = "DeepSeek-R1-Distill",
    instruct_tags = [None],
    model_version = "Llama",
    model_sizes = ["8", "70"],
    model_info_cls = DeepseekR1ModelInfo,
    is_multimodal = False,
    quant_types = {"8": [QuantType.UNSLOTH, QuantType.GGUF], "70": [QuantType.GGUF]},
)

# Deepseek R1 Distill Qwen Model Meta
# Keys take the form "DeepSeek-R1-Distill-Qwen-<size>B". quant_types is a
# dict keyed by model size; every size in model_sizes has its own list.
DeepseekR1DistillQwenMeta = ModelMeta(
    org = "deepseek-ai",
    base_name = "DeepSeek-R1-Distill",
    instruct_tags = [None],
    model_version = "Qwen",
    model_sizes = ["1.5", "7", "14", "32"],
    model_info_cls = DeepseekR1ModelInfo,
    is_multimodal = False,
    quant_types = {
        "1.5": [QuantType.UNSLOTH, QuantType.BNB, QuantType.GGUF],
        "7": [QuantType.UNSLOTH, QuantType.BNB],
        "14": [QuantType.UNSLOTH, QuantType.BNB, QuantType.GGUF],
        "32": [QuantType.GGUF, QuantType.BNB],
    },
)


def register_deepseek_v3_models(include_original_model: bool = False):
    """Register the DeepSeek V3 entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_DEEPSEEK_V3_REGISTERED
    if not _IS_DEEPSEEK_V3_REGISTERED:
        _register_models(DeepseekV3Meta, include_original_model = include_original_model)
        # The guard is only flipped once registration has completed.
        _IS_DEEPSEEK_V3_REGISTERED = True


def register_deepseek_v3_0324_models(include_original_model: bool = False):
    """Register the DeepSeek V3-0324 entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_DEEPSEEK_V3_0324_REGISTERED
    if not _IS_DEEPSEEK_V3_0324_REGISTERED:
        _register_models(
            DeepseekV3_0324Meta, include_original_model = include_original_model
        )
        # The guard is only flipped once registration has completed.
        _IS_DEEPSEEK_V3_0324_REGISTERED = True


def register_deepseek_r1_models(include_original_model: bool = False):
    """Register the DeepSeek R1 entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_DEEPSEEK_R1_REGISTERED
    if not _IS_DEEPSEEK_R1_REGISTERED:
        _register_models(DeepseekR1Meta, include_original_model = include_original_model)
        # The guard is only flipped once registration has completed.
        _IS_DEEPSEEK_R1_REGISTERED = True


def register_deepseek_r1_zero_models(include_original_model: bool = False):
    """Register the DeepSeek R1 Zero entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_DEEPSEEK_R1_ZERO_REGISTERED
    if not _IS_DEEPSEEK_R1_ZERO_REGISTERED:
        _register_models(
            DeepseekR1ZeroMeta, include_original_model = include_original_model
        )
        # The guard is only flipped once registration has completed.
        _IS_DEEPSEEK_R1_ZERO_REGISTERED = True


def register_deepseek_r1_distill_llama_models(include_original_model: bool = False):
    """Register the DeepSeek R1 Distill (Llama) entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_DEEPSEEK_R1_DISTILL_LLAMA_REGISTERED
    if not _IS_DEEPSEEK_R1_DISTILL_LLAMA_REGISTERED:
        _register_models(
            DeepseekR1DistillLlamaMeta,
            include_original_model = include_original_model,
        )
        # The guard is only flipped once registration has completed.
        _IS_DEEPSEEK_R1_DISTILL_LLAMA_REGISTERED = True


def register_deepseek_r1_distill_qwen_models(include_original_model: bool = False):
    """Register the DeepSeek R1 Distill (Qwen) entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_DEEPSEEK_R1_DISTILL_QWEN_REGISTERED
    if not _IS_DEEPSEEK_R1_DISTILL_QWEN_REGISTERED:
        _register_models(
            DeepseekR1DistillQwenMeta,
            include_original_model = include_original_model,
        )
        # The guard is only flipped once registration has completed.
        _IS_DEEPSEEK_R1_DISTILL_QWEN_REGISTERED = True


def register_deepseek_models(include_original_model: bool = False):
    """Run every DeepSeek registration function in this module, in order.

    Args:
        include_original_model: Passed through to each per-family function.
    """
    family_registrars = (
        register_deepseek_v3_models,
        register_deepseek_v3_0324_models,
        register_deepseek_r1_models,
        register_deepseek_r1_zero_models,
        register_deepseek_r1_distill_llama_models,
        register_deepseek_r1_distill_qwen_models,
    )
    for register_family in family_registrars:
        register_family(include_original_model = include_original_model)


def _list_deepseek_r1_distill_models():
    """List the "Distill" models published under the ``unsloth`` author.

    Returns:
        One string per model returned by ``list_models``: the repo name (the
        text after the last "/") with a leading ``DeepSeek-R1-Distill-``
        removed. Names that do not start with that prefix are returned whole.
    """
    from unsloth.utils.hf_hub import ModelInfo as HfModelInfo
    from unsloth.utils.hf_hub import list_models

    hub_models: list[HfModelInfo] = list_models(
        author = "unsloth", search = "Distill", limit = 1000
    )
    return [
        hub_model.id.split("/")[-1].removeprefix("DeepSeek-R1-Distill-")
        for hub_model in hub_models
    ]


# Import-time registration: every DeepSeek family is registered (including the
# releasing organization's original repos) as soon as this module is loaded.
register_deepseek_models(include_original_model = True)

if __name__ == "__main__":
    # Manual check: look up every registered DeepSeek model id on Hugging Face
    # and print a check mark or a cross for each one.
    from unsloth.registry.registry import MODEL_REGISTRY, _check_model_info

    MODEL_REGISTRY.clear()

    # The import-time call above already set every guard flag. Without
    # resetting them, the call below would return immediately and the freshly
    # cleared registry would stay empty, so nothing would be checked.
    _IS_DEEPSEEK_V3_REGISTERED = False
    _IS_DEEPSEEK_V3_0324_REGISTERED = False
    _IS_DEEPSEEK_R1_REGISTERED = False
    _IS_DEEPSEEK_R1_ZERO_REGISTERED = False
    _IS_DEEPSEEK_R1_DISTILL_LLAMA_REGISTERED = False
    _IS_DEEPSEEK_R1_DISTILL_QWEN_REGISTERED = False

    register_deepseek_models(include_original_model = True)

    for model_id in MODEL_REGISTRY:
        # _check_model_info returns None when the repo is not found.
        if _check_model_info(model_id) is None:
            print(f"\u2718 {model_id}")
        else:
            print(f"\u2713 {model_id}")
    # distill_models = _list_deepseek_r1_distill_models()
    # for model in sorted(distill_models):
    #     if "qwen" in model.lower():
    #         print(model)


================================================
FILE: unsloth/registry/_gemma.py
================================================
from unsloth.registry.registry import ModelInfo, ModelMeta, QuantType, _register_models

# Guard flags: set to True by the matching register_* function below so that
# each Gemma 3 family is only registered once.
_IS_GEMMA_3_BASE_REGISTERED = False
_IS_GEMMA_3_INSTRUCT_REGISTERED = False


class GemmaModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}-{version}-{size}B``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}", f"{version}", f"{size}B"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


# Gemma 3 base ("pt") models: four sizes, offered unquantized, bnb 4-bit and
# unsloth dynamic 4-bit.
GemmaMeta3Base = ModelMeta(
    org = "google",
    base_name = "gemma",
    instruct_tags = ["pt"],  # pt = base
    model_version = "3",
    model_sizes = ["1", "4", "12", "27"],
    model_info_cls = GemmaModelInfo,
    is_multimodal = True,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)

# Gemma 3 instruction-tuned ("it") models: same sizes as the base set, with
# GGUF added to the quantization list.
GemmaMeta3Instruct = ModelMeta(
    org = "google",
    base_name = "gemma",
    instruct_tags = ["it"],  # it = instruction tuned
    model_version = "3",
    model_sizes = ["1", "4", "12", "27"],
    model_info_cls = GemmaModelInfo,
    is_multimodal = True,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH, QuantType.GGUF],
)


def register_gemma_3_base_models(include_original_model: bool = False):
    """Register the Gemma 3 base entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_GEMMA_3_BASE_REGISTERED
    if not _IS_GEMMA_3_BASE_REGISTERED:
        _register_models(GemmaMeta3Base, include_original_model = include_original_model)
        # The guard is only flipped once registration has completed.
        _IS_GEMMA_3_BASE_REGISTERED = True


def register_gemma_3_instruct_models(include_original_model: bool = False):
    """Register the Gemma 3 instruct entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_GEMMA_3_INSTRUCT_REGISTERED
    if not _IS_GEMMA_3_INSTRUCT_REGISTERED:
        _register_models(
            GemmaMeta3Instruct, include_original_model = include_original_model
        )
        # The guard is only flipped once registration has completed.
        _IS_GEMMA_3_INSTRUCT_REGISTERED = True


def register_gemma_models(include_original_model: bool = False):
    """Run both Gemma 3 registration functions: base first, then instruct.

    Args:
        include_original_model: Passed through to each per-family function.
    """
    for register_family in (
        register_gemma_3_base_models,
        register_gemma_3_instruct_models,
    ):
        register_family(include_original_model = include_original_model)


if __name__ == "__main__":
    # Manual check: register the Gemma models into an emptied registry, then
    # print a check mark or a cross for each id depending on the lookup result.
    from unsloth.registry.registry import MODEL_REGISTRY, _check_model_info

    MODEL_REGISTRY.clear()
    register_gemma_models(include_original_model = True)

    for registered_id in MODEL_REGISTRY:
        hub_info = _check_model_info(registered_id)
        print(
            f"\u2718 {registered_id}" if hub_info is None else f"\u2713 {registered_id}"
        )


================================================
FILE: unsloth/registry/_llama.py
================================================
from unsloth.registry.registry import ModelInfo, ModelMeta, QuantType, _register_models

# Guard flags: set to True by the matching register_* function below so that
# each Llama family is only registered once.
_IS_LLAMA_3_1_REGISTERED = False
_IS_LLAMA_3_2_REGISTERED = False
_IS_LLAMA_3_2_VISION_REGISTERED = False


class LlamaModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}-{version}-{size}B``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}", f"{version}", f"{size}B"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


class LlamaVisionModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}-{version}-{size}B-Vision``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}", f"{version}", f"{size}B", "Vision"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


# Llama 3.1: one size, both the untagged (None) and "Instruct" variants.
LlamaMeta_3_1 = ModelMeta(
    org = "meta-llama",
    base_name = "Llama",
    instruct_tags = [None, "Instruct"],
    model_version = "3.1",
    model_sizes = ["8"],
    model_info_cls = LlamaModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)

# Llama 3.2 Base Models (no instruct tag)
LlamaMeta_3_2_Base = ModelMeta(
    org = "meta-llama",
    base_name = "Llama",
    instruct_tags = [None],
    model_version = "3.2",
    model_sizes = ["1", "3"],
    model_info_cls = LlamaModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)

# Llama 3.2 Instruction Tuned Models: kept separate from the base meta because
# the quantization list additionally contains GGUF.
LlamaMeta_3_2_Instruct = ModelMeta(
    org = "meta-llama",
    base_name = "Llama",
    instruct_tags = ["Instruct"],
    model_version = "3.2",
    model_sizes = ["1", "3"],
    model_info_cls = LlamaModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH, QuantType.GGUF],
)

# Llama 3.2 Vision: quant_types is a dict keyed by model size, so the 90B
# entry is only offered unquantized.
LlamaMeta_3_2_Vision = ModelMeta(
    org = "meta-llama",
    base_name = "Llama",
    instruct_tags = [None, "Instruct"],
    model_version = "3.2",
    model_sizes = ["11", "90"],
    model_info_cls = LlamaVisionModelInfo,
    is_multimodal = True,
    quant_types = {
        "11": [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
        "90": [QuantType.NONE],
    },
)


def register_llama_3_1_models(include_original_model: bool = False):
    """Register the Llama 3.1 entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_LLAMA_3_1_REGISTERED
    if not _IS_LLAMA_3_1_REGISTERED:
        _register_models(LlamaMeta_3_1, include_original_model = include_original_model)
        # The guard is only flipped once registration has completed.
        _IS_LLAMA_3_1_REGISTERED = True


def register_llama_3_2_models(include_original_model: bool = False):
    """Register the Llama 3.2 base and instruct entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_LLAMA_3_2_REGISTERED
    if not _IS_LLAMA_3_2_REGISTERED:
        # Base entries first, then instruct; one guard covers both metas.
        for text_meta in (LlamaMeta_3_2_Base, LlamaMeta_3_2_Instruct):
            _register_models(text_meta, include_original_model = include_original_model)
        _IS_LLAMA_3_2_REGISTERED = True


def register_llama_3_2_vision_models(include_original_model: bool = False):
    """Register the Llama 3.2 Vision entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_LLAMA_3_2_VISION_REGISTERED
    if not _IS_LLAMA_3_2_VISION_REGISTERED:
        _register_models(
            LlamaMeta_3_2_Vision,
            include_original_model = include_original_model,
        )
        # The guard is only flipped once registration has completed.
        _IS_LLAMA_3_2_VISION_REGISTERED = True


def register_llama_models(include_original_model: bool = False):
    """Run every Llama registration function: 3.1, 3.2, then 3.2 Vision.

    Args:
        include_original_model: Passed through to each per-family function.
    """
    for register_family in (
        register_llama_3_1_models,
        register_llama_3_2_models,
        register_llama_3_2_vision_models,
    ):
        register_family(include_original_model = include_original_model)


if __name__ == "__main__":
    # Manual check: register the Llama models into an emptied registry, then
    # print a check mark or a cross for each id depending on the lookup result.
    from unsloth.registry.registry import MODEL_REGISTRY, _check_model_info

    MODEL_REGISTRY.clear()
    register_llama_models(include_original_model = True)

    for registered_id in MODEL_REGISTRY:
        hub_info = _check_model_info(registered_id)
        print(
            f"\u2718 {registered_id}" if hub_info is None else f"\u2713 {registered_id}"
        )


================================================
FILE: unsloth/registry/_mistral.py
================================================
import copy

from unsloth.registry.registry import ModelInfo, ModelMeta, QuantType, _register_models

# Guard flag: set to True by register_mistral_small_models so the Mistral
# Small entries are only registered once.
_IS_MISTRAL_SMALL_REGISTERED = False

# Version strings used as model_version and appended to the model name.
_MISTRAL_SMALL_03_25_VERSION = "2503"
_MISTRAL_SMALL_01_25_VERSION = "2501"
_MISTRAL_SMALL_09_24_VERSION = "2409"  # Not uploaded to unsloth


class MistralSmallModelInfo(ModelInfo):
    """Model info for Mistral Small names, which end in a version suffix."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build ``{base_name}[-3.1]-{size}B-{instruct_tag}-{version}`` plus quant tag.

        The ``-3.1`` segment is only inserted for the 2503 version. The
        instruct tag is always interpolated, so it is expected to be set.
        """
        series_part = "-3.1" if version == _MISTRAL_SMALL_03_25_VERSION else ""
        name_stem = f"{base_name}{series_part}-{size}B-{instruct_tag}-{version}"
        return cls.append_quant_type(name_stem, quant_type)


# Mistral Small 2503 base: the template that the three metas below are
# deep-copied from and then adjusted.
MistralSmall_2503_Base_Meta = ModelMeta(
    org = "mistralai",
    base_name = "Mistral-Small",
    instruct_tags = ["Base"],
    model_version = _MISTRAL_SMALL_03_25_VERSION,
    model_sizes = ["24"],
    model_info_cls = MistralSmallModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.UNSLOTH, QuantType.BNB],
)

# 2503 instruct: copy of the base meta with the "Instruct" tag and GGUF added
# to the quantization list.
MistralSmall_2503_Instruct_Meta = copy.deepcopy(MistralSmall_2503_Base_Meta)
MistralSmall_2503_Instruct_Meta.instruct_tags = ["Instruct"]
MistralSmall_2503_Instruct_Meta.quant_types = [
    QuantType.NONE,
    QuantType.UNSLOTH,
    QuantType.BNB,
    QuantType.GGUF,
]

# 2501 base: identical to the 2503 base meta except for the version string.
MistralSmall_2501_Base_Meta = copy.deepcopy(MistralSmall_2503_Base_Meta)
MistralSmall_2501_Base_Meta.model_version = _MISTRAL_SMALL_01_25_VERSION

# 2501 instruct: identical to the 2503 instruct meta except for the version string.
MistralSmall_2501_Instruct_Meta = copy.deepcopy(MistralSmall_2503_Instruct_Meta)
MistralSmall_2501_Instruct_Meta.model_version = _MISTRAL_SMALL_01_25_VERSION


def register_mistral_small_models(include_original_model: bool = False):
    """Register all four Mistral Small metas, at most once.

    Order: 2503 base, 2503 instruct, 2501 base, 2501 instruct. Repeat calls do
    nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_MISTRAL_SMALL_REGISTERED
    if not _IS_MISTRAL_SMALL_REGISTERED:
        mistral_small_metas = (
            MistralSmall_2503_Base_Meta,
            MistralSmall_2503_Instruct_Meta,
            MistralSmall_2501_Base_Meta,
            MistralSmall_2501_Instruct_Meta,
        )
        for small_meta in mistral_small_metas:
            _register_models(small_meta, include_original_model = include_original_model)
        # The guard is only flipped once all four registrations have completed.
        _IS_MISTRAL_SMALL_REGISTERED = True


def register_mistral_models(include_original_model: bool = False):
    """Register every Mistral family in this module (currently Mistral Small).

    Args:
        include_original_model: Passed through to the per-family function.
    """
    for register_family in (register_mistral_small_models,):
        register_family(include_original_model = include_original_model)


if __name__ == "__main__":
    # Manual check: register the Mistral models into an emptied registry, then
    # print a check mark or a cross for each id depending on the lookup result.
    from unsloth.registry.registry import MODEL_REGISTRY, _check_model_info

    MODEL_REGISTRY.clear()
    register_mistral_models(include_original_model = True)

    for registered_id in MODEL_REGISTRY:
        hub_info = _check_model_info(registered_id)
        print(
            f"\u2718 {registered_id}" if hub_info is None else f"\u2713 {registered_id}"
        )


================================================
FILE: unsloth/registry/_phi.py
================================================
from unsloth.registry.registry import ModelInfo, ModelMeta, QuantType, _register_models

# Guard flags: set to True by the matching register_* function below so that
# each Phi 4 family is only registered once.
_IS_PHI_4_REGISTERED = False
_IS_PHI_4_INSTRUCT_REGISTERED = False


class PhiModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}-{version}`` (no size)."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}", f"{version}"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


# Phi 4 (no instruct tag). PhiModelInfo does not put the size into the model
# name, so the single "1" entry only drives one pass of the size loop.
PhiMeta4 = ModelMeta(
    org = "microsoft",
    base_name = "phi",
    instruct_tags = [None],
    model_version = "4",
    model_sizes = ["1"],  # Assuming only one size
    model_info_cls = PhiModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)

# Phi 4 "mini-instruct": same layout as above, with GGUF added to the
# quantization list.
PhiInstructMeta4 = ModelMeta(
    org = "microsoft",
    base_name = "phi",
    instruct_tags = ["mini-instruct"],
    model_version = "4",
    model_sizes = ["1"],  # Assuming only one size
    model_info_cls = PhiModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH, QuantType.GGUF],
)


def register_phi_4_models(include_original_model: bool = False):
    """Register the Phi 4 entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_PHI_4_REGISTERED
    if not _IS_PHI_4_REGISTERED:
        _register_models(PhiMeta4, include_original_model = include_original_model)
        # The guard is only flipped once registration has completed.
        _IS_PHI_4_REGISTERED = True


def register_phi_4_instruct_models(include_original_model: bool = False):
    """Register the Phi 4 instruct entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_PHI_4_INSTRUCT_REGISTERED
    if not _IS_PHI_4_INSTRUCT_REGISTERED:
        _register_models(
            PhiInstructMeta4, include_original_model = include_original_model
        )
        # The guard is only flipped once registration has completed.
        _IS_PHI_4_INSTRUCT_REGISTERED = True


def register_phi_models(include_original_model: bool = False):
    """Run both Phi 4 registration functions: base first, then instruct.

    Args:
        include_original_model: Passed through to each per-family function.
    """
    for register_family in (register_phi_4_models, register_phi_4_instruct_models):
        register_family(include_original_model = include_original_model)


if __name__ == "__main__":
    # Manual check: register the Phi models into an emptied registry, then
    # print a check mark or a cross for each id depending on the lookup result.
    from unsloth.registry.registry import MODEL_REGISTRY, _check_model_info

    MODEL_REGISTRY.clear()
    register_phi_models(include_original_model = True)

    for registered_id in MODEL_REGISTRY:
        hub_info = _check_model_info(registered_id)
        print(
            f"\u2718 {registered_id}" if hub_info is None else f"\u2713 {registered_id}"
        )


================================================
FILE: unsloth/registry/_qwen.py
================================================
from unsloth.registry.registry import ModelInfo, ModelMeta, QuantType, _register_models

# Guard flags: set to True by the matching register_* function below so that
# each Qwen family is only registered once.
_IS_QWEN_2_5_REGISTERED = False
_IS_QWEN_2_5_VL_REGISTERED = False
_IS_QWEN_QWQ_REGISTERED = False


class QwenModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}{version}-{size}B``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        # No separator between base name and version (e.g. "Qwen" + "2.5").
        name_stem = "-".join([f"{base_name}{version}", f"{size}B"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


class QwenVLModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}{version}-VL-{size}B``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}{version}", "VL", f"{size}B"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


class QwenQwQModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}-{size}B`` (no version)."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}", f"{size}B"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


class QwenQVQPreviewModelInfo(ModelInfo):
    """Model info whose names start with ``{base_name}-{size}B-Preview``."""

    @classmethod
    def construct_model_name(cls, base_name, version, size, quant_type, instruct_tag):
        """Build the repo name; the base class appends instruct and quant tags."""
        name_stem = "-".join([f"{base_name}", f"{size}B", "Preview"])
        return super().construct_model_name(
            base_name, version, size, quant_type, instruct_tag, key = name_stem
        )


# Qwen2.5 text models: two sizes, both the untagged (None) and "Instruct"
# variants.
Qwen_2_5_Meta = ModelMeta(
    org = "Qwen",
    base_name = "Qwen",
    instruct_tags = [None, "Instruct"],
    model_version = "2.5",
    model_sizes = ["3", "7"],
    model_info_cls = QwenModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)

# Qwen2.5 VL (multimodal) models; QwenVLModelInfo inserts "-VL-" into the name.
Qwen_2_5_VLMeta = ModelMeta(
    org = "Qwen",
    base_name = "Qwen",
    instruct_tags = ["Instruct"],  # No base, only instruction tuned
    model_version = "2.5",
    model_sizes = ["3", "7", "32", "72"],
    model_info_cls = QwenVLModelInfo,
    is_multimodal = True,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH],
)

# Qwen QwQ: the version is empty and QwenQwQModelInfo leaves it out of the name.
QwenQwQMeta = ModelMeta(
    org = "Qwen",
    base_name = "QwQ",
    instruct_tags = [None],
    model_version = "",
    model_sizes = ["32"],
    model_info_cls = QwenQwQModelInfo,
    is_multimodal = False,
    quant_types = [QuantType.NONE, QuantType.BNB, QuantType.UNSLOTH, QuantType.GGUF],
)

# Qwen QVQ Preview (multimodal): QwenQVQPreviewModelInfo appends "-Preview"
# after the size.
QwenQVQPreviewMeta = ModelMeta(
    org = "Qwen",
    base_name = "QVQ",
    instruct_tags = [None],
    model_version = "",
    model_sizes = ["72"],
    model_info_cls = QwenQVQPreviewModelInfo,
    is_multimodal = True,
    quant_types = [QuantType.NONE, QuantType.BNB],
)


def register_qwen_2_5_models(include_original_model: bool = False):
    """Register the Qwen2.5 text entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_QWEN_2_5_REGISTERED
    if not _IS_QWEN_2_5_REGISTERED:
        _register_models(Qwen_2_5_Meta, include_original_model = include_original_model)
        # The guard is only flipped once registration has completed.
        _IS_QWEN_2_5_REGISTERED = True


def register_qwen_2_5_vl_models(include_original_model: bool = False):
    """Register the Qwen2.5 VL entries, at most once.

    Repeat calls do nothing while the module-level guard flag is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_QWEN_2_5_VL_REGISTERED
    if not _IS_QWEN_2_5_VL_REGISTERED:
        _register_models(
            Qwen_2_5_VLMeta, include_original_model = include_original_model
        )
        # The guard is only flipped once registration has completed.
        _IS_QWEN_2_5_VL_REGISTERED = True


def register_qwen_qwq_models(include_original_model: bool = False):
    """Register the QwQ and QVQ Preview entries, at most once.

    One guard flag covers both metas. Repeat calls do nothing while it is set.

    Args:
        include_original_model: Forwarded unchanged to ``_register_models``.
    """
    global _IS_QWEN_QWQ_REGISTERED
    if not _IS_QWEN_QWQ_REGISTERED:
        # QwQ first, then QVQ Preview.
        for reasoning_meta in (QwenQwQMeta, QwenQVQPreviewMeta):
            _register_models(
                reasoning_meta, include_original_model = include_original_model
            )
        _IS_QWEN_QWQ_REGISTERED = True


def register_qwen_models(include_original_model: bool = False):
    """Run every Qwen registration function: 2.5, 2.5 VL, then QwQ/QVQ.

    Args:
        include_original_model: Passed through to each per-family function.
    """
    for register_family in (
        register_qwen_2_5_models,
        register_qwen_2_5_vl_models,
        register_qwen_qwq_models,
    ):
        register_family(include_original_model = include_original_model)


if __name__ == "__main__":
    # Manual check: register the Qwen models into an emptied registry, then
    # print a check mark or a cross for each id depending on the lookup result.
    from unsloth.registry.registry import MODEL_REGISTRY, _check_model_info

    MODEL_REGISTRY.clear()
    register_qwen_models(include_original_model = True)

    for registered_id in MODEL_REGISTRY:
        hub_info = _check_model_info(registered_id)
        print(
            f"\u2718 {registered_id}" if hub_info is None else f"\u2713 {registered_id}"
        )


================================================
FILE: unsloth/registry/registry.py
================================================
import warnings
from dataclasses import dataclass, field
from enum import Enum


class QuantType(Enum):
    """Quantization variants a registered model can be published in."""

    BNB = "bnb"
    UNSLOTH = "unsloth"  # dynamic 4-bit quantization
    GGUF = "GGUF"
    NONE = "none"
    BF16 = "bf16"  # only for Deepseek V3


# Tags for Hugging Face model paths
BNB_QUANTIZED_TAG = "bnb-4bit"
UNSLOTH_DYNAMIC_QUANT_TAG = "unsloth" + "-" + BNB_QUANTIZED_TAG
GGUF_TAG = "GGUF"
BF16_TAG = "bf16"

# Suffix appended to a model name for each quantization type. QuantType.NONE
# maps to None because unquantized names carry no suffix.
QUANT_TAG_MAP = {
    QuantType.BNB: BNB_QUANTIZED_TAG,
    QuantType.UNSLOTH: UNSLOTH_DYNAMIC_QUANT_TAG,
    QuantType.GGUF: GGUF_TAG,
    QuantType.NONE: None,
    QuantType.BF16: BF16_TAG,
}


# NOTE: models registered with org="unsloth" and QUANT_TYPE.NONE are aliases of QUANT_TYPE.UNSLOTH
@dataclass
class ModelInfo:
    """One registry entry describing a single model repo.

    ``name`` is the repo name without the organization. When it is empty or
    ``None``, ``__post_init__`` derives it through ``construct_model_name``,
    which subclasses override to implement their family's naming scheme.
    """

    org: str
    base_name: str
    version: str
    # NOTE: ModelMeta.model_sizes holds strings, and those values are what
    # _register_models passes in here; the value is only used to build names.
    size: int
    name: str = None  # full model name, constructed from base_name, version, and size unless provided
    is_multimodal: bool = False
    instruct_tag: str = None
    quant_type: QuantType = None
    description: str = None

    def __post_init__(self):
        """Fill in ``name`` from the other fields when it was not provided."""
        self.name = self.name or self.construct_model_name(
            self.base_name,
            self.version,
            self.size,
            self.quant_type,
            self.instruct_tag,
        )

    @staticmethod
    def append_instruct_tag(key: str, instruct_tag: str = None):
        """Return ``key`` with ``-{instruct_tag}`` appended when a tag is given."""
        if instruct_tag:
            key = "-".join([key, instruct_tag])
        return key

    @staticmethod
    def append_quant_type(key: str, quant_type: QuantType = None):
        """Return ``key`` with the quantization suffix appended, if any.

        ``None`` (the default everywhere in this class) and ``QuantType.NONE``
        both mean "no suffix". Previously ``None`` fell through to the tag
        lookup and raised ``KeyError``.

        Raises:
            KeyError: If ``quant_type`` has no entry in ``QUANT_TAG_MAP``.
        """
        if quant_type is None or quant_type == QuantType.NONE:
            return key
        return "-".join([key, QUANT_TAG_MAP[quant_type]])

    @classmethod
    def construct_model_name(
        cls, base_name, version, size, quant_type, instruct_tag, key = ""
    ):
        """Append the instruct tag and then the quantization suffix to ``key``.

        ``base_name``, ``version`` and ``size`` are unused here; subclasses
        use them to build ``key`` before delegating to this method.
        """
        key = cls.append_instruct_tag(key, instruct_tag)
        key = cls.append_quant_type(key, quant_type)
        return key

    @property
    def model_path(
        self,
    ) -> str:
        """The ``{org}/{name}`` path of this model."""
        return f"{self.org}/{self.name}"


# Describes a family of related models; _register_models expands one
# ModelMeta into a registry entry per (size, instruct tag, quant type).
@dataclass
class ModelMeta:
    org: str  # releasing organization, used for the "original model" entries
    base_name: str
    model_version: str
    model_info_cls: type[ModelInfo]  # class whose construct_model_name builds the names
    model_sizes: list[str] = field(default_factory = list)
    instruct_tags: list[str] = field(default_factory = list)
    # Either one list shared by every size, or a dict keyed by model size.
    quant_types: list[QuantType] | dict[str, list[QuantType]] = field(
        default_factory = list
    )
    is_multimodal: bool = False


# Global registry filled by register_model, keyed by "{org}/{name}".
MODEL_REGISTRY: dict[str, ModelInfo] = {}


def register_model(
    model_info_cls: ModelInfo,
    org: str,
    base_name: str,
    version: str,
    size: int,
    instruct_tag: str = None,
    quant_type: QuantType = None,
    is_multimodal: bool = False,
    name: str = None,
):
    """Create one model entry and store it in ``MODEL_REGISTRY``.

    The entry is keyed by ``"{org}/{name}"``. When ``name`` is empty or
    ``None`` it is built with ``model_info_cls.construct_model_name``.

    Raises:
        ValueError: If an entry with the same key is already registered.
    """
    if not name:
        name = model_info_cls.construct_model_name(
            base_name = base_name,
            version = version,
            size = size,
            quant_type = quant_type,
            instruct_tag = instruct_tag,
        )

    registry_key = f"{org}/{name}"
    if registry_key in MODEL_REGISTRY:
        raise ValueError(
            f"Model {registry_key} already registered, current keys: {MODEL_REGISTRY.keys()}"
        )

    new_entry = model_info_cls(
        org = org,
        base_name = base_name,
        version = version,
        size = size,
        is_multimodal = is_multimodal,
        instruct_tag = instruct_tag,
        quant_type = quant_type,
        name = name,
    )
    MODEL_REGISTRY[registry_key] = new_entry


def _check_model_info(model_id: str, properties: list[str] | None = None):
    """Look up ``model_id`` on Hugging Face.

    Args:
        model_id: Repo id passed to ``HfApi.model_info``.
        properties: Passed as ``expand``. Defaults to ``["lastModified"]``;
            the default is built per call rather than shared as a mutable
            default argument.

    Returns:
        The Hugging Face model info, or ``None`` (after emitting a warning)
        when the repository does not exist. Any other error propagates.
    """
    from huggingface_hub import HfApi
    from huggingface_hub import ModelInfo as HfModelInfo
    from huggingface_hub.utils import RepositoryNotFoundError

    if properties is None:
        properties = ["lastModified"]

    api = HfApi()

    try:
        model_info: HfModelInfo = api.model_info(model_id, expand = properties)
    except RepositoryNotFoundError:
        # A missing repo is an expected outcome for callers probing the registry.
        warnings.warn(f"{model_id} not found on Hugging Face")
        return None
    return model_info


def _register_models(model_meta: ModelMeta, include_original_model: bool = False):
    """Expand ``model_meta`` into individual ``MODEL_REGISTRY`` entries.

    For every (size, instruct tag) pair, one entry per quantization type is
    registered under the "unsloth" org. When ``include_original_model`` is
    true, an unquantized entry under the releasing org is added as well.

    Args:
        model_meta: Family description to expand.
        include_original_model: Also register the releasing org's model.
    """
    releasing_org = model_meta.org
    tag_choices = model_meta.instruct_tags
    size_choices = model_meta.model_sizes
    quant_spec = model_meta.quant_types
    # Arguments that are the same for every entry of this family.
    shared_fields = dict(
        model_info_cls = model_meta.model_info_cls,
        base_name = model_meta.base_name,
        version = model_meta.model_version,
        is_multimodal = model_meta.is_multimodal,
    )

    for size in size_choices:
        for instruct_tag in tag_choices:
            # Handle quant types per model size
            size_quants = quant_spec[size] if isinstance(quant_spec, dict) else quant_spec
            for quant_type in size_quants:
                # NOTE: models registered with org="unsloth" and QUANT_TYPE.NONE are aliases of QUANT_TYPE.UNSLOTH
                # unsloth models -- these are all quantized versions of the original model
                register_model(
                    org = "unsloth",
                    size = size,
                    instruct_tag = instruct_tag,
                    quant_type = quant_type,
                    **shared_fields,
                )
            if not include_original_model:
                continue
            # include original model from releasing organization
            register_model(
                org = releasing_org,
                size = size,
                instruct_tag = instruct_tag,
                quant_type = QuantType.NONE,
                **shared_fields,
            )


================================================
FILE: unsloth/save.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unsloth_zoo.utils import Version
from importlib.metadata import version as importlib_version
from unsloth_zoo.hf_utils import dtype_from_config, HAS_TORCH_DTYPE
from unsloth_zoo.llama_cpp import (
    convert_to_gguf,
    quantize_gguf,
    use_local_gguf,
    install_llama_cpp,
    check_llama_cpp,
    _download_convert_hf_to_gguf,
)

# H4: Defensive imports -- these were added in unsloth-zoo PR #526
# and may not exist on older versions
try:
    from unsloth_zoo.llama_cpp import LLAMA_CPP_DEFAULT_DIR, IS_WINDOWS
except ImportError:
    import sys

    IS_WINDOWS = sys.platform == "win32"
    LLAMA_CPP_DEFAULT_DIR = "llama.cpp"
from bitsandbytes.nn import Linear4bit as Bnb_Linear4bit
from peft.tuners.lora import Linear4bit as Peft_Linear4bit
from peft.tuners.lora import Linear as Peft_Linear
from typing import Optional, Callable, Union, List
import sys
import requests
import torch
import os
import shutil
import pickle
import gc
from transformers.models.llama.modeling_llama import logger
from .kernels import fast_dequantize, QUANT_STATE, get_lora_parameters_bias
import subprocess
import psutil
import re
from transformers.models.llama.modeling_llama import logger
from .tokenizer_utils import fix_sentencepiece_gguf
from .models.loader_utils import get_model_name
from .models._utils import _convert_torchao_model
from .ollama_template_mappers import OLLAMA_TEMPLATES, MODEL_TO_OLLAMA_TEMPLATE_MAPPER
from transformers import ProcessorMixin
from huggingface_hub import HfApi

try:
    from huggingface_hub import get_token
except:
    try:
        from huggingface_hub.utils import get_token
    except:
        # For older versions of huggingface_hub
        from huggingface_hub.utils._token import get_token
from pathlib import Path
from peft import PeftModelForCausalLM, PeftModel

# Names exported by `from ... import *` on this module.
__all__ = [
    "print_quantization_methods",
    "unsloth_save_model",
    "save_to_gguf",
    "patch_saving_functions",
    "create_huggingface_repo",
]

# llama.cpp specific build targets. Building everything takes about 90s;
# building only the targets below takes about 60s.
LLAMA_CPP_TARGETS = [
    "llama-quantize",
    "llama-cli",
    "llama-server",
]

# Check environments: a platform is detected when any environment variable
# name starts with its prefix. Every name is written on its own line,
# preceded by a newline, so "\nPREFIX_" only matches at the start of a name.
_env_name_listing = "".join(f"\n{env_name}" for env_name in os.environ)
IS_KAGGLE_ENVIRONMENT = "\nKAGGLE_" in _env_name_listing
IS_COLAB_ENVIRONMENT = "\nCOLAB_" in _env_name_listing
KAGGLE_TMP = "/tmp"
del _env_name_listing

# Weights: dotted sub-module names of the attention and MLP projections.
# NOTE(review): presumably matched against layer names when merging/saving;
# the consumer is outside this section, so confirm there.
LLAMA_WEIGHTS = (
    "self_attn.q_proj",
    "self_attn.k_proj",
    "self_attn.v_proj",
    "self_attn.o_proj",
    "mlp.gate_proj",
    "mlp.up_proj",
    "mlp.down_proj",
)
# Dotted sub-module names of the normalization layers.
LLAMA_LAYERNORMS = (
    "input_layernorm",
    "post_attention_layernorm",
    "pre_feedforward_layernorm",
    "post_feedforward_layernorm",
    "self_attn.q_norm",
    "self_attn.k_norm",
)

# Maps each accepted quantization method name to a human-readable
# description; print_quantization_methods prints these pairs.
# https://github.com/ggerganov/llama.cpp/blob/master/examples/quantize/quantize.cpp#L19
# From https://mlabonne.github.io/blog/posts/Quantize_Llama_2_models_using_ggml.html
ALLOWED_QUANTS = {
    "not_quantized": "Recommended. Fast conversion. Slow inference, big files.",
    "fast_quantized": "Recommended. Fast conversion. OK inference, OK file size.",
    "quantized": "Recommended. Slow conversion. Fast inference, small files.",
    "f32": "Not recommended. Retains 100% accuracy, but super slow and memory hungry.",
    "bf16": "Bfloat16 - Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
    "f16": "Float16  - Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
    "q8_0": "Fast conversion. High resource use, but generally acceptable.",
    "q4_k_m": "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K",
    "q5_k_m": "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K",
    "q2_k": "Uses Q4_K for the attention.vw and feed_forward.w2 tensors, Q2_K for the other tensors.",
    "q3_k_l": "Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_m": "Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_s": "Uses Q3_K for all tensors",
    "q4_0": "Original quant method, 4-bit.",
    "q4_1": "Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.",
    "q4_k_s": "Uses Q4_K for all tensors",
    "q4_k": "alias for q4_k_m",
    "q5_k": "alias for q5_k_m",
    "q5_0": "Higher accuracy, higher resource usage and slower inference.",
    "q5_1": "Even higher accuracy, resource usage and slower inference.",
    "q5_k_s": "Uses Q5_K for all tensors",
    "q6_k": "Uses Q8_K for all tensors",
    # "iq2_xxs" : "2.06 bpw quantization", # Not supported sadly
    # "iq2_xs"  : "2.31 bpw quantization",
    # "iq3_xxs" : "3.06 bpw quantization",
    "q3_k_xs": "3-bit extra small quantization",
}


def has_curl():
    """Return True when a ``curl`` executable can be found on the PATH."""
    curl_path = shutil.which("curl")
    return curl_path is not None


# CMake-style flag, evaluated once at import: LLAMA_CURL is ON only when a
# curl executable was found.
CURL_FLAG = "-DLLAMA_CURL=ON" if has_curl() else "-DLLAMA_CURL=OFF"


def print_quantization_methods():
    """Print each entry of `ALLOWED_QUANTS` as `"<method>"  ==> <description>`."""
    for method, description in ALLOWED_QUANTS.items():
        print(f'"{method}"  ==> {description}')


def check_if_sentencepiece_model(
    model, temporary_location = "_unsloth_sentencepiece_temp"
):
    """Report whether the tokenizer attached to `model` writes a `tokenizer.model` file.

    The tokenizer stored on `model._saved_temp_tokenizer` is saved into
    `<temporary_location>/<tokenizer.name_or_path>` and the output is checked
    for a `tokenizer.model` file.

    Args:
        model: Object that may carry a `_saved_temp_tokenizer` attribute.
        temporary_location: Parent directory used for the throw-away save.

    Returns:
        True if saving the tokenizer produced `tokenizer.model`, otherwise
        False. Also False when `model` has no `_saved_temp_tokenizer`.
    """
    if not hasattr(model, "_saved_temp_tokenizer"):
        return False

    temp_tokenizer = model._saved_temp_tokenizer
    file_location = os.path.join(temporary_location, temp_tokenizer.name_or_path)

    # Only remove the folder afterwards if this call created it, so a
    # pre-existing directory is never deleted.
    created_folder = not os.path.exists(file_location)
    if created_folder:
        os.makedirs(file_location)

    try:
        temp_tokenizer.save_pretrained(file_location)
        return os.path.isfile(os.path.join(file_location, "tokenizer.model"))
    finally:
        # Clean up even when `save_pretrained` raises, so a failed save does
        # not leave the temporary folder behind.
        if created_folder:
            shutil.rmtree(file_location, ignore_errors = True)


def _free_cached_model(model):
    """Delete the Hugging Face cache entry whose repo id matches `model`.

    Every cached repo whose `repo_id` equals `model.config._name_or_path` has
    the first of its listed revisions removed from the local cache. The
    expected freed size is logged before the deletion is executed.
    """
    from huggingface_hub import scan_cache_dir

    # Frees disk space (can be ~4GB) - useful on Kaggle systems.
    for repo in list(scan_cache_dir().repos):
        if repo.repo_id != model.config._name_or_path:
            continue

        first_revision = list(repo.revisions)[0]
        delete_strategy = scan_cache_dir().delete_revisions(
            first_revision.commit_hash,
        )

        logger.warning_once(
            "Unsloth: Will remove a cached repo with size "
            + delete_strategy.expected_freed_size_str,
        )

        delete_strategy.execute()


def _merge_lora(layer, name):
    """Return `(weight, bias)` for `layer`, folding LoRA factors into the weight.

    Layers that are not one of the LoRA / 4bit linear classes are returned
    untouched as `(layer.weight, bias)`. Otherwise the weight is dequantized
    if a quantization state is present, the scaled LoRA product is added in
    float32, and the result is cast back to the original dtype.

    Raises:
        ValueError: if the merged weight contains non-finite values. `name`
            is only used in this error message.
    """
    bias = getattr(layer, "bias", None)
    if not isinstance(layer, (Bnb_Linear4bit, Peft_Linear4bit, Peft_Linear)):
        # Plain layer: nothing to merge.
        return layer.weight, bias

    W, quant_state, A, B, s, bias = get_lora_parameters_bias(layer)
    if quant_state is None:
        dtype = W.dtype
    else:
        # Older quant states are lists with the dtype stored at index 2.
        dtype = quant_state[2] if type(quant_state) is list else quant_state.dtype
        W = fast_dequantize(W, quant_state)

    # Accumulate in float32 on the transposed weight.
    W = W.to(torch.float32).t()

    if A is not None:
        # In-place W += s * (A^T @ B^T)
        W.addmm_(A.t().to(torch.float32), B.t().to(torch.float32), alpha = s)
        # Checking the largest magnitude is enough to detect an infinity.
        maximum_element = torch.max(W.min().abs(), W.max())
        if not torch.isfinite(maximum_element).item():
            raise ValueError(
                f"Unsloth: Merge failed.\n{name} has some elements = infinity."
            )

    return W.t().to(dtype), bias


def fast_save_pickle(shard, name):
    """Announce and write `shard` to the file `name` using plain `torch.save`.

    Intended as the save function when the machine has <= 2 CPUs.
    The default pickle settings are used on purpose: HIGHEST_PROTOCOL seems
    to not work with Pytorch.
    """
    print(f"Unsloth: Saving {name}...")
    torch.save(shard, name)


@torch.inference_mode
def unsloth_save_model(
    model,
    tokenizer,
    save_directory: Union[str, os.PathLike],
    save_method: str = "lora",  # ["lora", "merged_16bit", "merged_4bit"]
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
    is_main_process: bool = True,
    state_dict: Optional[dict] = None,
    save_function: Callable = torch.save,
    max_shard_size: Union[int, str] = "5GB",
    safe_serialization: bool = True,
    variant: Optional[str] = None,
    save_peft_format: bool = True,
    # Push to hub
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Trained with Unsloth",
    private: Optional[bool] = None,
    create_pr: bool = False,
    revision: str = None,
    commit_description: str = "Upload model trained with Unsloth 2x faster",
    tags: List[str] = None,
    # Our functions
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.9,
    datasets: Optional[List[str]] = None,
):
    """Save `model` (and optionally `tokenizer`) locally and/or push it to the Hub.

    Three save methods are supported (after lower-casing and replacing spaces
    with underscores):

    * ``"lora"``         - delegates to the model's own save / push routines.
    * ``"merged_4bit"``  - only reachable by passing ``"merged_4bit_forced"``;
      calls ``merge_and_unload`` when available, then saves like ``"lora"``.
    * ``"merged_16bit"`` - walks ``internal_model.model.layers``, merges each
      projection with `_merge_lora`, builds a fresh state dict and saves it
      through ``internal_model.save_pretrained``. If the model does not expose
      ``model.layers`` this falls back to the generic save path.

    Args:
        model: Model to save.
        tokenizer: Tokenizer to save next to the model, or None to skip it.
        save_directory: Local directory, or Hub repo id when pushing.
        save_method: One of the methods listed above.
        push_to_hub: Whether to upload to the Hugging Face Hub.
        token: Hub token; when None, `get_token()` is consulted.
        state_dict: Forwarded in the generic path; in the merged 16bit path it
            is replaced by a freshly built ``OrderedDict``.
        max_shard_size: Forwarded to saving. A string of the form ``"<N>GB"`` or
            ``"<N>MB"`` (or an int) is also used to estimate RAM usage.
        safe_serialization: Forwarded to saving. In the merged 16bit path it is
            switched off when <= 2 CPUs are detected, unless None was passed.
        tags: Extra tags; ``"unsloth"`` is always appended.
        temporary_location: Directory for merged weights that are spilled to
            disk; removed at the end of the merged 16bit path.
        maximum_memory_usage: Fraction in (0, 0.95] applied to the RAM and VRAM
            budgets.
        datasets: Forwarded to `upload_to_huggingface`.
        Remaining arguments are collected into `save_pretrained_settings` and
        forwarded to ``save_pretrained`` / ``push_to_hub`` as shown below.

    Returns:
        ``(save_directory, None)`` for the generic / push-only paths, or
        ``(save_directory, username)`` for the merged 16bit path.

    Raises:
        RuntimeError: for ``save_method == "merged_4bit"`` (use
            ``"merged_4bit_forced"``), for an unknown save method, or when the
            Hub identity lookup fails while `push_to_hub` is set.
        AssertionError: if `maximum_memory_usage` is outside (0, 0.95].
    """
    if token is None:
        token = get_token()

    # Make sure the commit message / description always mention Unsloth.
    if commit_message is None:
        commit_message = ""
    if "Unsloth" not in commit_message:
        commit_message += " (Trained with Unsloth)"
    commit_message = commit_message.lstrip()

    if commit_description is None:
        commit_description = "Upload model trained with Unsloth 2x faster"
    elif "Unsloth 2x faster" not in commit_description:
        commit_description += " (Trained with Unsloth 2x faster)"

    # NOTE(review): this guard compares the raw string; normalisation
    # (lower-casing / space replacement) only happens further below.
    if save_method == "merged_4bit":
        raise RuntimeError(
            "Unsloth: Merging into 4bit will cause your model to lose accuracy if you plan\n"
            "to merge to GGUF or others later on. I suggest you to do this as a final step\n"
            "if you're planning to do multiple saves.\n"
            "If you are certain, change `save_method` to `merged_4bit_forced`."
        )
    elif save_method == "merged_4bit_forced":
        save_method = "merged_4bit"

    # Snapshot of the (already adjusted) arguments. At this point the only
    # locals are the function parameters; the Unsloth-specific ones are removed
    # so the rest can be forwarded as keyword arguments.
    save_pretrained_settings = dict(locals())
    for deletion in (
        "model",
        "tokenizer",
        "save_method",
        "temporary_location",
        "maximum_memory_usage",
        "datasets",
    ):
        del save_pretrained_settings[deletion]

    # First check for a token!
    if push_to_hub:
        from huggingface_hub import whoami

        try:
            username = whoami(token = token)["name"]
        except:
            raise RuntimeError(
                "Unsloth: Please supply a token!\n"
                "Go to https://huggingface.co/settings/tokens"
            )

    assert maximum_memory_usage > 0 and maximum_memory_usage <= 0.95

    # Clean memory up first
    for _ in range(3):
        torch.cuda.empty_cache()
        gc.collect()

    save_method = save_method.lower().replace(" ", "_")
    if (
        save_method != "lora"
        and save_method != "merged_16bit"
        and save_method != "merged_4bit"
    ):
        raise RuntimeError(
            "Unsloth: You must select one of 3 options when saving models:\n"
            '"lora"         ==> This is the fastest and easiet. Just saves LoRA modules.\n'
            '"merged_16bit" ==> This merges LoRA weights and saves to float16. Needed for llama.cpp / GGUF.\n'
            '"merged_4bit"  ==> This merges LoRA weights and saves to 4bit. Useful for DPO / inference.'
        )

    if save_method == "merged_4bit":
        print("Unsloth: Merging 4bit and LoRA weights to 4bit...")
        print("This might take 5 minutes...")

        # Counteract no LoRA adapters!
        if hasattr(model, "merge_and_unload"):
            model = model.merge_and_unload()
        print("Done.")

    # Always tag the upload with "unsloth".
    if tags is not None:
        assert isinstance(tags, (list, tuple))
        tags = list(tags) + [
            "unsloth",
        ]
    else:
        tags = [
            "unsloth",
        ]
    save_pretrained_settings["tags"] = tags

    # ---- Path 1: push-only for LoRA adapters / merged 4bit models ----
    if ((save_method == "lora") or (save_method == "merged_4bit")) and push_to_hub:
        if token is None:
            raise RuntimeError(
                "Unsloth: Pushing to HF requires a token. Pass `token = 'hf_....'`\n"
                "Go to https://huggingface.co/settings/tokens."
            )

        if save_method == "lora":
            print("Unsloth: Saving LoRA adapters. Please wait...")
        elif save_method == "merged_4bit":
            print("Unsloth: Saving 4bit Bitsandbytes model. Please wait...")

        # Update model tag
        _ = upload_to_huggingface(
            model,
            save_directory,
            token,
            "finetuned",
            "trl",
            file_location = None,
            old_username = None,
            private = private,
            datasets = datasets,
        )

        # Prefer `original_push_to_hub` when the object carries one, else the
        # regular `push_to_hub`.
        getattr(model, "original_push_to_hub", model.push_to_hub)(
            repo_id = save_directory,
            use_temp_dir = use_temp_dir,
            commit_message = commit_message,
            private = private,
            token = token,
            max_shard_size = max_shard_size,
            create_pr = create_pr,
            safe_serialization = safe_serialization,
            revision = revision,
            commit_description = commit_description,
            tags = tags,
        )
        if tokenizer is not None:
            # Set padding side to left for inference
            old_padding_side = tokenizer.padding_side
            tokenizer.padding_side = "left"

            getattr(tokenizer, "original_push_to_hub", tokenizer.push_to_hub)(
                repo_id = save_directory,
                use_temp_dir = use_temp_dir,
                commit_message = commit_message,
                private = private,
                token = token,
                max_shard_size = max_shard_size,
                create_pr = create_pr,
                safe_serialization = safe_serialization,
                revision = revision,
                commit_description = commit_description,
                tags = tags,
            )

            # Revert back padding side
            tokenizer.padding_side = old_padding_side

        if hasattr(model, "config"):
            print(
                f"Saved {save_method} model to https://huggingface.co/" + save_directory
            )
        return save_directory, None

    # Tokenizer has different saving arguments
    tokenizer_save_settings = {
        "save_directory": save_pretrained_settings["save_directory"],
        "legacy_format": None,
        "filename_prefix": None,
        "push_to_hub": save_pretrained_settings["push_to_hub"],
        "private": save_pretrained_settings["private"],
        "token": save_pretrained_settings["token"],
    }

    # Check if PEFT Model or not - if yes, 3 levels. If not 2 levels.
    from peft import PeftModelForCausalLM

    if isinstance(model, PeftModelForCausalLM):
        internal_model = model.model
    else:
        internal_model = model

    # ---- Path 2: generic save (LoRA, merged 4bit, or unsupported layout) ----
    # Cannot be converted properly!
    if (
        (save_method == "merged_4bit")
        or (save_method == "lora")
        or (not hasattr(model, "model") or not hasattr(internal_model.model, "layers"))
    ):
        # Do general saving
        # Edit save_pretrained_settings
        # [TODO] _create_repo has errors due to **kwargs getting accepted
        # commit_description does not seem to work?
        # `commit_message` is only kept when actually pushing to the Hub.
        what_to_delete = (
            (
                "use_temp_dir",
                "commit_message",
                "create_pr",
                "revision",
                "commit_description",
                "tags",
            )
            if save_pretrained_settings["push_to_hub"] is False
            else (
                "use_temp_dir",
                "create_pr",
                "revision",
                "tags",
                "commit_description",
            )
        )
        for deletion in what_to_delete:
            del save_pretrained_settings[deletion]
        if hasattr(model, "add_model_tags"):
            model.add_model_tags(
                [
                    "unsloth",
                ]
            )

        # Update model tag
        if push_to_hub:
            _ = upload_to_huggingface(
                model,
                save_pretrained_settings["save_directory"],
                token,
                "finetuned",
                "trl",
                file_location = None,
                old_username = None,
                private = private,
                datasets = datasets,
            )

        if tokenizer is not None:
            print("Unsloth: Saving tokenizer...", end = "")

            # Set padding side to left for inference
            old_padding_side = tokenizer.padding_side
            tokenizer.padding_side = "left"

            tokenizer.save_pretrained(**tokenizer_save_settings)

            # Revert back padding side
            tokenizer.padding_side = old_padding_side

            print(" Done.")
        else:
            print()

        print("Unsloth: Saving model...", end = "")
        if save_method != "lora":
            print(" This might take 10 minutes for Llama-7b...", end = "")

        # [TODO] Is this correct?
        if save_method == "lora":
            save_pretrained_settings["selected_adapters"] = None

        model.save_pretrained(**save_pretrained_settings)

        if push_to_hub and hasattr(model, "config"):
            print(
                "Saved to https://huggingface.co/"
                + save_pretrained_settings["save_directory"]
            )

        print(" Done.")
        return save_directory, None

    # ---- Path 3: merge LoRA into 16bit weights and save ----
    # If push_to_hub, we must remove the .../ part of a repo
    username = None
    if push_to_hub and "/" in save_directory:
        # +1 solves absolute path issues
        new_save_directory = save_directory
        username = new_save_directory[: new_save_directory.find("/")]
        new_save_directory = new_save_directory[new_save_directory.find("/") + 1 :]
        if IS_KAGGLE_ENVIRONMENT:
            new_save_directory = os.path.join(
                KAGGLE_TMP, new_save_directory[new_save_directory.find("/") + 1 :]
            )
            logger.warning_once(
                "Unsloth: You are pushing to hub in Kaggle environment.\n"
                f"To save memory, we shall move {save_directory} to {new_save_directory}"
            )
        else:
            logger.warning_once(
                f"Unsloth: You are pushing to hub, but you passed your HF username = {username}.\n"
                f"We shall truncate {save_directory} to {new_save_directory}"
            )

        save_pretrained_settings["save_directory"] = new_save_directory
        tokenizer_save_settings["save_directory"] = new_save_directory
        save_directory = new_save_directory

    print("Unsloth: Merging 4bit and LoRA weights to 16bit...")

    # Determine max RAM usage minus sharding
    max_ram = psutil.virtual_memory().available
    # Default estimate of 5GB; overridden below when `max_shard_size` parses.
    sharded_ram_usage = 5 * 1024 * 1024 * 1024
    if type(max_shard_size) is str:
        gb_found = re.match(
            r"([0-9]{1,})[\s]{0,}GB", max_shard_size, flags = re.IGNORECASE
        )
        mb_found = re.match(
            r"([0-9]{1,})[\s]{0,}MB", max_shard_size, flags = re.IGNORECASE
        )
        if gb_found:
            sharded_ram_usage = int(gb_found.group(1)) * 1024 * 1024 * 1024
        elif mb_found:
            sharded_ram_usage = int(mb_found.group(1)) * 1024 * 1024
    elif type(max_shard_size) is int:
        sharded_ram_usage = max_shard_size

    # Switch to our fast saving modules if it's a slow PC!
    # Physical core count first, then logical, then assume a single CPU.
    n_cpus = psutil.cpu_count(logical = False)
    if n_cpus is None:
        n_cpus = psutil.cpu_count()
    if n_cpus is None:
        n_cpus = 1

    # Passing `safe_serialization = None` forces safetensors even on slow
    # machines; an explicit True is downgraded when <= 2 CPUs are available.
    if safe_serialization is None:
        safe_serialization = True
        save_pretrained_settings["safe_serialization"] = safe_serialization

    elif safe_serialization and (n_cpus <= 2):
        logger.warning_once(
            f"Unsloth: You have {n_cpus} CPUs. Using `safe_serialization` is 10x slower.\n"
            f"We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.\n"
            f"To force `safe_serialization`, set it to `None` instead.",
        )
        safe_serialization = False
        save_function = fast_save_pickle
        save_pretrained_settings["safe_serialization"] = safe_serialization
        save_pretrained_settings["save_function"] = save_function

    # Only safe_serialization uses more RAM
    if safe_serialization:
        max_ram -= sharded_ram_usage
    else:
        max_ram -= sharded_ram_usage * 0.25  # Uses much less

    max_ram = int(max(0, max_ram) * maximum_memory_usage)
    print(
        f"Unsloth: Will use up to "
        f"{round(max_ram/1024/1024/1024, 2)} out of "
        f"{round(psutil.virtual_memory().total/1024/1024/1024, 2)} RAM for saving."
    )

    # Move temporary_location to /tmp in Kaggle
    if IS_KAGGLE_ENVIRONMENT:
        temporary_location = os.path.join(KAGGLE_TMP, temporary_location)

    # Max directory for disk saving
    if not os.path.exists(temporary_location):
        os.makedirs(temporary_location)

    # Check if Kaggle or Colab, since only 20GB of Disk space allowed.
    if IS_KAGGLE_ENVIRONMENT or IS_COLAB_ENVIRONMENT:
        # We free up 4GB of space
        logger.warning_once(
            "Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded\n"
            "model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab."
        )
        _free_cached_model(internal_model)

    # HF also uses a OrderedDict
    from collections import OrderedDict

    # NOTE: this replaces whatever was passed in through the `state_dict` argument.
    state_dict = OrderedDict()

    torch_dtype = dtype_from_config(internal_model.config)
    if type(torch_dtype) is str:
        if torch_dtype == "float16":
            torch_dtype = torch.float16
        elif torch_dtype == "bfloat16":
            torch_dtype = torch.bfloat16

    # Check modules to save float32 dtype
    state_dict["model.embed_tokens.weight"] = (
        internal_model.model.embed_tokens.weight.data.to(torch_dtype)
    )

    # VRAM budget is based on device 0 only.
    max_vram = int(
        torch.cuda.get_device_properties(0).total_memory * maximum_memory_usage
    )

    print("Unsloth: Saving model... This might take 5 minutes ...")

    from tqdm import tqdm as ProgressBar

    for j, layer in enumerate(ProgressBar(internal_model.model.layers)):
        for item in LLAMA_WEIGHTS:
            # `item` is an attribute path looked up on the layer via eval.
            proj = eval(f"layer.{item}")
            name = f"model.layers.{j}.{item}.weight"
            W, bias = _merge_lora(proj, name)

            # Bias term
            if bias is not None:
                state_dict[f"model.layers.{j}.{item}.bias"] = bias

            if (torch.cuda.memory_allocated() + W.nbytes) < max_vram:
                # Save to GPU memory
                state_dict[name] = W
            # [TODO] Saving to RAM seems to leak memory???
            # elif (max_ram - W.nbytes) > 0:
            #     # Save to CPU memory
            #     logger.warning_once(f"We will save to RAM and not VRAM now.")
            #     state_dict[name] = W.to("cpu", non_blocking = True, copy = True)
            #     max_ram = max(max_ram - W.nbytes, 0)
            else:
                # Save to Disk
                # The tensor is written out and then re-loaded memory-mapped.
                logger.warning_once("\nWe will save to Disk and not RAM now.")
                filename = os.path.join(temporary_location, f"{name}.pt")
                torch.save(
                    W,
                    filename,
                    pickle_module = pickle,
                    pickle_protocol = pickle.HIGHEST_PROTOCOL,
                )
                # weights_only = True weirdly fails?
                state_dict[name] = torch.load(
                    filename, map_location = "cpu", mmap = True, weights_only = False
                )
        for item in LLAMA_LAYERNORMS:
            try:
                # Skip for Gemma 2
                state_dict[f"model.layers.{j}.{item}.weight"] = eval(
                    f"layer.{item}.weight.data"
                )
            except:
                continue

    state_dict["model.norm.weight"] = internal_model.model.norm.weight.data
    # Check for modules_to_save float32 dtype

    # Check for tied weights
    # lm_head is only stored when it does not share storage with the embeddings.
    if (
        internal_model.model.embed_tokens.weight.data_ptr()
        != internal_model.lm_head.weight.data_ptr()
    ):
        state_dict["lm_head.weight"] = internal_model.lm_head.weight.data.to(
            torch_dtype
        )

    # All tensors MUST be type torch.Tensor and not torch.nn.parameter.Parameter
    for key, value in state_dict.items():
        if hasattr(value, "data"):
            state_dict[key] = value = value.data
        if type(value) is not torch.Tensor:
            logger.warning_once(f"Unsloth: {key} is not a Tensor but a {type(value)}.")

    # Edit save_pretrained_settings
    # [TODO] _create_repo has errors due to **kwargs getting accepted
    save_pretrained_settings["state_dict"] = state_dict

    # commit_description does not seem to work?
    what_to_delete = (
        (
            "use_temp_dir",
            "commit_message",
            "create_pr",
            "revision",
            "commit_description",
            "tags",
        )
        if not push_to_hub
        else (
            "use_temp_dir",
            "create_pr",
            "revision",
            "tags",
            "commit_description",
        )
    )
    for deletion in what_to_delete:
        del save_pretrained_settings[deletion]
    if hasattr(model, "add_model_tags"):
        model.add_model_tags(
            [
                "unsloth",
            ]
        )

    # Update model tag
    if push_to_hub:
        _ = upload_to_huggingface(
            model,
            save_pretrained_settings["save_directory"],
            token,
            "finetuned",
            "trl",
            file_location = None,
            old_username = username,
            private = private,
            datasets = datasets,
        )

    # First check if we're pushing to an organization!
    save_directory = save_pretrained_settings["save_directory"]

    # `actual_username` / `new_save_directory` are only defined when pushing;
    # the checks below short-circuit on `push_to_hub` before reading them.
    if save_pretrained_settings["push_to_hub"]:
        new_save_directory, new_username = _determine_username(
            save_directory, username, token
        )

        if token is not None:
            from huggingface_hub import whoami

            actual_username = whoami(token = token)["name"]
        else:
            actual_username = username

    # Check if pushing to an organization
    if save_pretrained_settings["push_to_hub"] and (username != actual_username):
        print(f"Unsloth: Saving to organization with address {new_save_directory}")
        # We upload everything at the end!
        tokenizer_save_settings["push_to_hub"] = False
        tokenizer_save_settings["save_directory"] = new_save_directory

    # Save tokenizer
    if tokenizer is not None:
        print("Unsloth: Saving tokenizer...", end = "")

        # Set padding side to left for inference
        old_padding_side = tokenizer.padding_side
        tokenizer.padding_side = "left"

        tokenizer.save_pretrained(**tokenizer_save_settings)

        # Revert back padding side
        tokenizer.padding_side = old_padding_side

        print(" Done.")
    else:
        print()

    # Since merged, edit quantization_config
    # The config is swapped on `model` and every nested `.model` for the
    # duration of the save, and restored afterwards.
    old_config = model.config
    new_config = model.config.to_dict()
    if "quantization_config" in new_config:
        del new_config["quantization_config"]
    original_model = model
    new_config = type(model.config).from_dict(new_config)
    while hasattr(original_model, "model"):
        original_model = original_model.model
        original_model.config = new_config
    model.config = new_config

    # Save!
    # [TODO] --> is this correct?
    # save_pretrained_settings["selected_adapters"] = None

    # Check if pushing to an organization
    if save_pretrained_settings["push_to_hub"] and (username != actual_username):
        print(f"Unsloth: Saving to organization with address {new_save_directory}")
        # Pushing to organization!
        # Sadly .save_pretrained doesn't work :(
        # We first save it via .save_pretrained, then upload manually!
        save_pretrained_settings["save_directory"] = new_save_directory
        save_pretrained_settings["push_to_hub"] = False
        internal_model.save_pretrained(**save_pretrained_settings)

        # Now manually go through each file and upload them manually!
        # NOTE(review): `filenames` is not used after this line.
        filenames = os.listdir(new_save_directory)

        hf_api = HfApi(token = save_pretrained_settings["token"])

        print("Unsloth: Uploading all files... Please wait...")
        hf_api.upload_folder(
            folder_path = new_save_directory,
            path_in_repo = ".",
            repo_id = new_save_directory,
            repo_type = "model",
            commit_message = "(Trained with Unsloth)",
            ignore_patterns = "*.md",
        )
    else:
        internal_model.save_pretrained(**save_pretrained_settings)

    # Revert config back
    original_model = model
    while hasattr(original_model, "model"):
        original_model = original_model.model
        original_model.config = old_config
    model.config = old_config
    print("Done.")

    if push_to_hub and hasattr(model, "config"):
        print(
            f"Saved merged model to https://huggingface.co/{username}/{save_directory.lstrip('/').split('/')[-1]}"
        )

    save_pretrained_settings["state_dict"] = None

    # Drop references to the merged tensors one by one, periodically releasing
    # cached GPU memory and running the garbage collector.
    for j, (key, value) in enumerate(state_dict.items()):
        state_dict[key] = None
        if j % 10 == 0:
            torch.cuda.empty_cache()
            gc.collect()
    state_dict = None
    del state_dict
    torch.cuda.empty_cache()
    gc.collect()

    # Remove temporary location
    import shutil

    shutil.rmtree(temporary_location, ignore_errors = True)

    for _ in range(3):
        torch.cuda.empty_cache()
        gc.collect()
    return save_directory, username


def install_llama_cpp_clone_non_blocking():
    """Start a recursive `git clone` of llama.cpp in the background.

    Returns:
        The `subprocess.Popen` handle of the clone; its output is discarded.
    """
    return subprocess.Popen(
        ["git", "clone", "--recursive", "https://github.com/ggerganov/llama.cpp"],
        stdout = subprocess.DEVNULL,
        stderr = subprocess.STDOUT,
    )


def install_llama_cpp_make_non_blocking():
    """Start building llama.cpp in the background with make or CMake.

    `make clean` is attempted first. If it succeeds the legacy Makefile build
    is launched; otherwise CMake is configured synchronously and the CMake
    build is launched.

    Returns:
        `(process, is_cmake)` - the `subprocess.Popen` handle of the build and
        whether the CMake route was taken.

    Raises:
        RuntimeError: if the CMake configure step exits with a non-zero status.
    """
    # https://github.com/ggerganov/llama.cpp/issues/7062
    # GPU conversion for GGUF breaks, so the build is CPU only.
    uses_cmake = os.system("make clean -C llama.cpp") != 0

    if uses_cmake:
        # New CMake build: use fewer jobs than the make route.
        n_jobs = max(int(psutil.cpu_count() or 1), 1)
        configure_status = os.system(
            f"cmake llama.cpp -B llama.cpp/build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF {CURL_FLAG}"
        )
        if configure_status != 0:
            raise RuntimeError(
                f"*** Unsloth: Failed compiling llama.cpp using os.system(...) with error {configure_status}. Please report this ASAP!"
            )
        build_prefix = [
            "cmake",
            "--build",
            "llama.cpp/build",
            "--config",
            "Release",
            "-j" + str(n_jobs),
            "--clean-first",
            "--target",
        ]
        full_command = build_prefix + LLAMA_CPP_TARGETS
    else:
        # Old Makefile build: oversubscribe the CPUs by 1.5x.
        n_jobs = max(int((psutil.cpu_count() or 1) * 1.5), 1)
        full_command = ["make", "all", "-j" + str(n_jobs), "-C", "llama.cpp"]

    build_process = subprocess.Popen(
        full_command, stdout = subprocess.DEVNULL, stderr = subprocess.STDOUT
    )
    return build_process, uses_cmake


def install_python_non_blocking(packages = []):
    """Start `pip install <packages>` in the background.

    Args:
        packages: List of requirement strings appended to the pip command.

    Returns:
        The `subprocess.Popen` handle of the installer; its output is discarded.
    """
    return subprocess.Popen(
        ["pip", "install"] + packages,
        stdout = subprocess.DEVNULL,
        stderr = subprocess.STDOUT,
    )


def try_execute(commands, force_complete = False):
    """Run shell commands one after another, echoing and scanning their output.

    Each command is run through the shell with stderr merged into stdout.
    Every output line is printed, and inspected for known failure markers.

    Args:
        commands: Iterable of shell command strings.
        force_complete: When True, a command that exits with a non-zero
            status raises `subprocess.CalledProcessError`.

    Returns:
        "CMAKE" as soon as a line containing "deprecated" is seen (remaining
        commands are not run), otherwise None once every command finished.

    Raises:
        RuntimeError: if a line contains "undefined reference",
            "Unknown argument" or "***".
        subprocess.CalledProcessError: see `force_complete`.
    """
    for command in commands:
        with subprocess.Popen(
            command,
            shell = True,
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
            bufsize = 1,
        ) as sp:
            for line in sp.stdout:
                line = line.decode("utf-8", errors = "replace")
                if "undefined reference" in line:
                    raise RuntimeError(
                        f"*** Unsloth: Failed compiling llama.cpp with {line}. Please report this ASAP!"
                    )
                elif "deprecated" in line:
                    return "CMAKE"
                elif "Unknown argument" in line:
                    raise RuntimeError(
                        f"*** Unsloth: Failed compiling llama.cpp with {line}. Please report this ASAP!"
                    )
                elif "***" in line:
                    raise RuntimeError(
                        f"*** Unsloth: Failed compiling llama.cpp with {line}. Please report this ASAP!"
                    )
                print(line, flush = True, end = "")

            # `returncode` stays None until the process has been waited on,
            # so the exit status must be collected explicitly here; reading
            # `sp.returncode` directly would never detect a failure.
            returncode = sp.wait()
            if force_complete and returncode != 0:
                raise subprocess.CalledProcessError(returncode, sp.args)
    return None


def install_llama_cpp_old(version = -10):
    """Fallback installer: build an older tagged release of llama.cpp.

    The remote tag list is fetched, and the tag at index `version` (by default
    the 10th from the end, since the latest might be broken) is checked out
    and built with make, falling back to CMake when `try_execute` asks for it.
    An existing `llama.cpp` directory is deleted after a 30 second countdown.

    Raises:
        RuntimeError: if no quantize executable exists after building.
    """
    raw_tags = subprocess.check_output(
        ["git", "ls-remote", "--tags", "https://github.com/ggerganov/llama.cpp.git"]
    )
    tag_lines = raw_tags.decode("utf-8").replace("\t", " ").split("\n")

    # Keep the leading run of release tags. If every line is a release tag,
    # the final line is dropped as well.
    cutoff = next(
        (idx for idx, ref in enumerate(tag_lines) if "refs/tags/b" not in ref),
        len(tag_lines) - 1,
    )
    tag_lines = tag_lines[:cutoff]
    latest = tag_lines[-1]
    version = tag_lines[version].split(" ")[0]

    # Check if the llama.cpp exists
    if os.path.exists("llama.cpp"):
        print(
            "**[WARNING]** You have a llama.cpp directory which is broken.\n"
            "Unsloth will DELETE the broken directory and install a new one.\n"
            "Press CTRL + C / cancel this if this is wrong. We shall wait 30 seconds.\n"
        )
        import time

        for seconds_left in range(30, 0, -1):
            print(f"**[WARNING]** Deleting llama.cpp directory... {seconds_left} seconds left.")
            time.sleep(1)
        import shutil

        shutil.rmtree("llama.cpp", ignore_errors = True)

    # Clone, then pin the checkout to the chosen commit.
    # Also don't use the GPU!
    try_execute(
        [
            "git clone --recursive https://github.com/ggerganov/llama.cpp",
            f"cd llama.cpp && git reset --hard {version} && git clean -df",
        ]
    )

    # Try using MAKE
    make_result = try_execute(
        [
            "make clean -C llama.cpp",
            f"make all -j{(psutil.cpu_count() or 1)*2} -C llama.cpp",
        ]
    )
    if make_result == "CMAKE":
        # Instead use CMAKE
        try_execute(
            [
                f"cmake llama.cpp -B llama.cpp/build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF {CURL_FLAG}",
                f"cmake --build llama.cpp/build --config Release -j{(psutil.cpu_count() or 1)*2} --clean-first --target {' '.join(LLAMA_CPP_TARGETS)}",
                "cp llama.cpp/build/bin/llama-* llama.cpp",
                "rm -rf llama.cpp/build",
            ]
        )

    # Check if successful
    quantize_candidates = (
        "llama.cpp/llama-quantize.exe",
        "llama.cpp/llama-quantize",
        "llama.cpp/quantize.exe",
        "llama.cpp/quantize",
        "llama.cpp/build/bin/llama-quantize",
        "llama.cpp/build/bin/quantize",
    )
    if not any(os.path.exists(candidate) for candidate in quantize_candidates):
        raise RuntimeError(
            "Unsloth: The file 'llama.cpp/llama-quantize' or `llama.cpp/quantize` does not exist.\n"
            "We've also double checked the building directory under 'llama.cpp/build/bin/'.\n"
            "But we expect this file to exist! Check if the file exists under llama.cpp and investigate the building process of llama.cpp (make/cmake)!"
        )


def install_llama_cpp_blocking(use_cuda = False):
    """
    Clone llama.cpp into the current directory and build it synchronously.

    Returns immediately when a `llama.cpp` path already exists. Otherwise the
    repository is cloned, `gguf` and `protobuf` are pip-installed, and a
    `make` build is attempted. If `try_execute` reports "CMAKE" for the make
    step, the build is redone with cmake and the resulting `llama-*` binaries
    are copied into `llama.cpp/`.

    `use_cuda` is accepted but ignored: the CUDA build is disabled, see
    https://github.com/ggerganov/llama.cpp/issues/7062 (GPU conversion for
    GGUF breaks).
    """
    if os.path.exists("llama.cpp"):
        return

    try_execute(
        [
            "git clone --recursive https://github.com/ggerganov/llama.cpp",
            "pip install gguf protobuf",
        ]
    )

    # Oversubscribe the build: two jobs per reported CPU (at least two).
    n_jobs = (psutil.cpu_count() or 1) * 2

    make_commands = [
        "make clean -C llama.cpp",
        f"make all -j{n_jobs} -C llama.cpp",
    ]
    if try_execute(make_commands) != "CMAKE":
        return

    # make was rejected - fall back to a CPU-only cmake build.
    targets = " ".join(LLAMA_CPP_TARGETS)
    cmake_commands = [
        f"cmake llama.cpp -B llama.cpp/build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=OFF {CURL_FLAG}",
        f"cmake --build llama.cpp/build --config Release -j{n_jobs} --clean-first --target {targets}",
        "cp llama.cpp/build/bin/llama-* llama.cpp",
        "rm -rf llama.cpp/build",
    ]
    try_execute(cmake_commands)


def get_executable(executables):
    """
    Return the first matching executable found on the system PATH.

    Directories are searched in PATH order; within each directory the names
    in `executables` are tried in the order given.

    Args:
        executables: iterable of file names to look for (e.g. both the
            plain and the `.exe` spelling of a tool).

    Returns:
        The full path of the first hit, or None when nothing matches or the
        PATH environment variable is not set.
    """
    search_path = os.environ.get("PATH")
    if search_path is None:
        # No PATH at all: nothing to search (previously an AttributeError).
        return None

    for directory in search_path.split(os.pathsep):
        for executable in executables:
            path = os.path.join(directory, executable)
            # Directories also carry the "x" (search) bit, so they must be
            # excluded explicitly - same rule shutil.which applies.
            if (
                os.path.exists(path)
                and os.access(path, os.X_OK)
                and not os.path.isdir(path)
            ):
                return path
    return None


def save_to_gguf(
    model_name: str,
    model_type: str,
    model_dtype: str,
    is_sentencepiece: bool = False,
    model_directory: str = "unsloth_finetuned_model",
    quantization_method = "fast_quantized",  # Can be a list of options! ["q4_k_m", "q8_0", "q5_k_m"]
    first_conversion: str = None,
    is_vlm: bool = False,
    is_gpt_oss: bool = False,
):
    """
    Orchestrates the complete GGUF conversion process.
    Handles installation, conversion, and quantization.

    Args:
        model_name: Passed to the converter and used as the stem of every
            quantized output file (`{model_name}.{QUANT}.gguf`).
        model_type: Accepted for interface compatibility; not read here.
        model_dtype: Either "float16" or "bfloat16".
        is_sentencepiece: Accepted for interface compatibility; not read here.
        model_directory: Folder holding the HF model. All GGUF outputs end up
            in the sibling folder `{model_directory}_gguf`.
        quantization_method: A string, list or tuple of quant names. The
            aliases "not_quantized" (-> model dtype), "fast_quantized" and
            None (-> "q8_0") and "quantized" (-> "q4_k_m") are resolved before
            validation against ALLOWED_QUANTS.
        first_conversion: Format of the initial HF -> GGUF conversion.
            Defaults to the (possibly downgraded) model dtype.
        is_vlm: Forwarded to the converter; the converter's updated value is
            what gets returned.
        is_gpt_oss: When True, no quantization is performed at all.

    Returns:
        Tuple `(all_saved_locations, want_full_precision, is_vlm)` where
        `all_saved_locations` is the list of produced GGUF paths and
        `want_full_precision` tells whether the first-conversion format was
        itself one of the requested outputs (always True for GPT-OSS).

    Raises:
        TypeError: `quantization_method` is not a str, list or tuple.
        RuntimeError: unsupported quant method, failed conversion or failed
            quantization.
    """
    # print_output True only if UNSLOTH_ENABLE_LOGGING=1
    print_output = os.environ.get("UNSLOTH_ENABLE_LOGGING", "0") == "1"

    # Validate model dtype
    assert model_dtype == "float16" or model_dtype == "bfloat16"
    model_dtype = "f16" if model_dtype == "float16" else "bf16"

    # Convert quantization_method to list
    if isinstance(quantization_method, list):
        pass
    elif isinstance(quantization_method, str):
        quantization_method = [
            quantization_method,
        ]
    elif isinstance(quantization_method, tuple):
        quantization_method = list(quantization_method)
    else:
        raise TypeError(
            "Unsloth: quantization_method can only be a string or a list of strings"
        )

    # Check if bfloat16 is supported
    if model_dtype == "bf16" and not torch.cuda.is_bf16_supported():
        logger.warning(
            "Unsloth: Cannot convert to bf16 GGUF since your computer doesn't support it.\n"
            "We shall switch instead to f16."
        )
        model_dtype = "f16"

    # Check first_conversion as well
    if first_conversion is None:
        first_conversion = model_dtype

    # Check I quants
    for quant_method in quantization_method:
        # Entries may be None (an alias resolved below), so only inspect strings.
        if isinstance(quant_method, str) and quant_method.startswith("iq2"):
            raise RuntimeError(
                "Unsloth: Currently iq2 type quantizations aren't supported yet - sorry!"
            )

    # Map quant methods
    new_quantization_methods = []
    for quant_method in quantization_method:
        if quant_method == "not_quantized":
            quant_method = model_dtype
        elif quant_method == "fast_quantized":
            quant_method = "q8_0"
        elif quant_method == "quantized":
            quant_method = "q4_k_m"
        elif quant_method is None:
            quant_method = "q8_0"

        # Check if wrong method
        if quant_method not in ALLOWED_QUANTS:
            error = f"Unsloth: Quant method = [{quant_method}] not supported. Choose from below:\n"
            for key, value in ALLOWED_QUANTS.items():
                error += f"[{key}] => {value}\n"
            raise RuntimeError(error)

        new_quantization_methods.append(quant_method)
    quantization_method = new_quantization_methods

    # Determine optimal first_conversion
    if is_gpt_oss:
        print("Unsloth: GPT-OSS model detected - using special conversion settings")
        first_conversion = "None"  # No quantization for GPT-OSS
        # Only keep one conversion method since GPT-OSS doesn't quantize
        quantization_method = ["None"]
    else:
        # NOTE(review): first_conversion was already defaulted to model_dtype
        # above, so this branch can never run as written. Kept unchanged so
        # the intended selection logic is not lost - confirm which default
        # is actually wanted.
        if first_conversion is None:
            # Check if q8_0 is the ONLY quantization method requested
            if len(quantization_method) == 1 and quantization_method[0] == "q8_0":
                first_conversion = "None"  # Let llama-quantize do the direct conversion
            else:
                # For all other cases, choose the highest precision format
                # that can be requantized to all requested formats
                strength = 0
                for quant_method in quantization_method:
                    if quant_method == "f32":
                        strength = max(strength, 3)
                    elif quant_method == "f16":
                        strength = max(strength, 2)
                    elif quant_method == "bf16":
                        strength = max(strength, 1)
                    # Note: we don't set strength for q8_0 here since we handle it above

                if strength >= 3:
                    first_conversion = "f32"
                elif strength >= 2:
                    first_conversion = "f16"
                elif strength >= 1:
                    first_conversion = "bf16"
                else:
                    first_conversion = "bf16"  # requantizing from q8_0 disallowed in new llama.cpp default to bf16.

    # Check bfloat16 support again for first_conversion
    if first_conversion == "bf16" and not torch.cuda.is_bf16_supported():
        logger.warning("Unsloth: Switching bf16 to f16 due to hardware limitations")
        first_conversion = "f16"

    # "None" is a sentinel string, not a dtype - show nothing in messages.
    first_conversion_dtype = "" if first_conversion == "None" else first_conversion
    # Print conversion info
    print_info = (
        f"==((====))==  Unsloth: Conversion from HF to GGUF information\n"
        f"   {chr(92)}{chr(92)}   /|    [0] Installing llama.cpp might take 3 minutes.\n"
        f"O^O/ {chr(92)}_/ {chr(92)}    [1] Converting HF to GGUF {first_conversion_dtype} might take 3 minutes.\n"
        f"{chr(92)}        /    [2] Converting GGUF {first_conversion_dtype} to {quantization_method} might take 10 minutes each.\n"
        f' "-____-"     In total, you will have to wait at least 16 minutes.\n'
    )
    print(print_info)

    # Step 1: Ensure llama.cpp is installed
    try:
        quantizer_location, converter_location = check_llama_cpp()
        print("Unsloth: llama.cpp found in the system. Skipping installation.")
    except Exception:
        # Only ordinary failures mean "not installed"; Ctrl-C and SystemExit
        # must propagate instead of kicking off a multi-minute build.
        print("Unsloth: Installing llama.cpp. This might take 3 minutes...")
        # GGUF conversion doesn't need CUDA, and Kaggle has no CUDA build
        # support either, so every environment installs the CPU build.
        quantizer_location, converter_location = install_llama_cpp(
            gpu_support = False,
            print_output = print_output,
        )

    # Step 2: Download and patch converter script
    print("Unsloth: Preparing converter script...")
    with use_local_gguf():
        converter_path, supported_text_archs, supported_vision_archs = (
            _download_convert_hf_to_gguf()
        )

        # Step 3: Initial GGUF conversion
        print(
            f"Unsloth: [1] Converting model into {first_conversion_dtype} GGUF format."
        )
        print(f"This might take 3 minutes...")

        initial_files, is_vlm_update = convert_to_gguf(
            model_name = model_name,
            input_folder = model_directory,
            model_dtype = model_dtype,
            quantization_type = first_conversion,
            converter_location = converter_path,
            supported_text_archs = supported_text_archs,
            supported_vision_archs = supported_vision_archs,
            is_vlm = is_vlm,
            is_gpt_oss = is_gpt_oss,
            max_shard_size = "50GB",
            print_output = print_output,
        )
    # update is_vlm switch
    is_vlm = is_vlm_update
    # Check conversion success
    for file in initial_files:
        if not os.path.exists(file):
            if IS_KAGGLE_ENVIRONMENT:
                raise RuntimeError(
                    f"Unsloth: Conversion failed for {file}\n"
                    "You are in a Kaggle environment with limited disk space (20GB).\n"
                    "Try saving to /tmp for more space or use a smaller model.\n"
                    "Alternatively, save the 16bit model first, then convert manually."
                )
            else:
                raise RuntimeError(
                    f"Unsloth: Conversion failed for {file}\n"
                    "Please check disk space and try again."
                )

    # Move initial GGUF files into a dedicated _gguf directory
    gguf_directory = f"{model_directory}_gguf"
    os.makedirs(gguf_directory, exist_ok = True)
    moved_files = []
    for fpath in initial_files:
        dst = os.path.join(gguf_directory, os.path.basename(fpath))
        shutil.move(fpath, dst)
        moved_files.append(dst)
    initial_files = moved_files

    print(f"Unsloth: Initial conversion completed! Files: {initial_files}")

    # Step 4: Additional quantizations using llama-quantize
    all_saved_locations = initial_files.copy()

    if not is_gpt_oss:
        # Every extra quant is derived from the first converted file.
        base_gguf = initial_files[0]
        quants_created = False
        for quant_method in quantization_method:
            if quant_method != first_conversion:
                print(
                    f"Unsloth: [2] Converting GGUF {first_conversion_dtype} into {quant_method}. This might take 10 minutes..."
                )
                output_location = os.path.join(
                    gguf_directory, f"{model_name}.{quant_method.upper()}.gguf"
                )
                try:
                    # Use the quantize_gguf function we created
                    quantized_file = quantize_gguf(
                        input_gguf = base_gguf,
                        output_gguf = output_location,
                        quant_type = quant_method,
                        quantizer_location = quantizer_location,
                        print_output = print_output,
                    )
                    all_saved_locations.append(quantized_file)
                    quants_created = True
                except Exception as e:
                    if IS_KAGGLE_ENVIRONMENT:
                        raise RuntimeError(
                            f"Unsloth: Quantization failed for {output_location}\n"
                            "You are in a Kaggle environment, which might be the reason this is failing.\n"
                            "Kaggle only provides 20GB of disk space in the working directory.\n"
                            "Merging to 16bit for 7b models use 16GB of space.\n"
                            "This means using `model.{save_pretrained/push_to_hub}_merged` works, but\n"
                            "`model.{save_pretrained/push_to_hub}_gguf will use too much disk space.\n"
                            "You can try saving it to the `/tmp` directory for larger disk space.\n"
                            "I suggest you to save the 16bit model first, then use manual llama.cpp conversion.\n"
                            f"Error: {e}"
                        ) from e
                    else:
                        if IS_WINDOWS:
                            build_instructions = (
                                f'cd "{LLAMA_CPP_DEFAULT_DIR}"\n'
                                f"cmake -S . -B build -DBUILD_SHARED_LIBS=OFF\n"
                                f"cmake --build build --config Release"
                            )
                        else:
                            build_instructions = f'cd "{LLAMA_CPP_DEFAULT_DIR}" && make clean && make all -j'

                        raise RuntimeError(
                            f"Unsloth: Quantization failed for {output_location}\n"
                            "You might have to compile llama.cpp yourself, then run this again.\n"
                            "You do not need to close this Python program. Run the following commands in a new terminal:\n"
                            f'git clone --recursive https://github.com/ggerganov/llama.cpp "{LLAMA_CPP_DEFAULT_DIR}"\n'
                            f"{build_instructions}\n"
                            "Once that's done, redo the quantization.\n"
                            f"Error: {e}"
                        ) from e
        print("Unsloth: Model files cleanup...")
        if quants_created:
            # The intermediate file was only a stepping stone: drop it from
            # the results and delete it from disk.
            all_saved_locations.remove(base_gguf)
            Path(base_gguf).unlink(missing_ok = True)

            # flip the list to get [text_model, mmproj] order. for text models stays the same.
            all_saved_locations.reverse()
    else:
        print("Unsloth: GPT-OSS model - skipping additional quantizations")

    if is_gpt_oss:
        want_full_precision = True
    else:
        want_full_precision = first_conversion in frozenset(quantization_method)

    print(f"Unsloth: All GGUF conversions completed successfully!")
    print(f"Generated files: {all_saved_locations}")

    return all_saved_locations, want_full_precision, is_vlm


def unsloth_save_pretrained_merged(
    self,
    save_directory: Union[str, os.PathLike],
    tokenizer = None,
    save_method: str = "merged_16bit",  # ["lora", "merged_16bit", "merged_4bit"]
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
    is_main_process: bool = True,
    state_dict: Optional[dict] = None,
    save_function: Callable = torch.save,
    max_shard_size: Union[int, str] = "5GB",
    safe_serialization: bool = True,
    variant: Optional[str] = None,
    save_peft_format: bool = True,
    tags: List[str] = None,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.75,
    datasets: Optional[List[str]] = None,
):
    """
    Drop-in for .save_pretrained(...) that converts 4bit weights to
    float16 on the fly with as little overhead as possible.

    `save_method` selects what is written:
    1. `16bit`: LoRA merged into float16 weights. Useful for GGUF / llama.cpp.
    2.  `4bit`: LoRA merged into int4 weights. Useful for DPO / HF inference.
    3.  `lora`: LoRA adapters only, nothing merged. Useful for HF inference.

    Every argument is forwarded by name to `unsloth_save_model`, with `self`
    passed as `model`.
    """
    if tokenizer is None:
        logger.warning_once(
            "Unsloth: You're not saving a tokenizer as well?\n"
            "You can do it separately via `tokenizer.save_pretrained(...)`"
        )

    # Snapshot of the call arguments. No other local may exist before this
    # line, because everything in the snapshot becomes a keyword argument.
    save_kwargs = {**locals()}
    save_kwargs["model"] = save_kwargs.pop("self")
    unsloth_save_model(**save_kwargs)

    # Several passes so reference cycles freed by one pass get collected.
    for _ in range(3):
        gc.collect()


def unsloth_push_to_hub_merged(
    self,
    repo_id: str,
    tokenizer = None,
    save_method: str = "merged_16bit",  # ["lora", "merged_16bit", "merged_4bit"]
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Trained with Unsloth",
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    max_shard_size: Union[int, str, None] = "5GB",
    create_pr: bool = False,
    safe_serialization: bool = True,
    revision: str = None,
    commit_description: str = "Upload model trained with Unsloth 2x faster",
    tags: Optional[List[str]] = None,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.75,
    datasets: Optional[List[str]] = None,
):
    """
    Drop-in for .push_to_hub(...) that converts 4bit weights to
    float16 on the fly with as little overhead as possible.

    `save_method` selects what is uploaded:
    1. `16bit`: LoRA merged into float16 weights. Useful for GGUF / llama.cpp.
    2.  `4bit`: LoRA merged into int4 weights. Useful for DPO / HF inference.
    3.  `lora`: LoRA adapters only, nothing merged. Useful for HF inference.

    Every argument is forwarded by name to `unsloth_save_model`; `self` is
    passed as `model`, `repo_id` as `save_directory`, and `push_to_hub` is
    forced to True.
    """
    if tokenizer is None:
        logger.warning_once(
            "Unsloth: You're not saving a tokenizer as well?\n"
            "You can do it separately via `tokenizer.push_to_hub(...)`"
        )

    # Snapshot of the call arguments. No other local may exist before this
    # line, because everything in the snapshot becomes a keyword argument.
    push_kwargs = {**locals()}
    push_kwargs["model"] = push_kwargs.pop("self")
    push_kwargs["save_directory"] = push_kwargs.pop("repo_id")
    push_kwargs["push_to_hub"] = True
    unsloth_save_model(**push_kwargs)

    # Several passes so reference cycles freed by one pass get collected.
    for _ in range(3):
        gc.collect()


# Template for the README.md model card pushed to newly created Hub repos.
# It is rendered with `str.format`, so every `{name}` below is a placeholder:
#   {base_model} - name/path of the model that was finetuned
#   {model_type} - architecture identifier, also emitted as a tag
#   {extra}      - one additional tag
#   {method}     - inserted into the "Uploaded ... model" heading
#   {username}   - shown as the developer of the model
# The leading `---` section is YAML front matter (card metadata).
MODEL_CARD = """---
base_model: {base_model}
tags:
- text-generation-inference
- transformers
- unsloth
- {model_type}
- {extra}
license: apache-2.0
language:
- en
---

# Uploaded {method} model

- **Developed by:** {username}
- **License:** apache-2.0
- **Finetuned from model :** {base_model}

This {model_type} model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth)

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
"""


def _determine_username(save_directory, old_username, token):
    username = ""
    save_directory = save_directory.lstrip("./")
    if "/" not in save_directory:
        from huggingface_hub import whoami

        try:
            username = whoami(token = token)["name"]
            if type(old_username) is str and username != old_username:
                username = old_username
            save_directory = f"{username}/{save_directory}"
        except:
            raise RuntimeError(
                f"Unsloth: {save_directory} is not a Huggingface directory."
            )
    else:
        username = save_directory.split("/")[0]
    return save_directory, username


def create_huggingface_repo(
    model,
    save_directory,
    token = None,
    private = False,
    datasets = None,
):
    """
    Create a Hub model repo (with a model card) and return an API client.

    Creation is best-effort: if creating the repo or pushing the card fails,
    the failure is treated as "repo already exists" and only the `datasets`
    metadata is updated (when given).

    Args:
        model: Model whose `config._name_or_path` and `config.model_type`
            fill the model card.
        save_directory: Repo id, with or without the username part.
        token: Hub token; looked up via `get_token()` when None.
        private: Forwarded to `create_repo`.
        datasets: Optional list of dataset names recorded in card metadata.

    Returns:
        Tuple `(save_directory, hf_api)` with the resolved `username/name`
        repo id and an `HfApi` bound to `token`.
    """
    if token is None:
        token = get_token()
    save_directory, username = _determine_username(save_directory, None, token)

    from huggingface_hub import create_repo

    try:
        # exist_ok = False on purpose: an existing repo raises, which skips
        # the model card push below and so never overwrites a README.
        create_repo(
            repo_id = save_directory,
            token = token,
            repo_type = "model",
            exist_ok = False,
            private = private,
        )

        # Create model card
        from huggingface_hub import ModelCard

        content = MODEL_CARD.format(
            username = username,
            base_model = model.config._name_or_path,
            model_type = model.config.model_type,
            method = "",
            extra = "unsloth",
        )
        card = ModelCard(content)
        if datasets:
            card.data.datasets = datasets
        card.push_to_hub(save_directory, token = token)
    except Exception:
        # Deliberately swallowed (most likely the repo already exists), but
        # KeyboardInterrupt / SystemExit are no longer caught.
        # Repo already exists — update datasets metadata separately
        if datasets:
            try:
                from huggingface_hub import metadata_update

                metadata_update(
                    save_directory, {"datasets": datasets}, overwrite = True, token = token
                )
            except Exception as e:
                logger.warning_once(
                    f"Unsloth: Could not update datasets metadata for {save_directory}: {e}"
                )
    hf_api = HfApi(token = token)
    return save_directory, hf_api


def upload_to_huggingface(
    model,
    save_directory,
    token,
    method,
    extra = "",
    file_location = None,
    old_username = None,
    private = None,
    create_config = True,
    datasets = None,
):
    """
    Ensure a Hub model repo exists and optionally upload one file to it.

    Repo creation is best-effort: any failure while creating the repo or
    pushing the model card is treated as "repo already exists", in which case
    only the `datasets` metadata is updated (when given).

    Args:
        model: Model whose `config._name_or_path` / `config.model_type` fill
            the model card and the generated `config.json`.
        save_directory: Repo id, with or without the username part.
        token: Hub token.
        method: Accepted for interface compatibility; not read here (the
            card is rendered with an empty method).
        extra: Extra tag written into the model card.
        file_location: Local file to upload. Nothing is uploaded when None.
        old_username: Preferred username, see `_determine_username`.
        private: Forwarded to `create_repo`.
        create_config: Also upload a minimal `config.json` containing only
            `model_type` (only when `file_location` is given).
        datasets: Optional list of dataset names recorded in card metadata.

    Returns:
        The username that owns the repo.
    """
    save_directory, username = _determine_username(save_directory, old_username, token)

    from huggingface_hub import create_repo

    try:
        # exist_ok = False on purpose: an existing repo raises, which skips
        # the model card push below and so never overwrites a README.
        create_repo(
            repo_id = save_directory,
            token = token,
            repo_type = "model",
            exist_ok = False,
            private = private,
        )

        # Create model card
        from huggingface_hub import ModelCard

        content = MODEL_CARD.format(
            username = username,
            base_model = model.config._name_or_path,
            model_type = model.config.model_type,
            method = "",
            extra = extra,
        )
        card = ModelCard(content)
        if datasets:
            card.data.datasets = datasets
        card.push_to_hub(save_directory, token = token)
    except Exception:
        # Deliberately swallowed (most likely the repo already exists), but
        # KeyboardInterrupt / SystemExit are no longer caught.
        # Repo already exists — update datasets metadata separately
        if datasets:
            try:
                from huggingface_hub import metadata_update

                metadata_update(
                    save_directory, {"datasets": datasets}, overwrite = True, token = token
                )
            except Exception as e:
                logger.warning_once(
                    f"Unsloth: Could not update datasets metadata for {save_directory}: {e}"
                )

    if file_location is not None:
        # Now upload file
        hf_api = HfApi(token = token)

        # Name inside the repo: everything after the last "/" (the whole
        # string when there is no "/").
        uploaded_location = file_location.rsplit("/", 1)[-1]

        # find ftevent file from tensorboard and upload it
        import glob

        # NOTE(review): the pattern contains no "**", so `recursive = True`
        # has no effect and only the current directory is scanned - confirm
        # whether a recursive search was intended.
        ftevent_files = glob.glob("*out.tfevents*", recursive = True)
        if len(ftevent_files) > 0:
            print(
                "Unsloth: Uploading tensorboard files... Please wait...",
                file_location + "*out.tfevents*",
            )
            for ftevent_file in ftevent_files:
                hf_api.upload_file(
                    path_or_fileobj = ftevent_file,
                    path_in_repo = ftevent_file.replace(file_location, ""),
                    repo_id = save_directory,
                    repo_type = "model",
                    commit_message = "(Trained with Unsloth)",
                )

        hf_api.upload_file(
            path_or_fileobj = file_location,
            path_in_repo = uploaded_location,
            repo_id = save_directory,
            repo_type = "model",
            commit_message = "(Trained with Unsloth)",
        )

        # We also upload a config.json file
        if create_config:
            import json

            temporary_config = "_temporary_unsloth_config.json"
            try:
                with open(temporary_config, "w", encoding = "utf-8") as file:
                    json.dump({"model_type": model.config.model_type}, file, indent = 4)
                hf_api.upload_file(
                    path_or_fileobj = temporary_config,
                    path_in_repo = "config.json",
                    repo_id = save_directory,
                    repo_type = "model",
                    commit_message = "(Trained with Unsloth)",
                )
            finally:
                # Never leave the scratch file behind, even if the upload fails.
                if os.path.exists(temporary_config):
                    os.remove(temporary_config)
    return username


def fix_tokenizer_bos_token(tokenizer):
    """
    Strip the BOS token from the chat template when the tokenizer already
    prepends one by itself, to avoid a doubled BOS.

    The template is only rewritten when tokenizing "A" yields the BOS id
    first AND the template mentions the BOS token (literally, or via
    `{{ bos_token }}` / `{{ bos_token + ...`).

    Returns:
        Tuple `(fix_bos_token, chat_template)`: whether the tokenizer's
        template was modified, and the ORIGINAL template (None if absent)
        so the caller can restore it later.
    """
    original_template = getattr(tokenizer, "chat_template", None)

    # Does the tokenizer insert BOS on its own?
    if not (tokenizer("A").input_ids[0] == getattr(tokenizer, "bos_token_id", None)):
        return False, original_template
    if original_template is None:
        return False, original_template

    # Whitespace-insensitive search for the Jinja forms is only done when
    # the literal token is not already present.
    template_mentions_bos = tokenizer.bos_token in original_template or any(
        marker in original_template.replace(" ", "")
        for marker in ("{bos_token}", "{bos_token+")
    )
    if not template_mentions_bos:
        return False, original_template

    logger.warning(
        "Unsloth: ##### The current model auto adds a BOS token.\n"
        "Unsloth: ##### Your chat template has a BOS token. We shall remove it temporarily."
    )

    bos_removals = (
        # Remove {{bos_token}}
        r"\{[\s]{0,}\{[\s]{0,}bos\_token[\s]{0,}\}[\s]{0,}\}",
        # Remove {{bos_token +
        r"\{[\s]{0,}\{[\s]{0,}bos\_token[\s]{0,}\+[\s]{0,}",
    )
    patched_template = original_template
    for pattern in bos_removals:
        patched_template = re.sub(pattern, "", patched_template)

    tokenizer.chat_template = patched_template
    return True, original_template


def create_ollama_modelfile(tokenizer, base_model_name, model_location):
    """
    Build the text of an Ollama Modelfile for `base_model_name`.
    Use ollama.create(model = "new_ollama_model", modelfile = modelfile)

    The raw template is also stored on `tokenizer._ollama_modelfile`.
    Returns the rendered Modelfile, or None (after printing a notice) when
    no template is registered for the model.
    """
    template_key = MODEL_TO_OLLAMA_TEMPLATE_MAPPER.get(base_model_name)
    raw_template = OLLAMA_TEMPLATES.get(template_key) if template_key else None
    if not raw_template:
        print(
            f"Unsloth: No Ollama template mapping found for model '{base_model_name}'. Skipping Ollama Modelfile"
        )
        return None
    tokenizer._ollama_modelfile = raw_template

    # Stand-ins for "{" and "}" that will not occur in a real template.
    left_marker = "⚫@✅#🦥"
    right_marker = "⚡@🦥#⛵"
    file_location_marker = f"{left_marker}__FILE_LOCATION__{right_marker}"
    eos_token_marker = f"{left_marker}__EOS_TOKEN__{right_marker}"

    # Fixes https://github.com/unslothai/unsloth/issues/1087
    # str.format would choke on the template's own braces, so: park the two
    # real placeholders, hide every remaining brace, then restore the
    # placeholders so they are the only fields format() sees.
    escaped = raw_template.replace("{__FILE_LOCATION__}", file_location_marker)
    escaped = escaped.replace("{__EOS_TOKEN__}", eos_token_marker)
    escaped = escaped.replace("{", left_marker)
    escaped = escaped.replace("}", right_marker)
    escaped = escaped.replace(file_location_marker, "{__FILE_LOCATION__}")
    escaped = escaped.replace(eos_token_marker, "{__EOS_TOKEN__}")

    # The EOS token is only read from the tokenizer when the template uses it.
    fields = {"__FILE_LOCATION__": model_location}
    if "__EOS_TOKEN__" in escaped:
        fields["__EOS_TOKEN__"] = tokenizer.eos_token
    rendered = escaped.format(**fields)

    # Bring the template's literal braces back.
    rendered = rendered.replace(left_marker, "{")
    rendered = rendered.replace(right_marker, "}")
    return rendered.rstrip()


def create_ollama_model(username: str, model_name: str, tag: str, modelfile_path: str):
    """
    Run `ollama create {username}/{model_name}:{tag} -f {modelfile_path}`,
    echoing its output line by line.

    The local server at http://localhost:11434 is probed with curl first;
    its reply (or a "not running" notice) is printed, and creation is
    attempted either way.

    Returns:
        "Ollama Request Timeout" if the probe exceeds 3 seconds, otherwise
        None. Success or failure of the create command is only printed.
    """
    try:
        probe = subprocess.run(
            ["curl", "http://localhost:11434"],
            capture_output = True,
            text = True,
            timeout = 3,
        )
    except subprocess.TimeoutExpired:
        return "Ollama Request Timeout"

    if probe.returncode != 0:
        print("Ollama Server is not Running")
    else:
        print(probe.stdout.strip())

    command = [
        "ollama",
        "create",
        f"{username}/{model_name}:{tag}",
        "-f",
        f"{modelfile_path}",
    ]
    process = subprocess.Popen(
        command,
        stdout = subprocess.PIPE,
        stderr = subprocess.STDOUT,  # interleave stderr with stdout
        text = True,
        bufsize = 1,  # line buffered
        universal_newlines = True,
    )

    # Stream output as it arrives; readline() returns "" at end of stream.
    while True:
        line = process.stdout.readline()
        if line == "":
            break
        print(line, end = "")
        sys.stdout.flush()

    exit_code = process.wait()
    if exit_code == 0:
        print("\nMODEL CREATED SUCCESSFULLY")
    else:
        print(f"\nMODEL CREATED FAILED WITH RETURN CODE {exit_code}")


def push_to_ollama_hub(username: str, model_name: str, tag: str):
    """
    Run `ollama push {username}/{model_name}:{tag}`, echoing its output
    line by line.

    The local server at http://localhost:11434 is probed with curl first;
    its reply (or a "not running" notice) is printed, and the push is
    attempted either way.

    Returns:
        "Ollama Request Timeout" if the probe exceeds 3 seconds, otherwise
        None. Success or failure of the push command is only printed.
    """
    try:
        probe = subprocess.run(
            ["curl", "http://localhost:11434"],
            capture_output = True,
            text = True,
            timeout = 3,
        )
    except subprocess.TimeoutExpired:
        return "Ollama Request Timeout"

    if probe.returncode != 0:
        print("Ollama Server is not Running")
    else:
        print(probe.stdout.strip())

    command = ["ollama", "push", f"{username}/{model_name}:{tag}"]
    process = subprocess.Popen(
        command,
        stdout = subprocess.PIPE,
        stderr = subprocess.STDOUT,  # interleave stderr with stdout
        text = True,
        bufsize = 1,  # line buffered
        universal_newlines = True,
    )

    # Stream output as it arrives; readline() returns "" at end of stream.
    while True:
        line = process.stdout.readline()
        if line == "":
            break
        print(line, end = "")
        sys.stdout.flush()

    exit_code = process.wait()
    if exit_code == 0:
        print("\nMODEL PUBLISHED SUCCESSFULLY")
    else:
        print(f"\nMODEL PUBLISHED FAILED WITH RETURN CODE {exit_code}")


def push_to_ollama(
    tokenizer,
    gguf_location,
    username: str,
    model_name: str,
    tag: str,
    base_model_name: str = None,
):
    """
    Write an Ollama Modelfile for a GGUF file, create the model locally and
    push it to the Ollama hub.

    Args:
        tokenizer: Tokenizer handed to `create_ollama_modelfile`.
        gguf_location: Path of the GGUF file the Modelfile should point to.
        username: Ollama namespace.
        model_name: Ollama model name; also used for the local file
            `Modelfile_{model_name}`.
        tag: Ollama tag.
        base_model_name: Name used to look up the Ollama template. When
            omitted, the tokenizer's `name_or_path` attribute is tried.

    Raises:
        RuntimeError: No Ollama template could be resolved for the model.
    """
    if base_model_name is None:
        # Assumption: the tokenizer exposes the originating model id as
        # `name_or_path` - TODO confirm it matches the template mapper keys.
        base_model_name = getattr(tokenizer, "name_or_path", None)

    # create_ollama_modelfile takes (tokenizer, base_model_name,
    # model_location); the former `gguf_location=` keyword call could only
    # raise TypeError.
    model_file = create_ollama_modelfile(tokenizer, base_model_name, gguf_location)
    if model_file is None:
        raise RuntimeError(
            f"Unsloth: Could not create an Ollama Modelfile for '{base_model_name}'. "
            "Pass `base_model_name` explicitly with a model that has an Ollama template."
        )

    modelfile_path = f"Modelfile_{model_name}"
    with open(modelfile_path, "w", encoding = "utf-8") as f:
        f.write(model_file)

    create_ollama_model(
        username = username,
        model_name = model_name,
        tag = tag,
        modelfile_path = modelfile_path,
    )

    push_to_ollama_hub(username = username, model_name = model_name, tag = tag)

    print("Successfully pushed to ollama")


def unsloth_save_pretrained_gguf(
    self,
    save_directory: Union[str, os.PathLike],
    tokenizer = None,
    quantization_method = "fast_quantized",
    first_conversion: str = None,
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
    private: Optional[bool] = None,
    is_main_process: bool = True,
    state_dict: Optional[dict] = None,
    save_function: Callable = torch.save,
    max_shard_size: Union[int, str] = "5GB",
    safe_serialization: bool = True,
    variant: Optional[str] = None,
    save_peft_format: bool = True,
    tags: List[str] = None,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.85,
):
    """
    Same as .save_pretrained(...) except 4bit weights are auto
    converted to float16 then converted to GGUF / llama.cpp format.

    The model is first merged and saved into `save_directory` (save method
    "merged_16bit", or "mxfp4" for GPT-OSS) and then passed to `save_to_gguf`.
    When a Modelfile can be generated it is written to
    `{save_directory}_gguf/Modelfile`.

    Choose for `quantization_method` to be one of the names below, or a
    list / tuple of them. A `None` entry inside a list is treated as "q8_0".
    "not_quantized"  : "Recommended. Fast conversion. Slow inference, big files.",
    "fast_quantized" : "Recommended. Fast conversion. OK inference, OK file size.",
    "quantized"      : "Recommended. Slow conversion. Fast inference, small files.",
    "f32"     : "Not recommended. Retains 100% accuracy, but super slow and memory hungry.",
    "f16"     : "Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
    "q8_0"    : "Fast conversion. High resource use, but generally acceptable.",
    "q4_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K",
    "q5_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K",
    "q2_k"    : "Uses Q4_K for the attention.wv and feed_forward.w2 tensors, Q2_K for the other tensors.",
    "q3_k_l"  : "Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_m"  : "Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_s"  : "Uses Q3_K for all tensors",
    "q4_0"    : "Original quant method, 4-bit.",
    "q4_1"    : "Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.",
    "q4_k_s"  : "Uses Q4_K for all tensors",
    "q4_k"    : "alias for q4_k_m",
    "q5_k"    : "alias for q5_k_m",
    "q5_0"    : "Higher accuracy, higher resource usage and slower inference.",
    "q5_1"    : "Even higher accuracy, resource usage and slower inference.",
    "q5_k_s"  : "Uses Q5_K for all tensors",
    "q6_k"    : "Uses Q8_K for all tensors",
    "iq2_xxs" : "2.06 bpw quantization",
    "iq2_xs"  : "2.31 bpw quantization",
    "iq3_xxs" : "3.06 bpw quantization",
    "q3_k_xs" : "3-bit extra small quantization",

    Returns:
        dict with the keys "save_directory", "gguf_directory", "gguf_files",
        "modelfile_location" (None when no Modelfile was written),
        "want_full_precision", "is_vlm" and "fix_bos_token".

    Raises:
        ValueError: `tokenizer` is None, or `push_to_hub` is True.
        TypeError: `quantization_method` is not a string or a list / tuple
            of strings.
        RuntimeError: merging the model or converting to GGUF failed.
    """
    if tokenizer is None:
        raise ValueError("Unsloth: Saving to GGUF must have a tokenizer.")

    try:
        base_model_name = get_model_name(self.config._name_or_path, load_in_4bit = False)
        model_name = base_model_name.split("/")[-1]
    except Exception:
        # Fall back to the raw name stored on the config
        base_model_name = self.config._name_or_path
        model_name = base_model_name.split("/")[-1]

    # Uploading is handled by .push_to_hub_gguf()
    if push_to_hub:
        raise ValueError(
            "Unsloth: Please use .push_to_hub_gguf() instead of .save_pretrained_gguf() with push_to_hub=True"
        )

    # Step 1: Normalise quantization_method up front so that invalid input
    # fails before the expensive merge rather than after it.
    quant_aliases = {
        "not_quantized": "f16",
        "fast_quantized": "q8_0",
        "quantized": "q4_k_m",
    }
    quantization_methods = []
    if quantization_method is not None:
        if isinstance(quantization_method, str):
            requested_methods = [quantization_method]
        elif isinstance(quantization_method, (list, tuple)):
            requested_methods = list(quantization_method)
        else:
            raise TypeError(
                "Unsloth: quantization_method can only be a string or a list of strings"
            )
        for quant_method in requested_methods:
            if quant_method is None:
                # Must be checked before .lower(), which None does not have
                quant_method = "q8_0"
            elif not isinstance(quant_method, str):
                raise TypeError(
                    "Unsloth: quantization_method can only be a string or a list of strings"
                )
            quant_method = quant_method.lower()
            quantization_methods.append(quant_aliases.get(quant_method, quant_method))

    # Step 2: Check if this is a VLM (Vision-Language Model) and check if gpt-oss
    has_architectures = hasattr(self.config, "architectures")
    architectures = getattr(self.config, "architectures", None)
    if isinstance(architectures, str):
        architectures = [architectures]
    # `architectures` may be None; treat that as "no architectures listed"
    architectures = list(architectures or [])

    is_vlm = False
    if has_architectures:
        is_vlm = any(
            arch.endswith(("ForConditionalGeneration", "ForVisionText2Text"))
            for arch in architectures
        ) or hasattr(self.config, "vision_config")

    is_processor = is_vlm and isinstance(tokenizer, ProcessorMixin)

    # `architectures` is a list of class names, so test membership; comparing
    # the list itself against a string can never be true.
    is_gpt_oss = "GptOssForCausalLM" in architectures or getattr(
        self.config, "model_type", None
    ) in ("gpt-oss", "gpt_oss")

    # Step 3: Prepare arguments for model saving
    if is_gpt_oss and quantization_method is not None:
        _qm = (
            quantization_method
            if isinstance(quantization_method, (list, tuple))
            else [quantization_method]
        )
        _ignored = [q for q in _qm if str(q).lower() != "mxfp4"]
        if _ignored:
            logger.warning_once(
                f"Unsloth: GPT-OSS does not support GGUF quantization "
                f"(requested: {', '.join(str(q) for q in _ignored)}). "
                f"Overriding to MXFP4 format. "
                f"Pass quantization_method=None to suppress this warning."
            )

    # Built explicitly (rather than from locals()) so that adding a local
    # variable can never leak an unexpected keyword into the save call.
    arguments = dict(
        model = self,
        tokenizer = tokenizer,
        save_directory = save_directory,
        # GPT-OSS needs mxfp4 save method
        save_method = "mxfp4" if is_gpt_oss else "merged_16bit",
        push_to_hub = False,  # We handle upload ourselves
        token = token,
        private = private,
        is_main_process = is_main_process,
        state_dict = state_dict,
        save_function = save_function,
        max_shard_size = max_shard_size,
        safe_serialization = safe_serialization,
        variant = variant,
        save_peft_format = save_peft_format,
        tags = tags,
        temporary_location = temporary_location,
        maximum_memory_usage = maximum_memory_usage,
    )

    # Step 4: Fix tokenizer BOS token if needed
    if is_processor:
        fix_bos_token, old_chat_template = fix_tokenizer_bos_token(tokenizer.tokenizer)
    else:
        fix_bos_token, old_chat_template = fix_tokenizer_bos_token(tokenizer)

    # Step 5: Save/merge model to 16-bit (or mxfp4) format
    print(
        f'Unsloth: Merging model weights to {"mxfp4" if is_gpt_oss else "16-bit"} format...'
    )
    try:
        # Call unsloth_generic_save directly (it's in the same file)
        unsloth_generic_save(**arguments)
    except Exception as e:
        raise RuntimeError(f"Failed to save/merge model: {e}") from e

    # From here on only the inner text tokenizer of a processor is used
    if is_processor:
        tokenizer = tokenizer.tokenizer

    # Use old chat template if the bos is removed
    if fix_bos_token:
        tokenizer.chat_template = old_chat_template

    # Step 6: Clean up memory
    import gc

    for _ in range(3):
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Step 7: Get model dtype and type
    # model_type is read outside the try so that a dtype lookup failure
    # (which falls back to float16) cannot leave it undefined.
    model_type = getattr(self.config, "model_type", None)
    try:
        model_dtype = dtype_from_config(self.config)
        if isinstance(model_dtype, str):
            if model_dtype not in ("float16", "bfloat16"):
                raise TypeError("Unsloth: Model dtype can only be float16 or bfloat16")
        elif model_dtype == torch.float16:
            model_dtype = "float16"
        elif model_dtype == torch.bfloat16:
            model_dtype = "bfloat16"
        else:
            raise TypeError("Unsloth: Model dtype can only be float16 or bfloat16")
    except Exception as e:
        # Fallback if the dtype cannot be determined or is unsupported
        print(f"Unsloth: Could not determine dtype ({e}), defaulting to float16")
        model_dtype = "float16"

    # Step 8: Convert to GGUF format
    print("Unsloth: Converting to GGUF format...")
    try:
        all_file_locations, want_full_precision, is_vlm_update = save_to_gguf(
            model_name = model_name,
            model_type = model_type,
            model_dtype = model_dtype,
            is_sentencepiece = False,
            model_directory = save_directory,
            quantization_method = quantization_methods,
            first_conversion = first_conversion,
            is_vlm = is_vlm,  # Pass VLM flag
            is_gpt_oss = is_gpt_oss,  # Pass gpt_oss Flag
        )
    except Exception as e:
        if IS_KAGGLE_ENVIRONMENT:
            raise RuntimeError(
                f"Unsloth: GGUF conversion failed in Kaggle environment.\n"
                f"This is likely due to the 20GB disk space limit.\n"
                f"Try saving to /tmp directory or use a smaller model.\n"
                f"Error: {e}"
            ) from e
        raise RuntimeError(f"Unsloth: GGUF conversion failed: {e}") from e

    # Step 9: Create Ollama modelfile (best effort - failure only warns)
    gguf_directory = f"{save_directory}_gguf"
    modelfile_location = None
    ollama_success = False
    if all_file_locations:
        try:
            if is_vlm_update:
                modelfile = create_ollama_modelfile(tokenizer, base_model_name, ".")
            else:
                modelfile = create_ollama_modelfile(
                    tokenizer,
                    base_model_name,
                    os.path.basename(all_file_locations[0]),
                )
            if modelfile is not None:
                modelfile_location = os.path.join(gguf_directory, "Modelfile")
                with open(modelfile_location, "w", encoding = "utf-8") as file:
                    file.write(modelfile)
                ollama_success = True
        except Exception as e:
            print(f"Warning: Could not create Ollama modelfile: {e}")

    # Step 10: Show BOS token warning if applicable
    if fix_bos_token:
        logger.warning(
            "Unsloth: ##### The current model auto adds a BOS token.\n"
            "Unsloth: ##### We removed it in GGUF's chat template for you."
        )

    # Step 11: Print example llama.cpp commands
    _exe = ".exe" if IS_WINDOWS else ""
    if IS_WINDOWS:
        _bin_dir = os.path.join(LLAMA_CPP_DEFAULT_DIR, "build", "bin", "Release")
    else:
        _bin_dir = LLAMA_CPP_DEFAULT_DIR

    # Only index into the file list when there is something in it
    if all_file_locations:
        if is_vlm_update:
            mtmd_cli = os.path.join(_bin_dir, "llama-mtmd-cli" + _exe)
            print("\n")
            print(
                f"Unsloth: example usage for Multimodal LLMs: {mtmd_cli} -m {all_file_locations[0]} --mmproj {all_file_locations[-1]}"
            )
            print("Unsloth: load image inside llama.cpp runner: /image test_image.jpg")
            print("Unsloth: Prompt model to describe the image")
        else:
            llama_cli = os.path.join(_bin_dir, "llama-cli" + _exe)
            print(
                f'Unsloth: example usage for text only LLMs: {llama_cli} --model {all_file_locations[0]} -p "why is the sky blue?"'
            )

    if ollama_success:
        print(f"Unsloth: Saved Ollama Modelfile to {modelfile_location}")
        print(
            f"Unsloth: convert model to ollama format by running - ollama create model_name -f {modelfile_location}"
        )

    # Return a dict with all needed info for push_to_hub
    return {
        "save_directory": save_directory,
        "gguf_directory": gguf_directory,
        "gguf_files": all_file_locations,
        "modelfile_location": modelfile_location,
        "want_full_precision": want_full_precision,
        "is_vlm": is_vlm_update,
        "fix_bos_token": fix_bos_token,
    }


def unsloth_push_to_hub_gguf(
    self,
    repo_id: str,
    tokenizer = None,
    quantization_method = "fast_quantized",
    first_conversion: str = None,
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Trained with Unsloth",
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    max_shard_size: Union[int, str, None] = "5GB",
    create_pr: bool = False,
    safe_serialization: bool = True,
    revision: str = None,
    commit_description: str = "Upload model trained with Unsloth 2x faster",
    tags: Optional[List[str]] = None,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.85,
    datasets: Optional[List[str]] = None,
):
    """
    Same as .push_to_hub(...) except 4bit weights are auto
    converted to float16 then converted to GGUF / llama.cpp format.

    The conversion is delegated to `unsloth_save_pretrained_gguf`. The GGUF
    files, `config.json` (if present), the Ollama Modelfile (if one was
    written) and a generated `README.md` are then uploaded to `repo_id`.
    When `use_temp_dir` is True or None the work happens in a temporary
    directory that is removed afterwards; otherwise a directory named after
    the repository is used and kept.

    Choose for `quantization_method` to be:
    "not_quantized"  : "Recommended. Fast conversion. Slow inference, big files.",
    "fast_quantized" : "Recommended. Fast conversion. OK inference, OK file size.",
    "quantized"      : "Recommended. Slow conversion. Fast inference, small files.",
    "f32"     : "Not recommended. Retains 100% accuracy, but super slow and memory hungry.",
    "f16"     : "Fastest conversion + retains 100% accuracy. Slow and memory hungry.",
    "q8_0"    : "Fast conversion. High resource use, but generally acceptable.",
    "q4_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K",
    "q5_k_m"  : "Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K",
    "q2_k"    : "Uses Q4_K for the attention.wv and feed_forward.w2 tensors, Q2_K for the other tensors.",
    "q3_k_l"  : "Uses Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_m"  : "Uses Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else Q3_K",
    "q3_k_s"  : "Uses Q3_K for all tensors",
    "q4_0"    : "Original quant method, 4-bit.",
    "q4_1"    : "Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.",
    "q4_k_s"  : "Uses Q4_K for all tensors",
    "q5_0"    : "Higher accuracy, higher resource usage and slower inference.",
    "q5_1"    : "Even higher accuracy, resource usage and slower inference.",
    "q5_k_s"  : "Uses Q5_K for all tensors",
    "q6_k"    : "Uses Q8_K for all tensors",

    Returns:
        The full repository id (`username/name`) that was uploaded to.

    Raises:
        ValueError: `tokenizer` is None.
        RuntimeError: the conversion or the upload failed.
    """
    if tokenizer is None:
        raise ValueError("Unsloth: Saving to GGUF must have a tokenizer.")

    # Step 1: Determine save directory
    model_name = repo_id.split("/")[-1] if "/" in repo_id else repo_id

    if use_temp_dir or use_temp_dir is None:
        import tempfile

        save_directory = tempfile.mkdtemp(prefix = "unsloth_gguf_")
        cleanup_temp = True
    else:
        save_directory = model_name  # Use model name, not repo_id
        cleanup_temp = False

    def _remove_working_directories():
        # Best-effort removal of the temporary save dir and its GGUF sibling
        import shutil

        for directory in (save_directory, f"{save_directory}_gguf"):
            shutil.rmtree(directory, ignore_errors = True)

    def _hub_filename(file_location):
        # Name a local GGUF file should get inside the Hub repository
        original_name = os.path.basename(file_location)
        if cleanup_temp and "unsloth_gguf_" in original_name:
            # Keep the quantization part (e.g. "Q8_0.gguf" or
            # "Q8_0-mmproj.gguf") but swap the temp prefix for the model name
            quant_suffix = (
                original_name.split(".", 1)[1]
                if "." in original_name
                else original_name
            )
            return f"{model_name}.{quant_suffix}"
        return original_name.replace(os.path.basename(save_directory), model_name)

    # Step 2: Call save_pretrained_gguf to do the conversion
    print("Unsloth: Converting model to GGUF format...")

    try:
        # Call save_pretrained_gguf - it returns all the info we need
        result = unsloth_save_pretrained_gguf(
            self = self,
            save_directory = save_directory,
            tokenizer = tokenizer,
            quantization_method = quantization_method,
            first_conversion = first_conversion,
            push_to_hub = False,  # Never push from here
            token = None,  # Don't need token for local save
            max_shard_size = max_shard_size,
            safe_serialization = safe_serialization,
            temporary_location = temporary_location,
            maximum_memory_usage = maximum_memory_usage,
        )
    except Exception as e:
        if cleanup_temp:
            _remove_working_directories()
        raise RuntimeError(f"Failed to convert model to GGUF: {e}") from e

    all_file_locations = result["gguf_files"]
    modelfile_location = result["modelfile_location"]
    is_vlm = result["is_vlm"]
    fix_bos_token = result["fix_bos_token"]
    actual_save_directory = result["save_directory"]

    # Step 3: Upload to HuggingFace Hub
    print("Unsloth: Uploading GGUF to Huggingface Hub...")

    try:
        from huggingface_hub import HfApi

        api = HfApi(token = token)

        # Get full repo id
        if "/" not in repo_id:
            username = api.whoami()["name"]
            full_repo_id = f"{username}/{repo_id}"
        else:
            full_repo_id = repo_id

        # Create repo
        api.create_repo(
            repo_id = full_repo_id,
            repo_type = "model",
            private = private,
            exist_ok = True,
        )

        # Upload GGUF files
        hub_filenames = [_hub_filename(location) for location in all_file_locations]
        for file_location, proper_name in zip(all_file_locations, hub_filenames):
            print(f"Uploading {proper_name}...")
            api.upload_file(
                path_or_fileobj = file_location,
                path_in_repo = proper_name,
                repo_id = full_repo_id,
                repo_type = "model",
                commit_message = commit_message,
                commit_description = commit_description,
                create_pr = create_pr,
                revision = revision,
            )

        # Upload config.json if exists
        config_path = os.path.join(actual_save_directory, "config.json")
        if os.path.exists(config_path):
            print("Uploading config.json...")
            api.upload_file(
                path_or_fileobj = config_path,
                path_in_repo = "config.json",
                repo_id = full_repo_id,
                repo_type = "model",
                commit_message = f"{commit_message} - config",
                create_pr = create_pr,
                revision = revision,
            )

        # Upload Modelfile if exists
        if modelfile_location and os.path.exists(modelfile_location):
            print("Uploading Ollama Modelfile...")
            api.upload_file(
                path_or_fileobj = modelfile_location,
                path_in_repo = "Modelfile",
                repo_id = full_repo_id,
                repo_type = "model",
                commit_message = f"{commit_message} - Ollama Modelfile",
                create_pr = create_pr,
                revision = revision,
            )

        # Create and upload README
        readme_content = f"""---
tags:
- gguf
- llama.cpp
- unsloth
{"- vision-language-model" if is_vlm else ""}
---

# {repo_id.split("/")[-1]} : GGUF

This model was finetuned and converted to GGUF format using [Unsloth](https://github.com/unslothai/unsloth).

**Example usage**:
- For text only LLMs:    `llama-cli -hf {repo_id} --jinja`
- For multimodal models: `llama-mtmd-cli -hf {repo_id} --jinja`

## Available Model files:
"""
        for proper_name in hub_filenames:
            readme_content += f"- `{proper_name}`\n"

        # Special note for VLM with Modelfile
        if is_vlm and modelfile_location:
            readme_content += "\n## ⚠️ Ollama Note for Vision Models\n"
            readme_content += "**Important:** Ollama currently does not support separate mmproj files for vision models.\n\n"
            readme_content += "To create an Ollama model from this vision model:\n"
            readme_content += "1. Place the `Modelfile` in the same directory as the finetuned bf16 merged model\n"
            readme_content += "2. Run: `ollama create model_name -f ./Modelfile`\n"
            readme_content += "   (Replace `model_name` with your desired name)\n\n"
            readme_content += (
                "This will create a unified bf16 model that Ollama can use.\n"
            )
        elif modelfile_location:
            readme_content += "\n## Ollama\n"
            readme_content += "An Ollama Modelfile is included for easy deployment.\n"

        if fix_bos_token:
            readme_content += "\n## Note\n"
            readme_content += (
                "The model's BOS token behavior was adjusted for GGUF compatibility.\n"
            )

        readme_content += (
            "This was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth)\n"
            '[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)\n'
        )

        readme_path = os.path.join(actual_save_directory, "README.md")
        # Explicit UTF-8: the README can contain non-ASCII characters, which
        # the platform default encoding is not guaranteed to represent.
        with open(readme_path, "w", encoding = "utf-8") as f:
            f.write(readme_content)

        api.upload_file(
            path_or_fileobj = readme_path,
            path_in_repo = "README.md",
            repo_id = full_repo_id,
            repo_type = "model",
            commit_message = "Add README",
            create_pr = create_pr,
            revision = revision,
        )

        print(
            f"Unsloth: Successfully uploaded GGUF to https://huggingface.co/{full_repo_id}"
        )

        # Add tags - work on a copy so the caller's list is left untouched
        hub_tags = list(tags) if tags else []
        hub_tags.extend(["gguf", "llama-cpp", "unsloth"])
        if is_vlm:
            hub_tags.append("vision-language-model")

        try:
            api.add_tags(
                repo_id = full_repo_id,
                tags = hub_tags,
                repo_type = "model",
            )
        except Exception as e:
            # Tagging is optional; report the problem instead of hiding it
            logger.warning_once(
                f"Unsloth: Could not add tags to {full_repo_id}: {e}"
            )

        if datasets:
            try:
                from huggingface_hub import metadata_update

                metadata_update(
                    full_repo_id, {"datasets": datasets}, overwrite = True, token = token
                )
            except Exception as e:
                logger.warning_once(
                    f"Unsloth: Could not update datasets metadata for {full_repo_id}: {e}"
                )

    except Exception as e:
        raise RuntimeError(f"Failed to upload to Hugging Face Hub: {e}") from e

    finally:
        # Clean up temporary directory
        if cleanup_temp:
            print("Unsloth: Cleaning up temporary files...")
            _remove_working_directories()

    return full_repo_id


# Corrected function to save LoRA to a custom directory
def save_lora_to_custom_dir(model, tokenizer, save_directory):
    """
    Create `save_directory` if needed and call `unsloth_save_model` on it
    with `save_method = "lora"` and no Hub upload.
    """
    os.makedirs(save_directory, exist_ok = True)

    lora_save_options = {
        "save_directory": save_directory,
        "save_method": "lora",
        "push_to_hub": False,
    }
    unsloth_save_model(model, tokenizer, **lora_save_options)


# Corrected method within the model class to convert LoRA to GGML and push to Hugging Face Hub
def unsloth_convert_lora_to_ggml_and_push_to_hub(
    self,
    tokenizer,
    repo_id: str,
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Converted LoRA to GGML with Unsloth",
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    create_pr: bool = False,
    revision: str = None,
    commit_description: str = "Convert LoRA to GGML format using Unsloth",
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.85,
):
    """
    Save the LoRA adapters to `lora-to-ggml-push`, convert them with
    `llama.cpp/convert-lora-to-ggml.py` and upload the resulting
    `ggml-adapter-model.bin` through `upload_to_huggingface`.

    llama.cpp is installed first when no `llama.cpp` directory exists in the
    current working directory. If the conversion fails an error is printed
    and the function returns without uploading.

    `use_temp_dir`, `commit_message`, `create_pr`, `revision`,
    `commit_description`, `temporary_location` and `maximum_memory_usage`
    are accepted but not used by this function.
    """
    if not os.path.exists("llama.cpp"):
        if IS_KAGGLE_ENVIRONMENT:
            python_install = install_python_non_blocking(["protobuf"])
            python_install.wait()
            install_llama_cpp_blocking(use_cuda = False)
        else:
            git_clone = install_llama_cpp_clone_non_blocking()
            python_install = install_python_non_blocking(["protobuf"])
            git_clone.wait()
            # NOTE(review): the build is started but never waited on here
            makefile = install_llama_cpp_make_non_blocking()
            python_install.wait()

    for _ in range(3):
        gc.collect()

    lora_directory_push = "lora-to-ggml-push"
    save_lora_to_custom_dir(self, tokenizer, lora_directory_push)

    output_file = os.path.join(lora_directory_push, "ggml-adapter-model.bin")

    print(
        f"Unsloth: Converting auto-saved LoRA adapters at {lora_directory_push} to GGML format."
    )
    print(f"The output file will be {output_file}")

    # Argument list instead of a shell string: no quoting problems
    command = [
        "python3",
        "llama.cpp/convert-lora-to-ggml.py",
        lora_directory_push,
        output_file,
        "llama",
    ]

    try:
        # stderr is merged into stdout. Draining two separate pipes one after
        # the other can deadlock once the unread pipe's buffer fills up.
        with subprocess.Popen(
            command,
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
            bufsize = 1,
            universal_newlines = True,
        ) as sp:
            for line in sp.stdout:
                print(line, end = "", flush = True)
            sp.wait()
            if sp.returncode != 0:
                raise subprocess.CalledProcessError(sp.returncode, command)
    except subprocess.CalledProcessError as e:
        print(f"Error: Conversion failed with return code {e.returncode}")
        return
    except OSError as e:
        # Without a shell, a missing interpreter surfaces as an OSError
        print(f"Error: Conversion could not be started: {e}")
        return

    print(f"Unsloth: Conversion completed! Output file: {output_file}")

    print("Unsloth: Uploading GGML file to Hugging Face Hub...")
    upload_to_huggingface(
        self,
        repo_id,
        token,
        "GGML converted LoRA",
        "ggml",
        output_file,
        None,
        private,
    )
    link = f"{repo_id.lstrip('/')}"
    print("Unsloth: Done.")
    print(f"Converted LoRA to GGML and uploaded to https://huggingface.co/{link}")
    print(
        "\nThis GGML making function was made by Maheswar. Ping him @Maheswar on the Unsloth Discord or on HuggingFace (@mahiatlinux) if you like this!"
    )


def unsloth_convert_lora_to_ggml_and_save_locally(
    self,
    save_directory: str,  # Added parameter for the folder name
    tokenizer,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.85,
):
    """
    Save the LoRA adapters to `save_directory` and convert them with
    `llama.cpp/convert-lora-to-ggml.py` into
    `{save_directory}/ggml-adapter-model.bin`.

    llama.cpp is installed first when no `llama.cpp` directory exists in the
    current working directory. If the conversion fails an error is printed
    and the function returns early.

    `temporary_location` and `maximum_memory_usage` are accepted but not
    used by this function.
    """
    if not os.path.exists("llama.cpp"):
        if IS_KAGGLE_ENVIRONMENT:
            python_install = install_python_non_blocking(["protobuf"])
            python_install.wait()
            install_llama_cpp_blocking(use_cuda = False)
        else:
            git_clone = install_llama_cpp_clone_non_blocking()
            python_install = install_python_non_blocking(["protobuf"])
            git_clone.wait()
            # NOTE(review): the build is started but never waited on here
            makefile = install_llama_cpp_make_non_blocking()
            python_install.wait()

    for _ in range(3):
        gc.collect()

    # Use the provided save_directory for local saving
    save_lora_to_custom_dir(self, tokenizer, save_directory)

    output_file = os.path.join(save_directory, "ggml-adapter-model.bin")

    print(
        f"Unsloth: Converting auto-saved LoRA adapters at {save_directory} to GGML format."
    )
    print(f"The output file will be {output_file}")

    # Argument list instead of a shell string: `save_directory` comes from
    # the caller and may contain spaces or shell metacharacters.
    command = [
        "python3",
        "llama.cpp/convert-lora-to-ggml.py",
        os.fspath(save_directory),
        output_file,
        "llama",
    ]

    try:
        # stderr is merged into stdout. Draining two separate pipes one after
        # the other can deadlock once the unread pipe's buffer fills up.
        with subprocess.Popen(
            command,
            stdout = subprocess.PIPE,
            stderr = subprocess.STDOUT,
            bufsize = 1,
            universal_newlines = True,
        ) as sp:
            for line in sp.stdout:
                print(line, end = "", flush = True)
            sp.wait()
            if sp.returncode != 0:
                raise subprocess.CalledProcessError(sp.returncode, command)
    except subprocess.CalledProcessError as e:
        print(f"Error: Conversion failed with return code {e.returncode}")
        return
    except OSError as e:
        # Without a shell, a missing interpreter surfaces as an OSError
        print(f"Error: Conversion could not be started: {e}")
        return

    print("Unsloth: Done.")
    print(f"Unsloth: Conversion completed! Output file: {output_file}")
    print(
        "\nThis GGML making function was made by Maheswar. Ping him @Maheswar on the Unsloth Discord or on HuggingFace (@mahiatlinux) if you like this!"
    )


from .models.loader_utils import get_model_name
from unsloth_zoo.saving_utils import (
    merge_and_overwrite_lora,
    prepare_saving,
)
from unsloth_zoo.llama_cpp import (
    install_llama_cpp,
    convert_to_gguf as _convert_to_gguf,
)


@torch.inference_mode
def save_to_gguf_generic(
    model,
    save_directory,
    tokenizer,
    quantization_method = None,
    quantization_type = "Q8_0",
    repo_id = None,
    token = None,
):
    """
    Convert the model saved in `save_directory` to GGUF, once per requested
    quantization, optionally uploading the `*.gguf` files to `repo_id`.

    Args:
        model: passed to `prepare_saving` when uploading.
        save_directory: directory handed to `_convert_to_gguf` and uploaded.
        tokenizer: accepted but not used by this function.
        quantization_method: old-style selector - a string or a list / tuple
            of strings. "not_quantized", "fast_quantized" and "quantized"
            map to "f16", "q8_0" and "q4_k_m"; a None entry maps to "q8_0".
        quantization_type: used only when `quantization_method` is None.
        repo_id: Hub repository to upload to; None skips the upload.
        token: Hub token; looked up with `get_token()` when uploading and
            not given.

    Returns:
        Whatever `_convert_to_gguf` returned for the last quantization type,
        or None when no quantization was requested (empty list).

    Raises:
        TypeError: `quantization_method` is not a string or a list / tuple
            of strings.
        RuntimeError: no upload token is available, or a quantization name
            is not in `ALLOWED_QUANTS`.
    """
    if token is None and repo_id is not None:
        token = get_token()
    if repo_id is not None and token is None:
        raise RuntimeError("Unsloth: Please specify a token for uploading!")

    if not os.path.exists(os.path.join("llama.cpp", "unsloth_convert_hf_to_gguf.py")):
        install_llama_cpp(just_clone_repo = True)

    # Use old style quantization_method
    quant_aliases = {
        "not_quantized": "f16",
        "fast_quantized": "q8_0",
        "quantized": "q4_k_m",
    }
    new_quantization_methods = []
    if quantization_method is not None:
        if isinstance(quantization_method, str):
            requested_methods = [quantization_method]
        elif isinstance(quantization_method, (list, tuple)):
            requested_methods = list(quantization_method)
        else:
            raise TypeError(
                "Unsloth: quantization_method can only be a string or a list of strings"
            )
        for quant_method in requested_methods:
            if quant_method is None:
                # Must be checked before .lower(), which None does not have
                quant_method = "q8_0"
            elif not isinstance(quant_method, str):
                raise TypeError(
                    "Unsloth: quantization_method can only be a string or a list of strings"
                )
            quant_method = quant_method.lower()
            new_quantization_methods.append(
                quant_aliases.get(quant_method, quant_method)
            )
    else:
        new_quantization_methods.append(quantization_type.lower())

    # Check if wrong method
    for quant_method in new_quantization_methods:
        if quant_method not in ALLOWED_QUANTS:
            error = f"Unsloth: Quant method = [{quant_method}] not supported. Choose from below:\n"
            error += "".join(
                f"[{key}] => {value}\n" for key, value in ALLOWED_QUANTS.items()
            )
            raise RuntimeError(error)

    # Go through all types and save individually - somewhat inefficient
    # since we save F16 / BF16 multiple times
    metadata = None  # stays None when the list of methods is empty
    for quant_type in new_quantization_methods:
        metadata = _convert_to_gguf(
            save_directory,
            print_output = True,
            quantization_type = quant_type,
        )
        if repo_id is not None:
            prepare_saving(
                model,
                repo_id,
                push_to_hub = True,
                max_shard_size = "50GB",
                private = True,
                token = token,
            )

            from huggingface_hub import HfApi

            api = HfApi(token = token)
            api.upload_folder(
                folder_path = save_directory,
                repo_id = repo_id,
                repo_type = "model",
                allow_patterns = ["*.gguf"],
            )
    return metadata


@torch.inference_mode
def unsloth_generic_save(
    model,
    tokenizer,
    save_directory: Union[str, os.PathLike] = "unsloth_finetuned_merge",
    save_method: str = "lora",  # ["lora", "merged_16bit", "merged_4bit"]
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
    is_main_process: bool = True,
    state_dict: Optional[dict] = None,
    save_function: Callable = torch.save,
    max_shard_size: Union[int, str] = "5GB",
    safe_serialization: bool = True,
    variant: Optional[str] = None,
    save_peft_format: bool = True,
    # Push to hub
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Trained with Unsloth",
    private: Optional[bool] = None,
    create_pr: bool = False,
    revision: str = None,
    commit_description: str = "Upload model trained with Unsloth 2x faster",
    tags: List[str] = None,
    # Our functions
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.9,
    datasets: Optional[List[str]] = None,
):
    if token is None and push_to_hub:
        token = get_token()

    if save_method == "merged_4bit":
        raise RuntimeError(
            "Unsloth: Merging into 4bit will cause your model to lose accuracy if you plan\n"
            "to merge to GGUF or others later on. I suggest you to do this as a final step\n"
            "if you're planning to do multiple saves.\n"
            "If you are certain, change `save_method` to `merged_4bit_forced`."
        )
    elif save_method == "merged_4bit_forced":
        save_method = "merged_4bit"

    merge_and_overwrite_lora(
        get_model_name,
        model = model,
        tokenizer = tokenizer,
        save_directory = save_directory,
        push_to_hub = push_to_hub,
        private = private,
        token = token,
        save_method = save_method,
        output_dtype = None,
        low_disk_space_usage = True,
        use_temp_file = False,
    )

    if push_to_hub and datasets:
        try:
            from huggingface_hub import metadata_update

            save_dir, _ = _determine_username(save_directory, None, token)
            metadata_update(
                save_dir, {"datasets": datasets}, overwrite = True, token = token
            )
        except Exception as e:
            logger.warning_once(
                f"Unsloth: Could not update datasets metadata for {save_directory}: {e}"
            )

    return


def unsloth_generic_save_pretrained_merged(
    self,
    save_directory: Union[str, os.PathLike],
    tokenizer = None,
    save_method: str = "merged_16bit",  # ["lora", "merged_16bit", "merged_4bit"]
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
    is_main_process: bool = True,
    state_dict: Optional[dict] = None,
    save_function: Callable = torch.save,
    max_shard_size: Union[int, str] = "5GB",
    safe_serialization: bool = True,
    variant: Optional[str] = None,
    save_peft_format: bool = True,
    tags: List[str] = None,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.75,
    datasets: Optional[List[str]] = None,
):
    """
    Save the model to `save_directory` by delegating to `unsloth_generic_save`.

    Intended to be bound as a method on a model (`self` is forwarded as the
    `model` argument). Every other parameter is passed through unchanged as
    a keyword argument.

    Choose for `save_method` to be either:
    1. `merged_16bit`: Merge LoRA into float16 weights. Useful for GGUF / llama.cpp.
    2. `merged_4bit`: Merge LoRA into int4 weights. Useful for DPO / HF inference.
       `unsloth_generic_save` rejects this value with a RuntimeError unless it
       is spelled `merged_4bit_forced`.
    3. `lora`: Save LoRA adapters with no merging. Useful for HF inference.

    A warning is logged (once) when no `tokenizer` is supplied. After saving,
    `gc.collect()` is run three times. Returns None.
    """
    if tokenizer is None:
        logger.warning_once(
            "Unsloth: You're not saving a tokenizer as well?\n"
            "You can do it separately via `tokenizer.save_pretrained(...)`"
        )

    # Snapshot the parameters as kwargs. This must stay BEFORE any other local
    # variable is created, otherwise that local would be forwarded as well.
    arguments = dict(locals())
    # `unsloth_generic_save` expects the model under `model`, not `self`
    arguments["model"] = self
    del arguments["self"]
    unsloth_generic_save(**arguments)
    for _ in range(3):
        gc.collect()


def unsloth_generic_push_to_hub_merged(
    self,
    repo_id: str,
    tokenizer = None,
    save_method: str = "merged_16bit",  # ["lora", "merged_16bit", "merged_4bit"]
    use_temp_dir: Optional[bool] = None,
    commit_message: Optional[str] = "Trained with Unsloth",
    private: Optional[bool] = None,
    token: Union[bool, str, None] = None,
    max_shard_size: Union[int, str, None] = "5GB",
    create_pr: bool = False,
    safe_serialization: bool = True,
    revision: str = None,
    commit_description: str = "Upload model trained with Unsloth 2x faster",
    tags: Optional[List[str]] = None,
    temporary_location: str = "_unsloth_temporary_saved_buffers",
    maximum_memory_usage: float = 0.75,
    datasets: Optional[List[str]] = None,
):
    """
    Same as .push_to_hub(...) except 4bit weights are auto
    converted to float16 with as little overhead as possible.

    Intended to be bound as a method on a model. The call is delegated to
    `unsloth_generic_save` with `model = self`, `save_directory = repo_id`
    and `push_to_hub = True`; all other parameters are forwarded unchanged
    as keyword arguments.

    Choose for `save_method` to be either:
    1. `merged_16bit`: Merge LoRA into float16 weights. Useful for GGUF / llama.cpp.
    2. `merged_4bit`: Merge LoRA into int4 weights. Useful for DPO / HF inference.
       `unsloth_generic_save` rejects this value with a RuntimeError unless it
       is spelled `merged_4bit_forced`.
    3. `lora`: Save LoRA adapters with no merging. Useful for HF inference.

    A warning is logged (once) when no `tokenizer` is supplied. After the
    upload, `gc.collect()` is run three times. Returns None.
    """
    if tokenizer is None:
        logger.warning_once(
            "Unsloth: You're not saving a tokenizer as well?\n"
            "You can do it separately via `tokenizer.push_to_hub(...)`"
        )

    # Snapshot the parameters as kwargs. This must stay BEFORE any other local
    # variable is created, otherwise that local would be forwarded as well.
    arguments = dict(locals())
    # Translate this method's parameter names to those of `unsloth_generic_save`
    arguments["model"] = self
    arguments["save_directory"] = repo_id
    arguments["push_to_hub"] = True
    del arguments["self"]
    del arguments["repo_id"]
    unsloth_generic_save(**arguments)
    for _ in range(3):
        gc.collect()


def _unsloth_save_torchao_with_attached_config(
    model,
    save_directory: Union[str, os.PathLike],
    tokenizer,
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
):
    """Save a QAT-trained model after turning its fake-quantized weights into real ones.

    The model is first passed through `_convert_torchao_model`. PEFT causal-LM
    models are then handed to `_unsloth_save_torchao_with_given_config` using
    the quantization config found on `model.config`; any other model is saved
    (or pushed) directly together with its tokenizer, always with
    `safe_serialization = False`.
    """
    # Convert QAT fake-quantized weights to real quantized weights
    _convert_torchao_model(model)

    # PEFT models also might come here: delegate to the merge + quantize path
    if isinstance(model, PeftModelForCausalLM):
        _unsloth_save_torchao_with_given_config(
            model = model,
            save_directory = save_directory,
            tokenizer = tokenizer,
            torchao_config = model.config.quantization_config,
            push_to_hub = push_to_hub,
            token = token,
        )
        return

    # TorchAO does not support safe_serialization reliably, so it is disabled
    if not push_to_hub:
        model.save_pretrained(save_directory, safe_serialization = False)
        tokenizer.save_pretrained(save_directory)
        return

    model.push_to_hub(save_directory, safe_serialization = False, token = token)
    tokenizer.push_to_hub(save_directory, token = token)


def _unsloth_save_torchao_with_given_config(
    model,
    save_directory: Union[str, os.PathLike],
    tokenizer,
    torchao_config,
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
):
    """Quantizes the model with torchao and saves a torchao quantized checkpoint

    Steps:
      1. Write a 16bit checkpoint to `save_directory` (LoRA adapters are merged
         through `unsloth_generic_save` for PEFT models; other models are saved
         directly).
      2. Reload that checkpoint in bfloat16 with the torchao quantization config.
      3. Save or push the quantized model and tokenizer / processor under
         `save_directory + "-torchao"`.
      4. Remove the intermediate `save_directory` (best effort).

    Args
      `save_directory`: local folder path or huggingface hub ID when `push_to_hub` is set to True, e.g. `my_model`
      `torchao_config` (TorchAOBaseConfig): configuration for torchao quantization, full list: https://docs.pytorch.org/ao/main/api_ref_quantization.html#inference-apis-for-quantize
      `push_to_hub` (bool): whether to push the checkpoint to huggingface hub or save locally

    Raises
      AssertionError: if `push_to_hub` is set without a `token`, or if
        `torchao_config` is None.
    """

    if push_to_hub:
        assert token is not None, "Unsloth: Please specify a token for uploading!"

    assert (
        torchao_config is not None
    ), "Unsloth: Please specify a torchao_config for post-training quantization!"

    # First write an unquantized 16bit checkpoint to `save_directory`
    if not isinstance(model, (PeftModelForCausalLM, PeftModel)):
        model.save_pretrained(save_directory)
        tokenizer.save_pretrained(save_directory)
    else:
        # Merge the LoRA weights. We upload ourselves later, so never push
        # here, and the merge must be 16bit.
        unsloth_generic_save(
            model = model,
            tokenizer = tokenizer,
            save_directory = save_directory,
            push_to_hub = False,
            token = token,
            save_method = "merged_16bit",
        )

    for _ in range(3):
        gc.collect()

    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        TorchAoConfig,
        AutoModelForImageTextToText,
        AutoProcessor,
    )

    # Not used directly below; kept so that a missing torchao install still
    # fails here with an ImportError, as before.
    from torchao import quantize_

    if isinstance(torchao_config, TorchAoConfig):
        quantization_config = torchao_config
    else:
        quantization_config = TorchAoConfig(quant_type = torchao_config)

    # Determine if this is a VLM
    is_vlm = False
    if hasattr(model, "config") and hasattr(model.config, "architectures"):
        is_vlm = any(
            x.endswith(("ForConditionalGeneration", "ForVisionText2Text"))
            for x in model.config.architectures
        )
        is_vlm = is_vlm or hasattr(model.config, "vision_config")
    auto_model = AutoModelForImageTextToText if is_vlm else AutoModelForCausalLM
    auto_processor = AutoProcessor if is_vlm else AutoTokenizer

    tokenizer = auto_processor.from_pretrained(save_directory)

    # TorchAO must only use bfloat16 for loading (float16 fails)
    if HAS_TORCH_DTYPE:
        kwargs = {"torch_dtype": torch.bfloat16}
    else:
        kwargs = {"dtype": torch.bfloat16}

    # Reload with quantization applied
    quantized_model = auto_model.from_pretrained(
        save_directory,
        device_map = "auto",
        quantization_config = quantization_config,
        **kwargs,
    )

    # `save_directory` may be an os.PathLike, which cannot be concatenated
    # with a str directly.
    torchao_save_directory = os.fspath(save_directory) + "-torchao"

    # TorchAO does not support safe_serialization right now (0.14.0 seems
    # broken!), so it is always disabled regardless of the installed version.
    safe_serialization = False

    if push_to_hub:
        quantized_model.push_to_hub(
            torchao_save_directory, safe_serialization = safe_serialization, token = token
        )
        tokenizer.push_to_hub(torchao_save_directory, token = token)
    else:
        quantized_model.save_pretrained(
            torchao_save_directory, safe_serialization = safe_serialization
        )
        tokenizer.save_pretrained(torchao_save_directory)

    # Clean up the intermediate unquantized model. This is best effort:
    # failing to delete it must not fail the save.
    if os.path.exists(save_directory):
        shutil.rmtree(save_directory, ignore_errors = True)


def unsloth_save_pretrained_torchao(
    self,
    save_directory: Union[str, os.PathLike],
    tokenizer = None,
    torchao_config = None,
    push_to_hub: bool = False,
    token: Optional[Union[str, bool]] = None,
):
    """Saves a torchao quantized model checkpoint.

    Exactly one of two workflows applies, selected by `torchao_config`:

    1. **QAT (Quantization-Aware Training)**: `torchao_config` is None. The model
       must carry a non-None `_torchao_config` attribute (set when training with
       `qat_scheme`). Its fake-quantized weights are converted to real quantized
       weights and saved directly.

    2. **PTQ (Post-Training Quantization)**: `torchao_config` is given. The model
       must NOT carry a `_torchao_config`; quantization is applied to a regular
       model using the supplied config.

    Args:
      `save_directory`: local folder path or huggingface hub ID when `push_to_hub` is True
      `tokenizer`: the tokenizer to save alongside the model
      `torchao_config` (TorchAOBaseConfig): configuration for torchao quantization.
          Required for PTQ, must be None for QAT models.
          Options: https://docs.pytorch.org/ao/main/api_ref_quantization.html#inference-apis-for-quantize
      `push_to_hub` (bool): whether to push to huggingface hub or save locally
      `token`: HuggingFace token for pushing to hub; looked up via `get_token()`
          when pushing without one

    Raises:
      AssertionError: when `torchao_config` and the model's QAT state disagree
          with the two workflows above.
    """
    if push_to_hub and token is None:
        token = get_token()

    has_qat_config = getattr(self, "_torchao_config", None) is not None

    if torchao_config is None:
        # QAT path: no config provided, model must have QAT config
        assert has_qat_config, (
            "Unsloth: No `torchao_config` provided and model was not trained with `qat_scheme`. "
            "Either train with `qat_scheme` parameter, or provide a `torchao_config` for "
            "post-training quantization."
        )
        _unsloth_save_torchao_with_attached_config(
            model = self,
            save_directory = save_directory,
            tokenizer = tokenizer,
            push_to_hub = push_to_hub,
            token = token,
        )
    else:
        # PTQ path: user provided a config, model must NOT have QAT config unless PEFT
        assert not has_qat_config, (
            "Unsloth: You passed `torchao_config` but this model was trained with `qat_scheme`. "
            "For QAT models, do not pass `torchao_config` - the quantization config is already "
            "attached to the model from training."
        )
        _unsloth_save_torchao_with_given_config(
            model = self,
            save_directory = save_directory,
            tokenizer = tokenizer,
            torchao_config = torchao_config,
            push_to_hub = push_to_hub,
            token = token,
        )

    for _ in range(3):
        gc.collect()


def not_implemented_save(*args, **kwargs):
    """Placeholder save method: accepts any arguments and always raises NotImplementedError."""
    message = "Unsloth: Sorry GGUF is currently not supported for vision models!"
    raise NotImplementedError(message)


def patch_saving_functions(model, vision = False):
    """
    Attach Unsloth's saving / uploading helpers to `model` and return it.

    1. Generates the source of an `unsloth_push_to_hub` wrapper whose
       parameter list and docstring are copied from the model's original
       `push_to_hub`, and `exec`s it into this module's globals.
    2. Walks `model`, `model.model`, `model.model.model`, ... and, on every
       level whose `push_to_hub` is not already the wrapper, stores the
       original as `original_push_to_hub` and installs the wrapper.
    3. Binds the merged / GGUF / TorchAO save methods on the top-level
       `model`. Non-vision models additionally get the GGML helpers, but
       only when the model has a `config` attribute; vision models get the
       reduced set unconditionally.

    Args:
        model: object exposing `push_to_hub`; it is patched in place.
        vision: selects which set of save methods is attached (see step 3).

    Returns:
        The same `model` object.
    """
    import inspect
    import types
    from typing import Callable, Optional, Union, List

    # And now re add our saving methods!
    # If the model was patched before, copy the signature from the saved
    # original rather than from our own wrapper.
    if model.push_to_hub.__name__ == "unsloth_push_to_hub":
        original_push_to_hub = model.original_push_to_hub
    else:
        original_push_to_hub = model.push_to_hub

    # Turn the signature into source text for the generated `def` line
    signature = str(inspect.signature(original_push_to_hub)).replace("NoneType", "None")
    # Drop the leading "(" - the template below opens with "(self, " itself
    signature = signature[1:]
    # A function default is rendered as "<function save at 0x...>"; swap in a
    # name instead (presumably so that the generated source can be exec'd).
    signature = re.sub("<function save at .+?>", "torch.save", signature)
    # NOTE(review): raises AttributeError if the original has no docstring
    # (`__doc__` is None) - confirm this cannot happen for supported models.
    docs = original_push_to_hub.__doc__.encode("utf-8").decode("utf-8")

    # Source of the wrapper. At call time it adds an "unsloth" tag, appends an
    # Unsloth note to the commit message / description, calls
    # `upload_to_huggingface` when the object has a `config`, and finally calls
    # `original_push_to_hub` (retrying once without `tags` on any failure).
    push_to_hub_text = f'''def unsloth_push_to_hub(self, {signature}:
    """
    {docs}
    """
    arguments = dict(locals())
    del arguments["self"]
    if "tags" in arguments and arguments["tags"] is not None:
        assert(isinstance(arguments["tags"], (list, tuple)))
        arguments["tags"] = list(arguments["tags"]) + ["unsloth",]
    elif "tags" in arguments:
        arguments["tags"] = ["unsloth",]
    elif hasattr(self, "add_model_tags"):
        self.add_model_tags(["unsloth",])

    if "commit_message" in arguments:
        commit_message = arguments["commit_message"]
        if commit_message is not None:
            if not commit_message.endswith(" "): commit_message += " "
            if "Unsloth" not in commit_message:
                commit_message += "(Trained with Unsloth)"
        else:
            commit_message = "Upload model trained with Unsloth"
        arguments["commit_message"] = commit_message

    if "commit_description" in arguments:
        commit_description = arguments["commit_description"]
        if commit_description is not None:
            if not commit_description.endswith(" "): commit_description += " "
            if "Unsloth" not in commit_description:
                commit_description += "(Trained with Unsloth 2x faster)"
        else:
            commit_description = "Upload model trained with Unsloth 2x faster"
        arguments["commit_description"] = commit_description

    # Update model tag
    if hasattr(self, "config"):
        _ = upload_to_huggingface(
            self, arguments["repo_id"], arguments["token"],
            "finetuned", "trl", file_location = None,
            old_username = None, private = arguments["private"],
        )
    pass

    try:
        self.original_push_to_hub(**arguments)
    except:
        del arguments["tags"]
        self.original_push_to_hub(**arguments)
    pass

    if hasattr(self, "config"):
        print("Saved model to https://huggingface.co/" + arguments["repo_id"])
    pass
    '''
    # Defines (or redefines) `unsloth_push_to_hub` in this module's globals
    exec(push_to_hub_text, globals())

    # Patch every level of the wrapper chain reachable through `.model`
    original_model = model
    while True:
        # Check if push_to_hub exists before accessing its __name__
        if (
            hasattr(original_model, "push_to_hub")
            and original_model.push_to_hub.__name__ != "unsloth_push_to_hub"
        ):
            # Keep the original reachable: the wrapper calls it by this name
            original_model.original_push_to_hub = original_model.push_to_hub
            original_model.push_to_hub = types.MethodType(
                unsloth_push_to_hub, original_model
            )
            if hasattr(original_model, "add_model_tags"):
                original_model.add_model_tags(
                    [
                        "unsloth",
                    ]
                )

        if hasattr(original_model, "model"):
            original_model = original_model.model
        else:
            break

    # Add saving methods to top level model
    if not vision:
        # Objects without a `config` get no extra save methods on this path
        if hasattr(model, "config"):
            # Counteract tokenizers
            model.push_to_hub_merged = types.MethodType(
                unsloth_generic_push_to_hub_merged, model
            )
            model.save_pretrained_merged = types.MethodType(
                unsloth_generic_save_pretrained_merged, model
            )
            model.push_to_hub_gguf = types.MethodType(unsloth_push_to_hub_gguf, model)
            model.save_pretrained_gguf = types.MethodType(
                unsloth_save_pretrained_gguf, model
            )
            model.save_pretrained_torchao = types.MethodType(
                unsloth_save_pretrained_torchao, model
            )
            model.push_to_hub_ggml = types.MethodType(
                unsloth_convert_lora_to_ggml_and_push_to_hub, model
            )
            model.save_pretrained_ggml = types.MethodType(
                unsloth_convert_lora_to_ggml_and_save_locally, model
            )
    else:
        # Vision only 1 option
        # Same methods as above minus the GGML helpers, and without the
        # `config` check.
        model.push_to_hub_merged = types.MethodType(
            unsloth_generic_push_to_hub_merged, model
        )
        model.save_pretrained_merged = types.MethodType(
            unsloth_generic_save_pretrained_merged, model
        )
        model.push_to_hub_gguf = types.MethodType(unsloth_push_to_hub_gguf, model)
        model.save_pretrained_gguf = types.MethodType(
            unsloth_save_pretrained_gguf, model
        )
        model.save_pretrained_torchao = types.MethodType(
            unsloth_save_pretrained_torchao, model
        )
    return model


================================================
FILE: unsloth/tokenizer_utils.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers import AutoTokenizer
from transformers.convert_slow_tokenizer import convert_slow_tokenizer
from transformers import PreTrainedTokenizerFast
import re
import os
from transformers.models.llama.modeling_llama import logger
from peft import PeftModelForCausalLM
import torch
import itertools
import collections
import numpy as np
import gc
import subprocess
import psutil

from unsloth_zoo.tokenizer_utils import (
    mean_of_trained_tokens,
    add_new_tokens,
    fix_untrained_tokens,
)
from unsloth_zoo.training_utils import (
    fix_zero_training_loss,
)

# Public names exported by `from <this module> import *`
__all__ = [
    "load_correct_tokenizer",
    "fix_sentencepiece_tokenizer",
    "check_tokenizer",
    "add_new_tokens",
    "fix_sentencepiece_gguf",
]


# Tokenizer class names for which a failing slow-vs-fast tokenization
# comparison is tolerated (consulted in `assert_same_tokenization`).
IGNORED_TOKENIZER_CHECKING = frozenset(
    (
        "CodeLlamaTokenizerFast",
        "CodeLlamaTokenizer",
    )
)


IGNORED_TOKENIZER_NAMES = [
    # Qwen Coder did not train on tool calling. Math did!
    "unsloth/Qwen2.5-Coder-1.5B-Instruct",
    "unsloth/Qwen2.5-Coder-7B-Instruct",
]
# Lower-cased names, plus the same names with a "-bnb-4bit" suffix
IGNORED_TOKENIZER_NAMES = frozenset(
    [x.lower() for x in IGNORED_TOKENIZER_NAMES]
    + [x.lower() + "-bnb-4bit" for x in IGNORED_TOKENIZER_NAMES]
)
# Published as a newline-separated environment variable at import time.
# The set is unordered, so the order of names in the variable is arbitrary.
os.environ["UNSLOTH_IGNORED_TOKENIZER_NAMES"] = "\n".join(IGNORED_TOKENIZER_NAMES)

# Check environments
# All environment variable names joined with newlines, with a leading newline,
# so that "\nPREFIX" matches only names that START with PREFIX.
keynames = "\n" + "\n".join(os.environ.keys())
IS_COLAB_ENVIRONMENT = "\nCOLAB_" in keynames
IS_KAGGLE_ENVIRONMENT = "\nKAGGLE_" in keynames
KAGGLE_TMP = "/tmp"
del keynames


def try_fix_tokenizer(tokenizer, prepend = True):
    """
    Return a backend tokenizer whose special tokens agree with `tokenizer`.

    The backend tokenizer (`tokenizer._tokenizer` when present, otherwise the
    result of `convert_slow_tokenizer(tokenizer)`) is serialized to a string
    and patched textually:

    * when `prepend` is False, the first `Prepend "▁"` step is removed;
    * for each `xxx_token` attribute (eos_token, bos_token, ...) that is set,
      the `"content"` stored for its id - and the matching vocab entry - is
      rewritten if it differs from the attribute's value.

    Tokens without an id, or whose id does not appear in the serialized
    string, are left untouched.

    Args:
        tokenizer: tokenizer exposing `xxx_token` / `xxx_token_id` attributes.
        prepend: keep (True) or remove (False) the `Prepend "▁"` step.

    Returns:
        The object built by `from_str` on the backend tokenizer from the
        patched string.
    """
    if hasattr(tokenizer, "_tokenizer"):
        converted_tokenizer = tokenizer._tokenizer
    else:
        converted_tokenizer = convert_slow_tokenizer(tokenizer)

    tokenizer_string = converted_tokenizer.to_str()

    # Llama does _apple. Sometimes this is wrong!!
    prepend_text = '{"type":"Prepend","prepend":"▁"},'
    if not prepend and prepend_text in tokenizer_string:
        tokenizer_string = tokenizer_string.replace(prepend_text, "", 1)

    # Get eos_token, bos_token etc
    token_names = [
        x for x in dir(tokenizer) if x.endswith("_token") and x.count("_") == 1
    ]

    for token_name in token_names:
        token = getattr(tokenizer, token_name, None)
        if token is None:
            continue
        token_id = getattr(tokenizer, token_name + "_id", None)
        if token_id is None:
            # No id to look up, so nothing can be matched in the string
            continue

        # Locate the token's id mapping in the string.
        # The -1 check must happen BEFORE adding the prefix length, otherwise
        # a missing id goes unnoticed and an arbitrary slice gets rewritten.
        find_text = f'"id":{token_id},"content":"'
        found_at = tokenizer_string.find(find_text)
        if found_at == -1:
            continue
        start = found_at + len(find_text)
        end = tokenizer_string.find('",', start)
        if end == -1:
            # Malformed / unexpected layout: leave this token alone
            continue

        bad_token = tokenizer_string[start:end]
        # Check if token is the actual same one - if not, edit it
        if bad_token != token:
            bad_text = f'{find_text}{bad_token}",'
            good_text = f'{find_text}{token}",'
            tokenizer_string = tokenizer_string.replace(bad_text, good_text, 1)

            # And replace vocab section
            bad_text = f'"{bad_token}":{token_id},'
            good_text = f'"{token}":{token_id},'
            tokenizer_string = tokenizer_string.replace(bad_text, good_text, 1)

    fixed_tokenizer = converted_tokenizer.from_str(tokenizer_string)
    return fixed_tokenizer


def get_sorted_dict(dictionary):
    """
    Return a new dict with the entries of `dictionary` ordered by value.

    When several keys share a value, only the key that comes last in
    `dictionary` is kept for that value.
    """
    key_by_value = {value: key for key, value in dictionary.items()}
    return {key_by_value[value]: value for value in sorted(dictionary.values())}


def convert_to_fast_tokenizer(
    slow_tokenizer,
    temporary_location = "_unsloth_sentencepiece_temp",
):
    """
    Try to build a fast tokenizer that behaves exactly like `slow_tokenizer`.

    The slow tokenizer is returned unchanged whenever the conversion cannot be
    verified: differing vocabularies or special tokens, or differing
    tokenization (checked with and without the `Prepend` step, and once more
    after a save / reload round trip under `temporary_location`). A tokenizer
    that already reports `is_fast` is returned as is.
    """
    if getattr(slow_tokenizer, "is_fast", False):
        return slow_tokenizer

    # Look up `<ClassName>Fast` in transformers; fall back to the generic
    # fast tokenizer class on any failure.
    # NOTE(review): `eval` is fed the tokenizer's class name - acceptable only
    # while that class comes from trusted code.
    try:
        tokenizer_name = slow_tokenizer.__class__.__name__
        class_name = tokenizer_name.lower()
        if class_name.endswith("tokenizer"):
            class_name = class_name[: -len("tokenizer")]
            FastTokenizer = eval(
                f'__import__(f"transformers.models.{class_name}").{tokenizer_name}Fast'
            )
        else:
            FastTokenizer = PreTrainedTokenizerFast
    except:
        FastTokenizer = PreTrainedTokenizerFast

    # Get all arguments (bos_token, etc) from the "Args:" section of the
    # docstrings. The generic class is scanned too, since some may be missing.
    documented_args = []
    for tokenizer_class in (FastTokenizer, PreTrainedTokenizerFast):
        docs = tokenizer_class.__doc__
        docs = docs[docs.find("Args:") :]
        found = re.findall(r"\n[\s]+([^\s]{1,}) \(", docs, flags = re.MULTILINE)
        documented_args += [x for x in found if not x.endswith("_file")]

    kwargs = {
        arg: getattr(slow_tokenizer, arg, None) for arg in set(documented_args)
    }
    kwargs["tokenizer_object"] = try_fix_tokenizer(slow_tokenizer, prepend = True)
    fast_tokenizer = FastTokenizer(**kwargs)

    # Check if they're similar!
    same_vocab = get_sorted_dict(slow_tokenizer.get_vocab()) == get_sorted_dict(
        fast_tokenizer.get_vocab()
    )
    same_special = (
        slow_tokenizer.all_special_tokens == fast_tokenizer.all_special_tokens
    )
    if not (same_vocab and same_special):
        # Failure so return slow_tokenizer
        return slow_tokenizer

    # Now confirm if they match
    if not assert_same_tokenization(slow_tokenizer, fast_tokenizer):
        # Maybe remove prepending of __apple?
        kwargs["tokenizer_object"] = try_fix_tokenizer(slow_tokenizer, prepend = False)
        fast_tokenizer = FastTokenizer(**kwargs)
        if not assert_same_tokenization(slow_tokenizer, fast_tokenizer):
            # Failure :(
            return slow_tokenizer

    # Also tokenizer.model is missing! Save both into one folder and reload.
    folder_name = slow_tokenizer.name_or_path.replace("/", "_")
    if not os.path.exists(temporary_location):
        os.makedirs(temporary_location)
    reload_location = f"{temporary_location}/{folder_name}"
    slow_tokenizer.save_pretrained(reload_location)
    fast_tokenizer.save_pretrained(reload_location)

    # Now load it!
    fast_tokenizer = AutoTokenizer.from_pretrained(reload_location)
    matches = assert_same_tokenization(slow_tokenizer, fast_tokenizer)
    return fast_tokenizer if matches else slow_tokenizer


# Check Mistral chat template without BOS / EOS
# Chat template string: an optional leading system message is folded into the
# first "[INST] ... [/INST]" block; user turns are wrapped in "[INST] ... [/INST]"
# and assistant turns are emitted verbatim. Any other role raises.
# In the part of this file shown here it is referenced only from the disabled
# checks inside `assert_same_tokenization`.
mistral_template = (
    "{% if messages[0]['role'] == 'system' %}"
    "{% if messages[1]['role'] == 'user' %}"
    "{{ '[INST] ' + messages[0]['content'] + ' ' + messages[1]['content'] + ' [/INST]' }}"
    "{% set loop_messages = messages[2:] %}"
    "{% else %}"
    "{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}"
    "{% set loop_messages = messages[1:] %}"
    "{% endif %}"
    "{% else %}"
    "{% set loop_messages = messages %}"
    "{% endif %}"
    "{% for message in loop_messages %}"
    "{% if message['role'] == 'user' %}"
    "{{ '[INST] ' + message['content'] + ' [/INST]' }}"
    "{% elif message['role'] == 'assistant' %}"
    "{{ message['content'] }}"
    "{% else %}"
    "{{ raise_exception('Only user and assistant roles are supported!') }}"
    "{% endif %}"
    "{% endfor %}"
)

# Check Llama chat template without BOS / EOS
# Same structure as the Mistral template above, except that a system message
# followed by a user message is wrapped in "<<SYS>> ... <</SYS>>", user and
# assistant contents are stripped, and assistant turns are padded with a
# space on both sides.
llama_template = (
    "{% if messages[0]['role'] == 'system' %}"
    "{% if messages[1]['role'] == 'user' %}"
    "{{ '[INST] <<SYS>>\n' + messages[0]['content'] + '\n<</SYS>>\n\n' + messages[1]['content'] + ' [/INST]' }}"
    "{% set loop_messages = messages[2:] %}"
    "{% else %}"
    "{{ '[INST] ' + messages[0]['content'] + ' [/INST]' }}"
    "{% set loop_messages = messages[1:] %}"
    "{% endif %}"
    "{% else %}"
    "{% set loop_messages = messages %}"
    "{% endif %}"
    "{% for message in loop_messages %}"
    "{% if message['role'] == 'user' %}"
    "{{ '[INST] ' + message['content'].strip() + ' [/INST]' }}"
    "{% elif message['role'] == 'assistant' %}"
    "{{ ' ' + message['content'].strip() + ' ' }}"
    "{% else %}"
    "{{ raise_exception('Only user and assistant roles are supported!') }}"
    "{% endif %}"
    "{% endfor %}"
)


def assert_same_tokenization(slow_tokenizer, fast_tokenizer):
    # Returns True when both tokenizers produce the same input ids for a probe
    # string built from the slow tokenizer's special tokens, False otherwise.
    # Despite the name nothing is asserted: the result is returned.
    # Tokenizers without `all_special_tokens` are treated as matching.
    if not hasattr(slow_tokenizer, "all_special_tokens"):
        return True

    # Get eos_token, bos_token etc (only the ones that are set)
    named_tokens = [
        getattr(slow_tokenizer, attribute)
        for attribute in dir(slow_tokenizer)
        if attribute.endswith("_token") and attribute.count("_") == 1
    ]
    special_tokens = [value for value in named_tokens if value]
    all_special_tokens = list(set(special_tokens + slow_tokenizer.all_special_tokens))

    # Remove replacement char for false positive
    replacement_char = b"\xc3\xaf\xc2\xbf\xc2\xbd".decode("utf-8")
    all_special_tokens = [x for x in all_special_tokens if x != replacement_char]

    # Check if chat template is enabled!
    # The chat template checks are disabled (see the string below), so these
    # flags always stay True.
    check_chat_template1 = True
    check_chat_template2 = True
    check_chat_template3 = True

    """
    Weirdly Mistral tokenizers are actually correct??
    Ie below will actually load mistral v1 and v3 incorrectly!

    slow_chat_template = getattr(slow_tokenizer, "chat_template", None)
    fast_chat_template = getattr(fast_tokenizer, "chat_template", None)
    messages = [
        {"role": "user", "content": " What is 2+2? "},
        {"role": "assistant", "content": " It's 4. "},
    ]
    # Check the tokenizer's own chat template
    if slow_chat_template is not None and fast_chat_template is not None:
        check_chat_template1 = \
            slow_tokenizer.apply_chat_template(messages) == \
            fast_tokenizer.apply_chat_template(messages)
    pass

    # Check Mistral chat template without BOS / EOS
    slow_tokenizer.chat_template = mistral_template
    fast_tokenizer.chat_template = mistral_template
    check_chat_template2 = \
        slow_tokenizer.apply_chat_template(messages) == \
        fast_tokenizer.apply_chat_template(messages)
    pass

    # Check Llama chat template without BOS / EOS
    slow_tokenizer.chat_template = llama_template
    fast_tokenizer.chat_template = llama_template
    check_chat_template3 = \
        slow_tokenizer.apply_chat_template(messages) == \
        fast_tokenizer.apply_chat_template(messages)
    pass

    # Combine them all and revert chat templates
    slow_tokenizer.chat_template = slow_chat_template
    fast_tokenizer.chat_template = fast_chat_template
    """
    check_chat_template = (
        check_chat_template1 and check_chat_template2 and check_chat_template3
    )

    # Try special tokens
    try:
        probe = (
            "\n".join(all_special_tokens)
            + "A quick brown fox jumps over the lazy dog!!\n\nHi</s>\n\n"
            + "".join(all_special_tokens)
        )
        same_input_ids = (
            slow_tokenizer(probe).input_ids == fast_tokenizer(probe).input_ids
        )
    except:
        # For eg see https://github.com/unslothai/unsloth/issues/292
        # Sometimes tokenizer has weird tokens, causing a combined tokenization to fail.
        # [TODO] We temporarily disable this for CodeLlama tokenizers
        tokenizer_class_name = slow_tokenizer.__repr__().split("(", 1)[0]
        if tokenizer_class_name in IGNORED_TOKENIZER_CHECKING:
            return check_chat_template
        return False

    return check_chat_template and same_input_ids


def fix_sentencepiece_tokenizer(
    old_tokenizer,
    new_tokenizer,
    token_mapping,
    temporary_location = "_unsloth_sentencepiece_temp",
):
    """
    Rename pieces inside a SentencePiece `tokenizer.model` and reload the result.

    Args:
        old_tokenizer: Tokenizer whose saved `tokenizer.model` is parsed and edited.
        new_tokenizer: Tokenizer whose files are saved into the temporary folder
            and whose `eos_token` / `pad_token` are forwarded to the reload.
        token_mapping: Dict of `{old_token: new_token}` piece renames.
        temporary_location: Scratch folder used for the saved tokenizer files.

    Returns:
        `new_tokenizer` unchanged when `tokenizer.model` is not present in
        `temporary_location` at the time of the check, otherwise the tokenizer
        loaded back from that folder with `AutoTokenizer.from_pretrained`.
    """
    # From https://github.com/google/sentencepiece/issues/121
    # We need to manually edit the sentencepiece tokenizer!
    try:
        from transformers.convert_slow_tokenizer import import_protobuf

        sentencepiece_model_pb2 = import_protobuf()
    except Exception as e:
        try:
            import google.protobuf
            from unsloth_zoo.utils import Version

            protobuf_version = Version(google.protobuf.__version__)
            if protobuf_version > Version("3.20.3"):
                # NOTE(review): this RuntimeError is raised inside the `try` and is
                # therefore caught by the bare `except` right below, so it never
                # reaches the caller - confirm whether that is intended.
                raise RuntimeError(
                    f"Unsloth: Your protobuf version = {protobuf_version} is too new.\n"
                    f"Please downgrade via `pip install --force-reinstall protobuf==3.20.3`"
                )
        except:
            # This will only work for older SentencePiece versions <= 3.20.3
            from transformers.utils import sentencepiece_model_pb2

    if not os.path.exists(temporary_location):
        os.makedirs(temporary_location)

    # Check if tokenizer.model exists
    # NOTE(review): this runs before `old_tokenizer` is saved, so it only passes
    # when the folder already holds a tokenizer.model - confirm against callers.
    if not os.path.isfile(f"{temporary_location}/tokenizer.model"):
        return new_tokenizer

    # First save the old tokenizer
    old_tokenizer.save_pretrained(temporary_location)

    tokenizer_file = sentencepiece_model_pb2.ModelProto()
    # Read through a context manager so the file handle is always closed
    # before the same path is rewritten further down.
    with open(f"{temporary_location}/tokenizer.model", "rb") as file:
        tokenizer_file.ParseFromString(file.read())

    # Now save the new tokenizer
    new_tokenizer.save_pretrained(temporary_location)

    # Now correct the old tokenizer's .model file
    for old_token, new_token in token_mapping.items():
        ids = old_tokenizer([old_token], add_special_tokens = False).input_ids
        ids = ids[0]
        if len(ids) != 1:
            # Skip this token! It does not encode to exactly one id.
            print(
                f"Skip mapping {old_token} to {new_token} since {new_token} is already in the tokenizer!"
            )
            continue
        ids = ids[0]
        # [TODO] Hack for Starling - try except
        try:
            tokenizer_piece = tokenizer_file.pieces[ids]
        except:
            continue
        assert tokenizer_piece.piece == old_token
        tokenizer_piece.piece = new_token

    # And now write it
    with open(f"{temporary_location}/tokenizer.model", "wb") as file:
        file.write(tokenizer_file.SerializeToString())

    # And load it!
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(
        temporary_location,
        eos_token = new_tokenizer.eos_token,
        pad_token = new_tokenizer.pad_token,
    )
    return tokenizer


def fix_sentencepiece_gguf(saved_location):
    """
    Fixes sentencepiece tokenizers which did not extend the vocabulary with
    user defined tokens.
    Inspiration from https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py

    Reads `{saved_location}/tokenizer.model` and `{saved_location}/added_tokens.json`.
    When the added token ids form a consecutive run that starts right after the
    last sentencepiece id, one USER_DEFINED piece per added token is appended and
    `tokenizer.model` is rewritten in place.

    Args:
        saved_location: Folder containing the saved tokenizer files.

    Returns:
        None. The function returns early without writing anything when either
        file is missing, when there are no added tokens, or when the added ids
        are not a consecutive extension of the sentencepiece vocabulary.
    """
    from copy import deepcopy
    from transformers.utils import sentencepiece_model_pb2
    import json
    from enum import IntEnum

    class SentencePieceTokenTypes(IntEnum):
        NORMAL = 1
        UNKNOWN = 2
        CONTROL = 3
        USER_DEFINED = 4
        UNUSED = 5
        BYTE = 6

    # Load tokenizer.model
    tokenizer_file = sentencepiece_model_pb2.ModelProto()
    if not os.path.isfile(f"{saved_location}/tokenizer.model"):
        return
    # Context manager closes the handle before the file is rewritten below.
    with open(f"{saved_location}/tokenizer.model", "rb") as file:
        tokenizer_file.ParseFromString(file.read())
    sentence_piece_size = len(tokenizer_file.pieces)

    # Load added_tokens_json
    if not os.path.isfile(f"{saved_location}/added_tokens.json"):
        return
    with open(f"{saved_location}/added_tokens.json", "r", encoding = "utf-8") as file:
        added_tokens_json = json.load(file)
    if len(added_tokens_json) == 0:
        return

    # Order the added tokens by id so they are appended in vocabulary order
    added_tokens_json = dict(
        sorted(added_tokens_json.items(), key = lambda item: item[1])
    )
    new_size = sentence_piece_size + len(added_tokens_json)

    # Confirm added_tokens_json is correct: ids must increase by exactly 1.
    # `np.any` is used instead of `.min()` / `.max()` because the diff of a
    # single added token is an empty array, on which min / max raise ValueError.
    added_tokens_ids = np.array(list(added_tokens_json.values()))
    diff = np.diff(added_tokens_ids)
    if np.any(diff != 1):
        return
    if added_tokens_ids.min() != sentence_piece_size:
        return

    # Edit sentence piece tokens with added_tokens_json
    logger.warning(
        f"Unsloth: Extending {saved_location}/tokenizer.model with added_tokens.json.\n"
        f"Originally tokenizer.model is of size ({sentence_piece_size}).\n"
        f"But we need to extend to sentencepiece vocab size ({new_size})."
    )
    # Existing trailing pieces are copied only to serve as templates; every
    # field that matters (piece, score, type) is overwritten below.
    new_tokens = deepcopy(tokenizer_file.pieces[-len(added_tokens_ids) :])
    for new_token, added_token in zip(new_tokens, added_tokens_json.keys()):
        new_token.piece = added_token.encode("utf-8")
        new_token.score = -1000.0
        new_token.type = SentencePieceTokenTypes.USER_DEFINED

    tokenizer_file.pieces.extend(new_tokens)

    with open(f"{saved_location}/tokenizer.model", "wb") as file:
        file.write(tokenizer_file.SerializeToString())

    # Add padding tokens
    # actual_vocab_size = model.config.vocab_size
    # padding = actual_vocab_size - len(tokenizer_file.pieces)
    return


def _load_correct_tokenizer(
    tokenizer_name,
    model_max_length = None,
    padding_side = "right",
    token = None,
    trust_remote_code = False,
    cache_dir = "huggingface_tokenizers_cache",
    fix_tokenizer = True,
):
    """
    Load a tokenizer, preferring the fast one when it agrees with the slow one.

    The slow tokenizer is attempted first (failures are ignored), then the fast
    tokenizer is loaded. The fast tokenizer is returned directly when fixing is
    disabled, when the name is in `IGNORED_TOKENIZER_NAMES`, when the name
    contains "mistral" or "phi-4", or when no slow tokenizer could be loaded.
    Otherwise the BOS / EOS flags are copied from slow to fast and both are
    compared with `assert_same_tokenization`; on mismatch the slow tokenizer is
    converted with `convert_to_fast_tokenizer` and returned instead.
    """
    # An explicit cache folder is only used on Colab and Kaggle.
    if IS_COLAB_ENVIRONMENT:
        resolved_cache_dir = cache_dir
    elif IS_KAGGLE_ENVIRONMENT:
        # /tmp of Kaggle seems has a 80GB limit!
        # Let's utilize them
        resolved_cache_dir = os.path.join(KAGGLE_TMP, cache_dir)
    else:
        resolved_cache_dir = None

    # Keyword arguments shared by the slow and the fast load
    common_kwargs = dict(
        model_max_length = model_max_length,
        padding_side = padding_side,
        token = token,
        trust_remote_code = trust_remote_code,
        cache_dir = resolved_cache_dir,
    )

    # Try loading the slow tokenizer. If it fails, then try Fast only
    # Mainly to solve Deepseek models with no tokenizer.model file
    try:
        slow_tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name,
            # Cannot just use use_fast = False as per https://twitter.com/danielhanchen/status/1789659394302718373
            use_fast = False,
            legacy = False,
            from_slow = True,
            **common_kwargs,
        )
    except:
        slow_tokenizer = None
    # Unsure why this occurs! A bool is sometimes handed back instead.
    if isinstance(slow_tokenizer, bool):
        slow_tokenizer = None

    fast_tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, **common_kwargs)

    if not fix_tokenizer or tokenizer_name in IGNORED_TOKENIZER_NAMES:
        return fast_tokenizer

    # Mistral and Phi-4 tokenizers are left untouched - they're a bit weird to handle!
    lowered_name = tokenizer_name.lower()
    if "mistral" in lowered_name or "phi-4" in lowered_name:
        return fast_tokenizer

    if slow_tokenizer is None:
        return fast_tokenizer

    # Mirror the slow tokenizer's BOS / EOS behaviour onto the fast one
    for flag in ("add_bos_token", "add_eos_token"):
        if hasattr(fast_tokenizer, flag) and hasattr(slow_tokenizer, flag):
            setattr(fast_tokenizer, flag, getattr(slow_tokenizer, flag))

    # Confirm if slow and fast are equivalent!
    if assert_same_tokenization(slow_tokenizer, fast_tokenizer):
        return fast_tokenizer

    logger.warning(f"Unsloth: Will load {tokenizer_name} as a legacy tokenizer.")
    return convert_to_fast_tokenizer(slow_tokenizer)


def load_correct_tokenizer(
    tokenizer_name,
    model_max_length = None,
    padding_side = "right",
    token = None,
    trust_remote_code = False,
    cache_dir = "huggingface_tokenizers_cache",
    fix_tokenizer = True,
):
    """
    Load a tokenizer via `_load_correct_tokenizer` and repair its chat template.

    The existing chat template is kept as-is when the tokenizer's
    `name_or_path` contains "mistral" or "qwen3guard", or when the template
    looks like an old Llama-2 one (contains "[INST]", "[/INST]", "bos_token"
    and "eos_token"). Otherwise `fix_chat_template` is applied.

    Raises:
        RuntimeError: when a template existed but `fix_chat_template` returned None.
    """
    tokenizer = _load_correct_tokenizer(
        tokenizer_name = tokenizer_name,
        model_max_length = model_max_length,
        padding_side = padding_side,
        token = token,
        trust_remote_code = trust_remote_code,
        cache_dir = cache_dir,
        fix_tokenizer = fix_tokenizer,
    )

    ### 1. Fixup tokenizer's chat_template
    original_template = getattr(tokenizer, "chat_template", None)
    lowered_path = str(getattr(tokenizer, "name_or_path", "")).lower()

    # Ignore mistral type models since they don't have an add_generation_prompt
    # Also keep Llama-2 old style templates untouched
    keep_original = any(
        marker in lowered_path for marker in ("mistral", "qwen3guard")
    ) or (
        original_template is not None
        and all(
            piece in original_template
            for piece in ("[/INST]", "[INST]", "bos_token", "eos_token")
        )
    )

    if keep_original:
        final_template = original_template
    else:
        final_template = fix_chat_template(tokenizer)
        if original_template is not None and final_template is None:
            raise RuntimeError(
                "Unsloth: Fixing chat template failed - please file a report immediately!"
            )

    tokenizer.chat_template = final_template
    return tokenizer


def _find_end_position(template, endfor, endif):
    where_endfor = template.find(endfor)
    where_endif = template.find(endif)
    if where_endfor == where_endif == -1:
        return None
    elif where_endfor > where_endif:
        return endfor
    else:
        return endif


def _fix_chat_template(chat_template):
    """
    Wrap a trailing `{{ ... }}` expression in `if add_generation_prompt`.

    The anchor tag is chosen with `_find_end_position`, first among the plain
    `{% endfor %}` / `{% endif %}` tags and then among their `{%-` variants.
    Everything after the first occurrence of the anchor is the tail. The tail is
    wrapped only when it is exactly one `{{ ... }}` expression and contains no
    `if` or `set` tag of the same dash style. In every other case the template
    is returned unchanged.
    """
    tag_variants = (
        ("{% endfor %}", "{% endif %}"),
        ("{%- endfor %}", "{%- endif %}"),
    )
    anchor = None
    for endfor, endif in tag_variants:
        anchor = _find_end_position(chat_template, endfor, endif)
        if anchor is not None:
            break
    if anchor is None:
        return chat_template

    # Split right behind the first occurrence of the anchor tag
    split_at = chat_template.find(anchor) + len(anchor)
    tail = chat_template[split_at:]

    # Reuse the whitespace-control style of the anchor for the inserted tag
    tag_open = "{%" + ("-" if anchor.startswith("{%-") else "")

    contains_logic = (tag_open + " if") in tail or (tag_open + " set ") in tail
    single_expression = (
        tail.startswith("{{")
        and tail.endswith("}}")
        and tail.count("{{") == 1
        and tail.count("}}") == 1
    )
    if contains_logic or not single_expression:
        return chat_template

    guarded_tail = tag_open + " if add_generation_prompt %}" + tail + endif
    return chat_template[:split_at] + guarded_tail


def fix_chat_template(tokenizer):
    """
    Make sure the tokenizer's chat template reacts to `add_generation_prompt`.

    Returns None when the tokenizer has no chat template. The template is
    returned unchanged when it renders neither a HF style nor a ShareGPT style
    message, or when rendering with and without `add_generation_prompt` already
    differs. Otherwise `_fix_chat_template` is used to patch it.

    Raises:
        RuntimeError: when both renderings are equal and the template already
            contains an `if add_generation_prompt` tag, or when patching did
            not manage to add one.
    """
    chat_template = getattr(tokenizer, "chat_template", None)
    if chat_template is None:
        return None

    def render(conversation, with_prompt):
        return tokenizer.apply_chat_template(
            conversation, add_generation_prompt = with_prompt, tokenize = False
        )

    def hf_turn():
        return {"role": "user", "content": "Who are you?"}

    def sharegpt_turn():
        return {"from": "human", "value": "Who are you?"}

    ### 1. Check if add_generation_prompt works
    # Probe HF style first, then ShareGPT style
    try:
        render([hf_turn()], False)
        make_turn = hf_turn
    except:
        try:
            render([sharegpt_turn()], False)
            make_turn = sharegpt_turn
        except:
            # Not ShareGPT or HF style - just return
            return chat_template

    # Render the same conversation with and without the generation prompt
    conversation = [make_turn()]
    without_prompt = render(conversation, False)
    with_prompt = render(conversation, True)
    if without_prompt != with_prompt:
        return chat_template

    # SAME?! That's not good! We check for add_generation_prompt
    def has_generation_tag(template):
        return (
            "{% if add_generation_prompt %}" in template
            or "{%- if add_generation_prompt %}" in template
        )

    if has_generation_tag(chat_template):
        raise RuntimeError(
            f"Unsloth: The tokenizer `{tokenizer.name_or_path}`\n"
            "has a {% if add_generation_prompt %} for generation purposes, but wasn't provided correctly.\n"
            "Please file a bug report immediately - thanks!"
        )

    # Try fixing it by adding it
    patched_template = _fix_chat_template(chat_template)
    if not has_generation_tag(patched_template):
        raise RuntimeError(
            f"Unsloth: The tokenizer `{tokenizer.name_or_path}`\n"
            "does not have a {% if add_generation_prompt %} for generation purposes.\n"
            f"Please file a bug report to the maintainers of `{tokenizer.name_or_path}` - thanks!"
        )

    logger.warning_once(
        "Unsloth: We successfully patched the tokenizer to add a {% if add_generation_prompt %} to the chat_template.\n"
        f"This is not a bug, but please notify the maintainers of `{tokenizer.name_or_path}` - thanks!"
    )
    return patched_template


def check_tokenizer(
    model,
    tokenizer,
    model_name = "unsloth/llama-2-7b-bnb-4bit",
    model_max_length = 4096,
    padding_side = "right",
    token = None,
    _reload = True,
):
    """
    Check a tokenizer's added tokens for ids outside the model's embedding matrix.

    Flow, as implemented below:
      * Tokenizers whose class name (text before the first "(" of their repr)
        is in `IGNORED_TOKENIZER_CHECKING` are returned untouched.
      * If no added token id is >= the number of embedding rows, the result of
        `convert_to_fast_tokenizer(tokenizer)` is returned.
      * On the first out-of-bounds id with `_reload = True`, the slow tokenizer
        is loaded with `AutoTokenizer.from_pretrained` and this function is
        called again with `_reload = False`. Any exception in that attempt only
        logs a warning and returns the tokenizer held at that point.
      * With `_reload = False`, the offending tokens are deleted from the
        tokenizer's added-token tables when that is judged safe.

    Args:
        model: Object exposing `model.model.embed_tokens.weight`.
        tokenizer: Tokenizer to validate.
        model_name: Name passed to `AutoTokenizer.from_pretrained` on reload and
            used in messages.
        model_max_length, padding_side, token: Forwarded to the reload.
        _reload: Internal flag; False on the recursive second pass.

    Raises:
        RuntimeError: on the `_reload = False` pass when the bad tokens cannot
            be removed. On the first pass this is caught by the bare `except`
            around the recursive call.
    """
    # Checks tokenizer for out of bounds ids.
    # Mainly a fix for https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha
    # where <sep> had token id=32002.
    # See https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha/discussions/25
    # Seems like the Fast tokenizer in Rust breaks things!

    # We ignore some of them!
    if tokenizer.__repr__().split("(", 1)[0] in IGNORED_TOKENIZER_CHECKING:
        return tokenizer

    # Number of rows of the input embedding = largest valid token id + 1
    max_embedding_size = model.model.embed_tokens.weight.shape[0]
    # Map of {token id: token string}, re-built in ascending id order so that
    # everything from the first bad id onwards is also bad.
    added_tokens_fast = tokenizer.added_tokens_decoder
    added_tokens_fast = {
        index: str(value) for index, value in added_tokens_fast.items()
    }
    sorted_keys = sorted(added_tokens_fast)
    added_tokens_fast = {key: added_tokens_fast[key] for key in sorted_keys}

    for j, index in enumerate(added_tokens_fast.keys()):
        if index >= max_embedding_size:
            bad_indices = list(added_tokens_fast.keys())[j:]
            bad_tokens = list(added_tokens_fast.values())[j:]
            if not _reload:
                # Second pass (slow tokenizer already reloaded):
                # Try removing the token
                added_tokens = [str(x) for x in tokenizer.added_tokens_decoder.values()]
                special_tokens = tokenizer.special_tokens_map
                import itertools

                # Flatten special_tokens_map values (single strings or lists of strings)
                special_tokens = frozenset(
                    itertools.chain.from_iterable(
                        [x] if type(x) is str else x for x in special_tokens.values()
                    )
                )
                # Bad tokens that are not special tokens ...
                can_be_removed1 = [x for x in bad_tokens if x not in special_tokens]
                # ... and that are present in the added-token encoder table
                can_be_removed2 = [
                    x
                    for x in can_be_removed1
                    if x in tokenizer._added_tokens_encoder.keys()
                ]

                # Check if the extra tokens can in fact be removed!
                can_be_removed = (len(can_be_removed1) == len(bad_tokens)) and (
                    len(can_be_removed2) == len(bad_tokens)
                )

                # Check if sep_token or other generic types
                remove_generic = False
                try_mapper = []
                if not can_be_removed:
                    # Attributes named like `xxx_token` (exactly one underscore)
                    names = dir(tokenizer)
                    names = (
                        x for x in names if x.endswith("_token") and x.count("_") == 1
                    )
                    generic_tokens = [(x, getattr(tokenizer, x, None)) for x in names]

                    # Pair every bad token with the attribute name(s) holding it
                    try_removal = []
                    for token in bad_tokens:
                        for name_token, check_token in generic_tokens:
                            if check_token == token:
                                try_removal.append(token)
                                try_mapper.append(name_token)

                    # Recheck!
                    can_be_removed = len(try_removal) == len(bad_tokens)
                    if can_be_removed:
                        remove_generic = True
                    can_be_removed1 = bad_tokens

                if can_be_removed:
                    # Yes it can be fixed!
                    # NOTE: `j` below shadows the outer loop index; the outer loop
                    # never continues past this point (return or raise follows).
                    for j, bad_token in enumerate(can_be_removed1):
                        remove_id = tokenizer._added_tokens_encoder[bad_token]
                        del tokenizer._added_tokens_decoder[remove_id]
                        del tokenizer._added_tokens_encoder[bad_token]

                        # `try_removal` only exists when the generic path ran;
                        # the `remove_generic and` short-circuit guards that.
                        if remove_generic and (try_removal[j] == bad_token):
                            # Remove sep token for example
                            setattr(tokenizer, try_mapper[j], None)
                            setattr(tokenizer, try_mapper[j] + "_id", None)
                    # Confirm 1 more time!
                    if max(tokenizer.added_tokens_decoder.keys()) < max_embedding_size:
                        logger.warning_once(
                            f"Unsloth loaded a broken tokenizer `{model_name}`, but managed to repair it!\n"
                            f"Tokens {bad_tokens} with ids {bad_indices} exceeds the max vocab size of {max_embedding_size}.\n"
                            "We removed these bad tokens. If you think this is incorrect, fix your tokenizer first."
                        )
                        return convert_to_fast_tokenizer(tokenizer)

                # :( Failure
                raise RuntimeError(
                    f"Unsloth tried to load `{model_name}`, but cannot succeed.\n"
                    f"Tokens {bad_tokens} with ids {bad_indices} exceeds the max vocab size of {max_embedding_size}.\n"
                    f"Fix your tokenizer since it'll perform out of bounds memory accesses."
                )

            # First pass: reload the slow tokenizer and check again.
            if IS_COLAB_ENVIRONMENT or IS_KAGGLE_ENVIRONMENT:
                cache_dir = "huggingface_tokenizers_cache"
            else:
                cache_dir = None

            # Sometimes slow tokenizer does not work like Deepseek
            try:
                # Try slow tokenizer which can fix things!
                tokenizer = AutoTokenizer.from_pretrained(
                    model_name,
                    model_max_length = model_max_length,
                    padding_side = padding_side,
                    token = token,
                    # Cannot just use use_fast = False as per https://twitter.com/danielhanchen/status/1789659394302718373
                    use_fast = False,
                    legacy = False,
                    from_slow = True,
                    cache_dir = cache_dir,
                )
                return check_tokenizer(
                    model = model,
                    tokenizer = tokenizer,
                    model_name = model_name,
                    model_max_length = model_max_length,
                    padding_side = padding_side,
                    token = token,
                    _reload = False,
                )
                # Unreachable: the `return` above always leaves the function.
                break
            except:
                # Tokenizer has out of bounds issues and we can't
                # load the slow tokenizer version :(
                # This also catches the RuntimeError raised by the recursive call;
                # in that case `tokenizer` is already the reloaded slow tokenizer.
                logger.warning_once(
                    "Unsloth: Tokenizer is most likely buggy, and Unsloth failed to repair it.\n"
                    "It will still work, but beware of out of bounds memory accesses.\n"
                    "Please file an issue on the model owner's repo about this issue."
                )
                return tokenizer
    # No out-of-bounds added token was found
    return convert_to_fast_tokenizer(tokenizer)


import inspect
from inspect import getsource
import trl
import trl.trainer.sft_trainer
from trl.trainer.sft_trainer import *
from transformers.trainer import *

try:
    from trl.trainer.sft_trainer import neftune_post_forward_hook
except:
    # Fallback definition used when the installed TRL does not provide the hook.
    def neftune_post_forward_hook(module, input, output):
        """
        Forward hook that adds NEFTune noise to embedding outputs while training.
        Adapted from https://github.com/neelsjain/NEFTune

        Usage:
        ```python
        model = ...
        model.embed_tokens.neftune_noise_alpha = 0.1
        model.embed_tokens.register_forward_hook(neftune_post_forward_hook)
        ```

        Args:
            module (`torch.nn.Module`):
                Module the hook is attached to. Its `training` flag decides whether
                noise is added, and `module.neftune_noise_alpha` sets the noise scale.
            input (`torch.Tensor`):
                Input of the module (unused).
            output (`torch.Tensor`):
                Output of the module, i.e. the embeddings. Sizes along dims 1 and 2
                are read, so at least three dimensions are expected.

        Returns:
            `output` itself when the module is not in training mode, otherwise
            `output` plus uniform noise in
            `[-alpha / sqrt(size(1) * size(2)), alpha / sqrt(size(1) * size(2))]`.
        """
        if not module.training:
            return output
        element_count = torch.tensor(output.size(1) * output.size(2))
        noise_bound = module.neftune_noise_alpha / torch.sqrt(element_count)
        noise = torch.zeros_like(output).uniform_(-noise_bound, noise_bound)
        return output + noise


def patch_sft_trainer_tokenizer():
    """
    Patches TRL trainers by rewriting the source of selected methods.

    Two passes are made:
      1. For `SFTTrainer._prepare_non_packed_dataloader` and
         `SFTTrainer._prepare_dataset` (whichever exist), a preamble is inserted
         right after the method signature. The preamble detects whether the
         first sample or the chat template already carries the BOS token and,
         if so, turns off `add_special_tokens`.
      2. For the `train` method of `SFTTrainer`, `DPOTrainer` and `KTOTrainer`,
         extra code is inserted in front of the line
         `if resume_from_checkpoint is False:` (GPU checks, untrained-token and
         zero-loss fixes, a gradient accumulation notice, NEFTune hook
         registration and DPO column cleanup).

    The edited source is compiled with `exec` and assigned back onto the
    trainer class. Returns None; returns early when `SFTTrainer` cannot be
    resolved.
    """
    try:
        sft_trainer = eval(f"trl.trainer.sft_trainer.SFTTrainer")
    except:
        return
    # Every name visible in TRL's sft_trainer module; used below to work out
    # which imports the re-compiled function source needs.
    all_imports = dir(trl.trainer.sft_trainer)

    for (
        function_name,
        replacer,
    ) in (
        # ("_prepare_non_packed_dataloader", "def tokenize(element):",),
        (
            "_prepare_non_packed_dataloader",
            None,
        ),
        (
            "_prepare_dataset",
            None,
        ),
        # ("_prepare_packed_dataloader", "if dataset_text_field is not None",),
    ):
        if not hasattr(sft_trainer, function_name):
            continue

        # Fetch the method source and strip the class-level indentation
        # (`where` is the column of the first "def").
        function = getsource(eval(f"sft_trainer.{function_name}"))
        where = function.find("def")
        function = function.split("\n")
        function = "\n".join(x[where:] for x in function)

        # Source text injected into the method body (executed inside the method).
        check_text = (
            "\n"
            "if 'tokenizer'          not in locals(): tokenizer = processing_class\n"
            "if 'formatting_func'    not in locals(): raise RuntimeError('Unsloth: Please file a bug report - `formatting_func` does not exist!')\n"
            "if 'dataset_text_field' not in locals() and 'args' in locals(): dataset_text_field = args.dataset_text_field\n"
            "if 'dataset_text_field' not in locals(): raise RuntimeError('Unsloth: Please file a bug report - `dataset_text_field` does not exist!')\n"
            "test_text = dataset[0][dataset_text_field] if (formatting_func is None and dataset_text_field is not None) else formatting_func(dataset[0])[0]\n"
            "chat_template = getattr(tokenizer, 'chat_template', None)\n"
            "chat_template = '' if chat_template is None else chat_template\n"
            "has_bos_token_already = (test_text.startswith(tokenizer.bos_token) or tokenizer.bos_token in chat_template) "
            "if getattr(tokenizer, 'bos_token', None) is not None else False\n"
            "if 'add_special_tokens' not in locals() and has_bos_token_already:\n"
            "    from functools import partial\n"
            "    tokenizer = partial(tokenizer, add_special_tokens = False)\n"
            "    processing_class = tokenizer\n"
            "else:\n"
            "    add_special_tokens = False if has_bos_token_already else add_special_tokens\n\n"
        )

        # Indent every injected line by `where` spaces
        check_text = check_text.split("\n")
        check_text = "\n".join(" " * where + x for x in check_text)
        check_text = check_text.rstrip() + "\n"

        if replacer is None:
            # No explicit anchor: insert right after the `def ...(...):` header.
            # .*? matches first match. .+? matches final match.
            replacer = re.findall(
                f"def {function_name}" + r"\(.*?\).*?\:\n",
                function,
                flags = re.MULTILINE | re.DOTALL,
            )
            if len(replacer) == 0:
                continue
            replacer = replacer[0]
            function = function.replace(replacer, replacer + check_text)
        else:
            # Explicit anchor: insert in front of it
            function = function.replace(replacer, check_text + replacer)

        # Names from TRL's module that appear (as substrings) in the source
        x = [x for x in all_imports if x in function]
        # Import them in bulk; if that fails, import one by one and skip failures.
        # NOTE(review): the imports are written into the dict returned by
        # `locals()` and later read through another `locals()` call - confirm
        # this still shares state on Python 3.13+ (PEP 667).
        try:
            exec(f"from trl.trainer.sft_trainer import ({','.join(x)})", locals())
        except ImportError:
            for _item in x:
                try:
                    exec(f"from trl.trainer.sft_trainer import {_item}", locals())
                except ImportError:
                    pass
        # Compile the patched source; with `globals()` passed as the local
        # namespace, the resulting function is bound in this module's globals.
        exec(function, locals(), globals())
        # Replace the method on the class with the patched function
        exec(
            f"trl.trainer.sft_trainer.SFTTrainer.{function_name} = {function_name}",
            globals(),
        )

    # Patch train with fix_untrained_tokens
    for path_to_trainer in (
        "sft_trainer.SFTTrainer",
        "dpo_trainer.DPOTrainer",
        "kto_trainer.KTOTrainer",
    ):
        # The injected text goes in front of this line of `train`
        function_name, replacer = "train", "if resume_from_checkpoint is False:"
        try:
            function = getsource(eval(f"trl.trainer.{path_to_trainer}.{function_name}"))
        except Exception:
            # Trainer (or its source) is unavailable in this TRL version
            continue
        where = function.find("def")
        function = function.split("\n")
        function = "\n".join(x[where:] for x in function)

        # Injected source: queries nvidia-smi for used GPU memory, compares it
        # with `PRE_CHECK`, raises on what looks like more than one busy GPU,
        # frees cached memory, then runs the untrained-token / zero-loss fixes.
        check_text = (
            "\n"
            "import subprocess, re, gc, numpy as np\n"
            "a = np.array([0,])\n"
            "try:\n"
            "    a = subprocess.check_output('nvidia-smi --query-gpu=memory.used --format=csv', shell = True)\n"
            "    a = re.findall(rb'([\\d]{1,})[\\s]{1,}M', a)\n"
            "    a = np.array([int(x.decode('utf-8'))/1024 for x in a])\n"
            "except:\n"
            "    if not torch.cuda.is_available():\n"
            "        raise RuntimeError('Unsloth: We do not support AMD / Intel machines yet - it is a work in progress!')\n"
            "if ((a - PRE_CHECK) >= 1).sum() > 1:\n"
            "    raise RuntimeError('Unsloth currently does not support multi GPU setups - but we are working on it!')\n"
            "for _ in range(3):\n"
            "    gc.collect()\n"
            "    torch.cuda.empty_cache()\n"
            "pass\n"
            "\n"
            "tokenizer = self.processing_class if hasattr(self, 'processing_class') else self.tokenizer\n"
            "fix_untrained_tokens(self.model, tokenizer, self.train_dataset, IGNORED_TOKENIZER_NAMES, eps = 1e-16)\n\n"
            "fix_zero_training_loss(self.model, tokenizer, self.train_dataset)\n\n"
        )

        # Warn on gradient accumulation steps if it's used
        # (only printed for transformers <= 4.45.2; any error is ignored)
        check_text += (
            "\n"
            "try:\n"
            "    gradient_accumulation_steps = self.args.gradient_accumulation_steps\n"
            "    if type(gradient_accumulation_steps) is int and gradient_accumulation_steps > 1:\n"
            "        from transformers import __version__ as transformers_version\n"
            "        from packaging.version import Version\n"
            "        if Version(transformers_version) <= Version('4.45.2'):\n"
            "            print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\\n'\\\n"
            "                  '`pip install --upgrade --no-cache-dir --no-deps unsloth transformers git+https://github.com/huggingface/trl.git`')\n"
            "except:\n"
            "    pass\n"
            "\n\n"
        )

        # Add NEFTune since it doesn't seem to work?? We need to manually inject it
        # (any existing hook handle is removed first, then a fresh hook is
        # registered on the input embeddings when neftune_noise_alpha is set)
        check_text += (
            "\n"
            "if hasattr(self, 'neftune_hook_handle'):\n"
            "    self.neftune_hook_handle.remove()\n"
            "    if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle\n"
            "\n"
            "if getattr(self, 'neftune_noise_alpha', None) is not None:\n"
            "    self.model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha\n"
            "    self.neftune_hook_handle = self.model.get_input_embeddings().register_forward_hook(neftune_post_forward_hook)\n"
            "pass\n"
            "\n"
        )

        # Also DPO weirdly tokenizes non numeric columns? Delete them!
        # (the raw text columns are dropped only when all tokenized columns exist)
        check_text += (
            "\n"
            "if hasattr(self.train_dataset, 'column_names'):\n"
            "    column_names = set(self.train_dataset.column_names)\n"
            "    check = ['chosen', 'rejected', 'prompt', 'chosen_input_ids', 'chosen_attention_mask',\n"
            "        'chosen_labels', 'rejected_input_ids', 'rejected_attention_mask', 'rejected_labels',\n"
            "        'prompt_input_ids', 'prompt_attention_mask']\n"
            "    if all(x in column_names for x in check):\n"
            "        self.train_dataset = self.train_dataset.remove_columns(['chosen', 'rejected', 'prompt'])\n"
            "    del check, column_names\n"
            "\n"
        )

        # Indent the injected text to match the de-indented method source
        check_text = check_text.split("\n")
        check_text = "\n".join(" " * where + x for x in check_text)

        # If the anchor line is absent, `replace` leaves the source unchanged
        function = function.replace(replacer, check_text + replacer)
        exec(function, globals())

        # Rebind `train` on the trainer class to the freshly compiled function
        exec(
            f"trl.trainer.{path_to_trainer}.{function_name} = {function_name}",
            globals(),
        )


# Finally patch TRL tokenizer things -> moved to RL
# patch_sft_trainer_tokenizer()


================================================
FILE: unsloth/trainer.py
================================================
# Copyright 2023-present Daniel Han-Chen & the Unsloth team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import psutil
import warnings
from dataclasses import dataclass, field
from typing import Optional
from functools import wraps

import trl
import inspect
from trl import SFTTrainer
from . import is_bfloat16_supported
from unsloth.utils import (
    configure_padding_free,
    configure_sample_packing,
    enable_padding_free_metadata,
    enable_sample_packing,
)
from unsloth_zoo.training_utils import (
    unsloth_train as _unsloth_train,
)
from unsloth_zoo.vision_utils import (
    UnslothVisionDataCollator,
)
from unsloth_zoo.hf_utils import get_transformers_model_type
from unsloth_zoo.utils import Version
import dataclasses

# Public names exported by `from unsloth.trainer import *`.
__all__ = [
    "UnslothTrainingArguments",
    "UnslothTrainer",
    "unsloth_train",
    "_patch_trl_trainer",
    "UnslothVisionDataCollator",
]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# True when the environment variable UNSLOTH_DISABLE_AUTO_PADDING_FREE is set
# to 1 / true / yes / on (case-insensitive, surrounding whitespace ignored).
# Read once at import time.
_AUTO_PADDING_FREE_ENV_DISABLED = os.environ.get(
    "UNSLOTH_DISABLE_AUTO_PADDING_FREE", ""
).strip().lower() in {"1", "true", "yes", "on"}

# Model types with a known padding-free problem (reason given per entry).
# NOTE(review): the consumer of this set is outside this view - presumably
# padding-free is not auto-enabled for these model types; confirm at the call site.
PADDING_FREE_BLOCKLIST = {
    "gemma2",  # - gemma2:  Uses slow_attention_softcapping which has torch.compile issues
    "gpt_oss",  # - gpt_oss: Uses Flex Attention which doesn't handle padding_free correctly
}


def _should_pack(config) -> bool:
    if config is None or not getattr(config, "packing", False):
        return False
    return not getattr(config, "_unsloth_disable_auto_packing", False)


def _should_auto_padding_free(config) -> bool:
    if (
        config is None
        or _AUTO_PADDING_FREE_ENV_DISABLED
        or getattr(config, "packing", False)
    ):
        return False
    return getattr(config, "padding_free", None) is None


def _disable_sample_packing(config):
    if config is None:
        return
    for attr, value in (("packing", False), ("padding_free", False)):
        if hasattr(config, attr):
            setattr(config, attr, value)
    if hasattr(config, "remove_unused_columns"):
        setattr(config, "remove_unused_columns", True)
    setattr(config, "_unsloth_disable_auto_packing", True)


# Lower-case substrings matched against a ValueError's message; a match marks
# the error as a packing incompatibility that allows a retry without packing.
_AUTO_PACK_SKIP_MESSAGES = (
    "packing is not supported",
    "padding-free training",
    "passing a custom data collator",
)


def _should_skip_auto_packing_error(exc: Exception) -> bool:
    """Report whether ``exc`` describes a packing-incompatible trainer setup.

    The exception text is lower-cased and searched for each fragment listed in
    ``_AUTO_PACK_SKIP_MESSAGES``.
    """
    lowered = str(exc).lower()
    for fragment in _AUTO_PACK_SKIP_MESSAGES:
        if fragment in lowered:
            return True
    return False


# Unsloth gradient accumulation fix:
from transformers import __version__ as transformers_version, ProcessorMixin

if Version(transformers_version) > Version("4.45.2"):

    def unsloth_train(trainer, *args, **kwargs):
        """Run training by delegating directly to ``trainer.train``."""
        return trainer.train(*args, **kwargs)

else:

    def unsloth_train(trainer, *args, **kwargs):
        """Run training through Unsloth's gradient-accumulation-fixed loop.

        This variant is selected for transformers <= 4.45.2. It accepts no
        extra arguments and raises ``RuntimeError`` if any are given.
        """
        if args or kwargs:
            raise RuntimeError(
                "Unsloth: Our custom gradient accumulation fixed trainer does not support other arguments.\n"
                "If you want to use our fix inside of HF, please update `transformers` to the latest version via:\n"
                "`pip uninstall transformers -y && pip install --upgrade --no-cache-dir transformers`"
            )
        print(
            "Unsloth: Using our custom gradient accumulation fixed trainer, which is not feature complete.\n"
            "If you want to use our fix inside of HF, please update `transformers` to the latest version via:\n"
            "`pip uninstall transformers -y && pip install --upgrade --no-cache-dir transformers`"
        )
        return _unsloth_train(trainer)


# Use TRL's SFTConfig as the base for UnslothTrainingArguments when it can be
# imported; otherwise fall back to the plain transformers TrainingArguments.
try:
    from trl import SFTConfig as TrainingArguments
except:
    from transformers import TrainingArguments


class UnslothTrainingArguments(TrainingArguments):
    """Training arguments with an optional separate embedding learning rate.

    Args:
        embedding_learning_rate: Learning rate applied to parameters whose
            names end in ``modules_to_save.default.weight`` when training with
            ``UnslothTrainer``. ``None`` (the default) keeps a single learning
            rate for all parameters.
        *args, **kwargs: Forwarded unchanged to the base arguments class.
    """

    def __init__(self, embedding_learning_rate: float = None, *args, **kwargs):
        # Store the value on the instance: UnslothTrainer.create_optimizer reads
        # it via getattr(self.args, "embedding_learning_rate", None). The former
        # local self-assignment discarded it, so the custom rate never applied.
        self.embedding_learning_rate = embedding_learning_rate
        super().__init__(*args, **kwargs)


def _create_unsloth_optimizer(
    model,
    optimizer_cls,
    optimizer_kwargs,
    embedding_lr = 5e-5,
):
    lr = optimizer_kwargs["lr"]
    weight_decay = optimizer_kwargs.get("weight_decay", 0.0)

    param_groups = {
        "non_embeddings": {},
        "embeddings": {},
    }

    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if name.endswith("modules_to_save.default.weight"):
            partial_name = name[: -len(".modules_to_save.default.weight")]
            partial_name = partial_name[partial_name.rfind(".") + 1 :]
            print(
                f"Unsloth: Setting lr = {embedding_lr:.2e} instead of {lr:.2e} for {partial_name}."
            )
            param_groups["embeddings"][name] = param
        else:
            param_groups["non_embeddings"][name] = param

    optimizer_grouped_parameters = [
        {
            "params": list(param_groups["non_embeddings"].values()),
            "weight_decay": weight_decay,
            "lr": lr,
        },
        {
            "params": list(param_groups["embeddings"].values()),
            "weight_decay": weight_decay,
            "lr": embedding_lr,
        },
    ]
    optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
    return optimizer


class UnslothTrainer(SFTTrainer):
    """SFTTrainer variant that honours ``args.embedding_learning_rate``."""

    def create_optimizer(self):
        """Create (once) and return the optimizer.

        Without an ``embedding_learning_rate`` on ``self.args`` the parent
        implementation is used. Otherwise a two-group optimizer is built via
        ``_create_unsloth_optimizer`` unless ``self.optimizer`` already exists.
        """
        embedding_lr = getattr(self.args, "embedding_learning_rate", None)
        if embedding_lr is None:
            return super().create_optimizer()

        if self.optimizer is not None:
            # Already created; reuse it.
            return self.optimizer

        cls_and_kwargs = SFTTrainer.get_optimizer_cls_and_kwargs(self.args)
        optimizer_cls, optimizer_kwargs = cls_and_kwargs
        self.optimizer = _create_unsloth_optimizer(
            self.model,
            optimizer_cls,
            optimizer_kwargs,
            embedding_lr,
        )
        return self.optimizer


# From `trl>=0.13.0`, they changed how to pass several params to the trainer
# We need to patch to make the transition smooth
def _resolve_trainer_params(trainer_class, init_fn):
    """Resolve the real named parameters for a trainer __init__.

    Some TRL trainers (e.g., ORPOTrainer in TRL 0.27.1) are thin wrappers
    with only ``def __init__(self, *args, **kwargs)``.  For those, walk the
    MRO and return the first parent class that has real named parameters.
    """
    params = inspect.signature(init_fn).parameters
    named = {
        k
        for k, v in params.items()
        if v.kind
        in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY)
        and k != "self"
    }
    if named:
        return set(params.keys())

    # Thin wrapper detected - walk MRO for real signature
    for cls in trainer_class.__mro__[1:]:
        if cls is object:
            continue
        parent_init = cls.__dict__.get("__init__")
        if parent_init is None:
            continue
        try:
            parent_params = inspect.signature(parent_init).parameters
            parent_named = {
                k
                for k, v in parent_params.items()
                if v.kind
                in (
                    inspect.Parameter.POSITIONAL_OR_KEYWORD,
                    inspect.Parameter.KEYWORD_ONLY,
                )
                and k != "self"
            }
            if parent_named:
                return set(parent_params.keys())
        except (ValueError, TypeError):
            continue
    return set(params.keys())


def _backwards_compatible_trainer(trainer_class, config_class):
    """Build a replacement ``__init__`` that accepts pre-0.13 TRL call styles.

    The returned function wraps ``trainer_class.__init__`` and:

    * renames a ``tokenizer=`` keyword to ``processing_class=`` when the
      trainer's signature has a ``processing_class`` parameter;
    * when ``args=`` is passed and TRL is >= 0.13.0.dev0, keeps only the
      keywords the trainer's signature accepts and routes every other keyword
      towards the config object.

    Args:
        trainer_class: Trainer class whose ``__init__`` is being wrapped.
        config_class: Dataclass config type paired with the trainer.

    Returns:
        The wrapping ``__init__`` function (not yet installed on the class).
    """
    original_init = trainer_class.__init__

    @wraps(original_init)
    def new_init(self, *args, **kwargs):
        # All Trainer tokenizer are now called processing_class
        trainer_params = _resolve_trainer_params(trainer_class, original_init)

        if "processing_class" in trainer_params and "tokenizer" in kwargs:
            kwargs["processing_class"] = kwargs.pop("tokenizer")

        if ("args" in kwargs) and (Version(trl) >= Version("0.13.0.dev0")):
            training_args = kwargs.pop("args", None)

            # Keep only the keywords the trainer accepts besides self / args.
            # discard() rather than remove(): a resolved signature without one
            # of these names (e.g. `def __init__(self, model, **kwargs)`) must
            # not raise KeyError.
            trainer_params.discard("self")
            trainer_params.discard("args")

            # Get fields that should be passed to Config init
            config_fields = {
                f.name: f for f in dataclasses.fields(config_class) if f.init
            }

            # Create config dict with valid fields from training_args
            config_dict = {
                name: getattr(training_args, name)
                for name in config_fields
                if hasattr(training_args, name)
            }

            # Imported locally on purpose: the module-level `TrainingArguments`
            # name may be bound to TRL's SFTConfig instead of the base class.
            from transformers import TrainingArguments

            # Separate kwargs into trainer kwargs and config kwargs. Anything
            # the trainer signature does not name is treated as a config value.
            trainer_kwargs = {}
            additional_config_kwargs = {}
            for key, value in kwargs.items():
                if key in trainer_params:
                    trainer_kwargs[key] = value
                else:
                    additional_config_kwargs[key] = value

            # Update config_dict with additional kwargs
            config_dict.update(additional_config_kwargs)

            # Create Config with all the collected parameters
            # Reinitialising config class with parameters (that were none initially but populated on first init)
            # causes the 2nd init to fail as there are mutual exclusive checks on pairs of parameters.
            # Refer: https://github.com/huggingface/trl/blob/main/trl/trainer/grpo_config.py#L499-L502 for example
            # So we only create config class if the previous init was not TrainingArguments
            if not isinstance(training_args, TrainingArguments):
                config = config_class(**config_dict)
            else:
                config = training_args

            # Reconstruct kwargs for Trainer
            kwargs = trainer_kwargs
            kwargs["args"] = config
        original_init(self, *args, **kwargs)

    return new_init


def _patch_sft_trainer_auto_packing(trl_module):
    sft_trainer = getattr(trl_module, "SFTTrainer", None)
    if sft_trainer is None:
        return
    if getattr(sft_trainer, "_unsloth_auto_packing_wrapped", False):
        return

    original_init = sft_trainer.__init__

    @wraps(original_init)
    def new_init(self, *args, **kwargs):
        config_arg = None
        if len(args) >= 2:
            config_arg = args[1]
        else:
            config_arg = kwargs.get("args")

        # Check if model type is unsupported for padding_free
        model = kwargs.get("model")
        is_unsupported_model = False
        is_vlm = False
        if model is not None:
            model_config = getattr(model, "config", None)
            if model_config is not None:
                model_types = get_transformers_model_type(model_config)
                # Blocklist: models that don't work correctly with padding_free
                is_unsupported_model = any(
                    x in PADDING_FREE_BLOCKLIST for x in model_types
                )

                # Check if VLM
                architectures = getattr(model_config, "architectures", None)
                if architectures is None:
                    architectures = []
                is_vlm = any(
                    x.endswith("ForConditionalGeneration") for x in architectures
                )
                is_vlm = is_vlm or hasattr(model_config, "vision_config")

        processing_class = kwargs.get("processing_class") or kwargs.get("tokenizer")
        data_collator = kwargs.get("data_collator")

        # We also disable vision language models for padding free collators
        blocked = (
            (data_collator is not None)
            or isinstance(processing_class, ProcessorMixin)
            or is_vlm
            or is_unsupported_model
            or (
                os.environ.get("UNSLOTH_RETURN_LOGITS", "0") == "1"
            )  # Disable padding free on forced logits
        )
        requested_pack = bool(getattr(config_arg, "packing", False))
        if blocked:
            if hasattr(config_arg, "packing"):
                setattr(config_arg, "packing", False)
            if hasattr(config_arg, "padding_free"):
                setattr(config_arg, "padding_free", False)

        if blocked and requested_pack:
            reason = "custom data collator"
            if data_collator is None and isinstance(processing_class, ProcessorMixin):
                reason = "processor-based model"
            elif is_vlm:
                reason = "vision-language model"
            elif is_unsupported_model:
                reason = f"unsupported model type(s): {', '.join(model_types)}"
            message = "Unsloth: Sample packing skipped " f"({reason} detected)."
            print(message)

        packing_active = False
        if _should_pack(config_arg) and not blocked:
            configure_sample_packing(config_arg)
            packing_active = True
            logger.info("Unsloth: Sample packing enabled for SFTTrainer instance.")

        # Resolve padding_free: None (default) = auto-enable unless env-disabled or packing
        auto_padding_free_active = False
        padding_free_requested = getattr(config_arg, "padding_free", None) is True
        if not blocked:
            if padding_free_requested:
                configure_padding_free(config_arg)
            elif _should_auto_padding_free(config_arg):
                configure_padding_free(config_arg)
                auto_padding_free_active = True
                logger.info(
                    "Unsloth: Padding-free batching auto-enabled for SFTTrainer instance."
                )

        try:
            original_init(self, *args, **kwargs)
        except ValueError as exc:
            if packing_active and _should_skip_auto_packing_error(exc):
                logger.info(
                    "Unsloth: Auto sample packing failed because trainer reported an incompatible setup (%s).",
                    exc,
                )
                _disable_sample_packing(config_arg)
                packing_active = False
                original_init(self, *args, **kwargs)
            else:
                raise

        trainer_args = getattr(self, "args", None)
        trainer_packing = bool(trainer_args and getattr(trainer_args, "packing", False))
        trainer_padding_free = bool(
            trainer_args and getattr(trainer_args, "padding_free", False)
        )

        if blocked and trainer_args is not None:
            # Mirror the block on the trainer args to avoid re-enabling later
            setattr(trainer_args, "packing", False)
            setattr(trainer_args, "padding_free", False)

        if (
            not blocked
            and trainer_packing
            and (packing_active or _should_pack(trainer_args))
        ):
            enable_sample_packing(self.model, self)
            print(
                "🦥 Unsloth: Packing enabled - training is >2x faster and uses less VRAM!"
            )
        elif not blocked and trainer_padding_free:
            enable_padding_free_metadata(self.model, self)
            message = (
                "🦥 Unsloth: Padding-free auto-enabled, enabling faster training."
                if auto_padding_free_active
                else "🦥 Unsloth: Padding-free enabled, enabling faster training."
            )
            print(message)

    sft_trainer.__init__ = new_init
    sft_trainer._unsloth_auto_packing_wrapped = True


def _patch_trl_trainer():
    """Install Unsloth's compatibility wrappers on TRL trainers (idempotent).

    For every ``<Name>Trainer`` / ``<Name>Config`` pair listed by
    ``dir(trl.trainer)``, the trainer's ``__init__`` is replaced with the
    wrapper from ``_backwards_compatible_trainer``. ``SFTTrainer`` additionally
    gets the auto packing / padding-free wrapper. Nothing happens when TRL is
    <= 0.11.0 or when the patch has already been applied (tracked through
    ``trl.__UNSLOTH_BACKWARDS_COMPATIBLE__``).
    """
    import trl

    if hasattr(trl, "__UNSLOTH_BACKWARDS_COMPATIBLE__"):
        return
    if Version(trl) <= Version("0.11.0"):
        return

    import trl.trainer

    exported = dir(trl.trainer)
    trainer_names = {
        x[: -len("Trainer")] for x in exported if x.endswith("Trainer")
    }
    config_names = {x[: -len("Config")] for x in exported if x.endswith("Config")}

    for name in trainer_names & config_names:
        try:
            # Plain attribute access replaces the former exec() of a formatted
            # source string; the classes are still looked up on the top-level
            # `trl` module.
            trainer_cls = getattr(trl, f"{name}Trainer")
            config_cls = getattr(trl, f"{name}Config")
            trainer_cls.__init__ = _backwards_compatible_trainer(
                trainer_cls, config_cls
            )
        except Exception:
            # Best effort: a trainer that cannot be resolved or patched is
            # skipped. KeyboardInterrupt / SystemExit are no longer swallowed.
            continue

    _patch_sft_trainer_auto_packing(trl)

    trl.__UNSLOTH_BACKWARDS_COMPATIBLE__ = True


================================================
FILE: unsloth/utils/__init__.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from .packing import (
    configure_padding_free,
    configure_sample_packing,
    enable_padding_free_metadata,
    enable_sample_packing,
    mark_allow_overlength,
)
from .attention_dispatch import (
    AttentionConfig,
    AttentionContext,
    FLASH_DENSE,
    FLASH_VARLEN,
    SDPA,
    XFORMERS,
    run_attention,
    select_attention_backend,
)

# Public API of `unsloth.utils`: packing helpers first, then the attention
# dispatch types, backend name constants and entry points.
__all__ = [
    "configure_sample_packing",
    "configure_padding_free",
    "enable_sample_packing",
    "enable_padding_free_metadata",
    "mark_allow_overlength",
    "AttentionConfig",
    "AttentionContext",
    "FLASH_VARLEN",
    "FLASH_DENSE",
    "XFORMERS",
    "SDPA",
    "run_attention",
    "select_attention_backend",
]


================================================
FILE: unsloth/utils/attention_dispatch.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Shared helpers for attention backend selection and execution."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Optional, Tuple

import torch
from torch import Tensor
from torch.nn.functional import scaled_dot_product_attention

from ..models._utils import *
from ..utils.packing import (
    build_sdpa_packed_attention_mask,
    build_xformers_block_causal_mask,
)

# NOTE(review): HAS_FLASH_ATTENTION, `xformers` and `xformers_attention` are not
# defined in this file; presumably they come from the `..models._utils` star
# import above - confirm.
if HAS_FLASH_ATTENTION:
    from flash_attn import flash_attn_func, flash_attn_varlen_func
HAS_XFORMERS = xformers is not None

# xformers kernels (FA3, FA2, cutlass) only support compute capability <= 9.0.
# Disable xformers on newer GPUs (e.g. RTX 5070 Ti / sm_120) and fall back to SDPA.
# NOTE(review): the check below only disables xformers for major version >= 12,
# so major versions 10 and 11 keep it enabled despite the "<= 9.0" statement
# above - confirm which one is intended.
if HAS_XFORMERS and torch.cuda.is_available():
    _cc = torch.cuda.get_device_capability()
    if _cc[0] >= 12:
        HAS_XFORMERS = False
# Detect `enable_gqa` support by looking for the keyword in SDPA's docstring.
SDPA_HAS_GQA = "enable_gqa" in (scaled_dot_product_attention.__doc__ or "")

# Backend identifiers stored in AttentionConfig.backend.
FLASH_VARLEN = "flash_varlen"
FLASH_DENSE = "flash_dense"
XFORMERS = "xformers"
SDPA = "sdpa"


# Block-diagonal causal mask class used for isinstance checks in run_attention;
# None whenever xformers is unavailable or was disabled above.
XFORMERS_BLOCK_DIAG_CLS = (
    xformers.attn_bias.BlockDiagonalCausalMask if HAS_XFORMERS else None
)


@dataclass
class AttentionConfig:
    """
    Per-layer attention metadata.

    NOTE(djsaunde): I had originally intended this to be populated once per layer, but
        we're currently constructing it on every forward pass since it can possibly be
        invalid from one forward pass to the next (e.g., switching from training to
        inference). For now, I'm keeping separate from AttentionContext for the sake of
        better grouping of params.
    """

    # One of the backend identifiers (FLASH_VARLEN, FLASH_DENSE, XFORMERS, SDPA).
    backend: str
    # Number of key/value heads.
    n_kv_heads: int
    # Query heads per key/value head; run_attention expands K/V by this factor
    # when it is not 1.
    n_groups: int
    # Extra keyword arguments forwarded to flash_attn_func.
    flash_dense_kwargs: Optional[dict[str, Any]] = None
    # Extra keyword arguments forwarded to flash_attn_varlen_func.
    flash_varlen_kwargs: Optional[dict[str, Any]] = None
    # Extra keyword arguments forwarded to scaled_dot_product_attention.
    sdpa_kwargs: Optional[dict[str, Any]] = None
    # Extra keyword arguments forwarded to xformers_attention.
    xformers_kwargs: Optional[dict[str, Any]] = None


@dataclass
class AttentionContext:
    """Per-call info required to run attention."""

    # Batch size.
    bsz: int
    # Query sequence length.
    q_len: int
    # Key/value sequence length.
    kv_seq_len: int
    # Number of query heads.
    n_heads: int
    # Size of each attention head.
    head_dim: int
    # Selects which K/V (and Q) layout the xformers path uses for grouped heads.
    requires_grad: bool
    # Packed-sequence metadata; run_attention unpacks it as
    # (_, cu_seqlens, max_seqlen). None when the batch is not packed.
    seq_info: Optional[Tuple[Tensor, Tensor, int]]
    # Optional mask; when present run_attention always uses the SDPA backend.
    attention_mask: Optional[Tensor]
    # Passed as `base_mask` when building the xformers attention bias.
    causal_mask: Optional[Any]
    # Optional sliding-window size forwarded to the mask builders.
    sliding_window: Optional[int] = None


def select_attention_backend(use_varlen: bool = False) -> str:
    """Pick the attention backend identifier in priority order.

    FlashAttention wins when available (variable-length flavour if
    ``use_varlen`` is set, dense otherwise), then xFormers, then PyTorch SDPA.
    """
    if not HAS_FLASH_ATTENTION:
        return XFORMERS if HAS_XFORMERS else SDPA
    return FLASH_VARLEN if use_varlen else FLASH_DENSE


def run_attention(
    *,
    config: AttentionConfig,
    context: AttentionContext,
    Q: Tensor,
    K: Tensor,
    V: Tensor,
) -> Tensor:
    """
    Run attention using config / context info.

    Backend choice is prioritized for speed: FlashAttention when installed
    (`flash_varlen` for packed/variable-length inputs with `seq_info`, otherwise dense
    flash), then xFormers if flash is unavailable, with PyTorch SDPA as the final
    fallback (e.g., CPU or no fused kernels).

    Varlen flash is preferred when packing metadata is present because it avoids padding
    and keeps peak memory low. xFormers and SDPA can also handle packed batches (we
    pass a block-diagonal mask into each).

    Args:
        config: Backend selection plus per-backend keyword arguments.
        context: Per-call sizes, packing metadata and masks.
        Q: Query tensor; the reshapes below treat it as
            (bsz, n_heads, q_len, head_dim).
        K: Key tensor, treated as (bsz, n_kv_heads, kv_seq_len, head_dim).
        V: Value tensor, same layout as K.

    Returns:
        Attention output laid out as (bsz, q_len, n_heads, head_dim).

    Raises:
        ValueError: On the SDPA path, if `context.attention_mask` is a tensor
            whose rank is not 2, 3 or 4.
    """

    backend = config.backend
    # Varlen flash needs packing metadata; without it use dense flash (or SDPA).
    if backend == FLASH_VARLEN and context.seq_info is None:
        backend = FLASH_DENSE if HAS_FLASH_ATTENTION else SDPA

    # [TODO] Flash attention does not support arbitrary attention masks (only
    # causal via flag). When a padding mask is present (e.g. left-padded
    # batched generation), fall back to SDPA which consumes attn_mask.
    # xFormers also does not thread context.attention_mask through, so the
    # same fallback applies.
    if context.attention_mask is not None and backend in (
        FLASH_DENSE,
        FLASH_VARLEN,
        XFORMERS,
    ):
        backend = SDPA

    flash_dense_kwargs = config.flash_dense_kwargs or {}
    flash_varlen_kwargs = config.flash_varlen_kwargs or {}
    sdpa_kwargs = config.sdpa_kwargs or {}
    xformers_kwargs = config.xformers_kwargs or {}

    bsz = context.bsz
    n_heads = context.n_heads
    q_len = context.q_len
    head_dim = context.head_dim
    kv_seq_len = context.kv_seq_len
    requires_grad = context.requires_grad
    sliding_window = context.sliding_window

    if backend == FLASH_VARLEN:
        # Flatten batch and sequence into one token axis for the varlen kernel.
        # K and V are reshaped with q_len too, so this path relies on the
        # key/value length matching the query length.
        Q_f = Q.transpose(1, 2).reshape(bsz * q_len, n_heads, head_dim)
        K_f = K.transpose(1, 2).reshape(bsz * q_len, config.n_kv_heads, head_dim)
        V_f = V.transpose(1, 2).reshape(bsz * q_len, config.n_kv_heads, head_dim)
        _, cu_seqlens, max_seqlen = context.seq_info
        # The same cu_seqlens / max_seqlen are used for queries and keys.
        return flash_attn_varlen_func(
            Q_f,
            K_f,
            V_f,
            cu_seqlens,
            cu_seqlens,
            max_seqlen,
            max_seqlen,
            **flash_varlen_kwargs,
        ).view(bsz, q_len, n_heads, head_dim)
    elif backend == FLASH_DENSE:
        # Move the sequence axis ahead of the head axis before calling flash.
        Q_t = Q.transpose(1, 2)
        K_t = K.transpose(1, 2)
        V_t = V.transpose(1, 2)
        return flash_attn_func(Q_t, K_t, V_t, **flash_dense_kwargs).reshape(
            bsz, q_len, n_heads, head_dim
        )
    elif backend == XFORMERS:
        attn_bias = build_xformers_block_causal_mask(
            context.seq_info,
            sliding_window = sliding_window,
            base_mask = context.causal_mask,
        )

        Q_t = Q.transpose(1, 2)
        K_t = K.transpose(1, 2)
        V_t = V.transpose(1, 2)

        K_mod = K_t
        V_mod = V_t
        Q_mod = Q_t

        if config.n_groups != 1:
            # Grouped heads: add a group axis to K/V and broadcast over it.
            K_mod = K_t.view(bsz, kv_seq_len, config.n_kv_heads, 1, head_dim)
            V_mod = V_t.view(bsz, kv_seq_len, config.n_kv_heads, 1, head_dim)
            K_mod = K_mod.expand(
                bsz, kv_seq_len, config.n_kv_heads, config.n_groups, head_dim
            )
            V_mod = V_mod.expand(
                bsz, kv_seq_len, config.n_kv_heads, config.n_groups, head_dim
            )

            if requires_grad:
                # With gradients: collapse K/V to a flat n_heads axis.
                K_mod = K_mod.reshape(bsz, kv_seq_len, n_heads, head_dim)
                V_mod = V_mod.reshape(bsz, kv_seq_len, n_heads, head_dim)
            else:
                # Without gradients: keep the 5-D grouped layout and reshape Q
                # to match it.
                Q_mod = Q_t.view(
                    bsz, q_len, config.n_kv_heads, config.n_groups, head_dim
                )

        has_block = XFORMERS_BLOCK_DIAG_CLS is not None and isinstance(
            attn_bias, XFORMERS_BLOCK_DIAG_CLS
        )

        if config.n_groups != 1 and has_block:
            # With a block-diagonal bias, fold the batch into the sequence axis
            # (leading batch dimension of 1).
            if not requires_grad:
                Q_mod = Q_mod.view(
                    1, bsz * q_len, config.n_kv_heads, config.n_groups, head_dim
                )
                K_mod = K_mod.view(
                    1, bsz * kv_seq_len, config.n_kv_heads, config.n_groups, head_dim
                )
                V_mod = V_mod.view(
                    1, bsz * kv_seq_len, config.n_kv_heads, config.n_groups, head_dim
                )
            else:
                Q_mod = Q_mod.view(1, bsz * q_len, n_heads, head_dim)
                K_mod = K_mod.view(1, bsz * kv_seq_len, n_heads, head_dim)
                V_mod = V_mod.view(1, bsz * kv_seq_len, n_heads, head_dim)

        out = xformers_attention(
            Q_mod,
            K_mod,
            V_mod,
            attn_bias = attn_bias,
            **xformers_kwargs,
        )

        # Restore the (bsz, q_len, n_heads, head_dim) output layout.
        if config.n_groups != 1 and not requires_grad:
            out = out.view(bsz, q_len, config.n_kv_heads, config.n_groups, head_dim)
            out = out.reshape(bsz, q_len, n_heads, head_dim)
        else:
            out = out.view(bsz, q_len, n_heads, head_dim)
        return out
    else:
        # SDPA path (also the fallback for any unrecognised backend value).
        local_mask = context.attention_mask
        is_causal_local = False
        if context.seq_info is not None and local_mask is None:
            # Packed batch without an explicit mask: build one from seq_info.
            local_mask = build_sdpa_packed_attention_mask(
                context.seq_info,
                dtype = Q.dtype,
                device = Q.device,
                sliding_window = sliding_window,
            )
        else:
            q_len_local = Q.shape[-2]
            k_len_local = K.shape[-2]
            # ---- SDPA mask normalization for left padding / 2D masks ----
            if local_mask is not None and isinstance(local_mask, torch.Tensor):
                local_mask = local_mask.to(device = Q.device)

                if local_mask.dim() == 2:
                    # key padding keep mask: (bsz, k_len), 1/True = real token
                    if local_mask.dtype == torch.bool:
                        key_keep = local_mask
                    else:
                        # tokenizer attention_mask is typically int 0/1
                        key_keep = local_mask != 0

                    past_len = (
                        k_len_local - q_len_local
                    )  # works for prefill (0) and decode
                    q_pos = torch.arange(
                        past_len, past_len + q_len_local, device = Q.device
                    )
                    k_pos = torch.arange(k_len_local, device = Q.device)

                    causal_keep = (
                        k_pos[None, :] <= q_pos[:, None]
                    )  # True = allowed (SDPA)
                    if sliding_window is not None:
                        # Also drop keys further back than the window size.
                        causal_keep &= k_pos[None, :] >= (
                            q_pos[:, None] - (sliding_window - 1)
                        )

                    # (bsz, 1, q_len, k_len) boolean keep mask
                    local_mask = (
                        causal_keep[None, None, :, :] & key_keep[:, None, None, :]
                    )

                elif local_mask.dim() == 3:
                    # (bsz, q_len, k_len) -> (bsz, 1, q_len, k_len)
                    local_mask = local_mask[:, None, :, :]

                elif local_mask.dim() == 4:
                    if local_mask.dtype != torch.bool:
                        # Use boolean keep masks for better SDPA stability.
                        # NOTE(review): this treats a value of exactly 0 as
                        # "keep", i.e. it presumes an additive mask - confirm
                        # against callers.
                        local_mask = local_mask.eq(0)
                else:
                    raise ValueError(
                        f"Unsupported SDPA attention_mask rank: {local_mask.dim()}"
                    )

                # Avoid NaNs from fully-masked rows (common with left padding).
                if local_mask.dtype == torch.bool:
                    no_allowed = ~local_mask.any(
                        dim = -1, keepdim = True
                    )  # (bsz,1,q_len,1)
                    # Rows with no allowed key are opened up to every key.
                    local_mask = local_mask | no_allowed

            # Use SDPA's built-in causal flag only when there is no mask and the
            # query and key lengths are equal.
            is_causal_local = local_mask is None and q_len_local == k_len_local

        kwargs = dict(sdpa_kwargs)
        # setdefault: values supplied through config.sdpa_kwargs take precedence.
        kwargs.setdefault("attn_mask", local_mask)
        kwargs.setdefault("is_causal", is_causal_local)

        use_sdpa_gqa = SDPA_HAS_GQA and config.n_groups != 1
        if (
            use_sdpa_gqa
            and (not requires_grad)
            and isinstance(local_mask, torch.Tensor)
            and local_mask.dim() >= 3
            and local_mask.shape[0] > 1
        ):
            # Batched masked inference has shown row-coupled drift with SDPA GQA.
            # Fall back to explicit KV expansion for deterministic row-wise behavior.
            use_sdpa_gqa = False

        if use_sdpa_gqa:
            kwargs.setdefault("enable_gqa", True)
            out = scaled_dot_product_attention(Q, K, V, **kwargs)
            return out.transpose(1, 2)

        K_mod = K
        V_mod = V
        if config.n_groups != 1:
            # Manual grouped-head expansion: repeat each K/V head n_groups times.
            K_mod = K[:, :, None, :, :].expand(
                bsz, config.n_kv_heads, config.n_groups, kv_seq_len, head_dim
            )
            V_mod = V[:, :, None, :, :].expand(
                bsz, config.n_kv_heads, config.n_groups, kv_seq_len, head_dim
            )
            K_mod = K_mod.reshape(bsz, n_heads, kv_seq_len, head_dim)
            V_mod = V_mod.reshape(bsz, n_heads, kv_seq_len, head_dim)

        out = scaled_dot_product_attention(
            Q.contiguous(),
            K_mod.contiguous(),
            V_mod.contiguous(),
            **kwargs,
        )
        return out.transpose(1, 2).contiguous()


# Names exported by `from .attention_dispatch import *`. The backend constants
# (FLASH_VARLEN, FLASH_DENSE, XFORMERS, SDPA) are not listed here.
__all__ = [
    "AttentionConfig",
    "AttentionContext",
    "select_attention_backend",
    "run_attention",
]


================================================
FILE: unsloth/utils/hf_hub.py
================================================
from huggingface_hub import HfApi, ModelInfo

# Lazily created HfApi client shared by the helpers in this module.
_HFAPI: HfApi = None

# Property names related to model popularity.
POPULARITY_PROPERTIES = [
    "downloads",
    "downloadsAllTime",
    "trendingScore",
    "likes",
]
# Magnitude thresholds used by formatted_int to choose a K / M / B suffix.
THOUSAND = 1000
MILLION = 1000000
BILLION = 1000000000


def formatted_int(value: int) -> str:
    """Render ``value`` compactly with a K, M or B suffix.

    Values below one thousand are returned via ``str``. Larger values are
    scaled to thousands, millions or billions and shown with one decimal
    place and thousands separators.
    """
    if value < THOUSAND:
        return str(value)
    # (exclusive upper bound, divisor, suffix) for the two middle ranges.
    for upper_bound, divisor, suffix in (
        (MILLION, 1000, "K"),
        (BILLION, 1000000, "M"),
    ):
        if value < upper_bound:
            return f"{float(value) / divisor:,.1f}{suffix}"
    return f"{float(value) / 1000000000:,.1f}B"


def get_model_info(
    model_id: str, properties: list[str] = ["safetensors", "lastModified"]
) -> ModelInfo:
    """
    Get the model info for a specific model.

    properties: list[str] = See https://huggingface.co/docs/huggingface_hub/api-ref/hf_hub/hf_api/model_info
    Default properties: ["safetensors", "lastModified"], only retrieves minimal information.
    Set to None to retrieve the full model information.

    Returns the ModelInfo from the Hub, or None when the lookup raises; in that
    case the error is printed instead of being propagated.
    """
    global _HFAPI
    if _HFAPI is None:
        _HFAPI = HfApi()
    # Pass a private copy of list arguments so the shared default list (or a
    # caller-owned list) can never be mutated downstream. None and any other
    # value are forwarded unchanged.
    expand = list(properties) if isinstance(properties, list) else properties
    try:
        model_info: ModelInfo = _HFAPI.model_info(model_id, expand = expand)
    except Exception as e:
        # Deliberate best effort: report the failure and signal it with None.
        print(f"Error getting model info for {model_id}: {e}")
        model_info = None
    return model_info


def list_models(
    properties: list[str] = None,
    full: bool = False,
    sort: str = "downloads",
    author: str = "unsloth",
    search: str = None,
    limit: int = 10,
) -> list[ModelInfo]:
    """
    Query the Hugging Face Hub for models and return the client's result.

    properties: list[str] = Properties to expand, see https://huggingface.co/docs/huggingface_hub/api-ref/hf_hub/hf_api/list_models
    full: bool = Request the full model information; when True, properties is ignored.
    sort: str = The sort order.
    author: str = The author of the models.
    search: str = The search query for filtering models.
    limit: int = Maximum number of models requested from the Hub.

    """
    global _HFAPI
    if _HFAPI is None:
        _HFAPI = HfApi()

    # A full listing takes precedence over any explicit property selection.
    expand = None if full else properties

    return _HFAPI.list_models(
        author = author,
        search = search,
        sort = sort,
        limit = limit,
        expand = expand,
        full = full,
    )


================================================
FILE: unsloth/utils/packing.py
================================================
# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Utilities for enabling packed (padding-free) batches across Unsloth."""

from __future__ import annotations

import logging
from collections import OrderedDict
from typing import Any, Iterable, Optional, Sequence, Tuple

import torch

try:
    from xformers.ops.fmha.attn_bias import (
        BlockDiagonalCausalMask as _XFormersBlockMask,
    )
except Exception:
    try:
        from xformers.attn_bias import BlockDiagonalCausalMask as _XFormersBlockMask
    except Exception:
        _XFormersBlockMask = None

# Maximum number of entries kept in _XFORMERS_MASK_CACHE before the oldest one is evicted.
_XFORMERS_MASK_CACHE_MAXSIZE = 32
# Ordered cache of xformers block masks keyed by (sequence lengths, sliding-window key).
_XFORMERS_MASK_CACHE: OrderedDict[Tuple[Tuple[int, ...], int], Any] = OrderedDict()

# Cache per device for get_packed_info_from_kwargs to avoid repeated D2H sync across layers
_PACKED_INFO_CACHE: dict = {}

# Cache per device for build_sdpa_packed_attention_mask to avoid repeated D2H sync across layers
_SDPA_MASK_CACHE: dict = {}

# Cache per device for build_xformers_block_causal_mask to avoid repeated D2H sync across layers
_XFORMERS_BLOCK_MASK_CACHE: dict = {}


def _window_cache_key(sliding_window: Optional[int]) -> int:
    if sliding_window is None or sliding_window <= 0:
        return 0
    return int(sliding_window)


def _get_cached_block_mask(
    lengths: Tuple[int, ...],
    sliding_window: Optional[int],
):
    """Return an xformers block-diagonal causal mask for ``lengths``, reusing cached masks.

    Returns ``None`` when the xformers mask class could not be imported.
    Masks are cached per (lengths, window) pair; a hit is moved to the most
    recent position and the oldest entry is dropped once the cache exceeds
    ``_XFORMERS_MASK_CACHE_MAXSIZE``.
    """
    if _XFormersBlockMask is None:
        return None

    window = _window_cache_key(sliding_window)
    key = (lengths, window)

    hit = _XFORMERS_MASK_CACHE.get(key)
    if hit is not None:
        # Refresh recency so this entry is evicted last.
        _XFORMERS_MASK_CACHE.move_to_end(key)
        return hit

    built = _XFormersBlockMask.from_seqlens(list(lengths))
    if window:
        # Only narrow to a local window when the mask object supports it.
        if built is not None and hasattr(built, "make_local_attention"):
            built = built.make_local_attention(window_size = window)

    _XFORMERS_MASK_CACHE[key] = built
    if len(_XFORMERS_MASK_CACHE) > _XFORMERS_MASK_CACHE_MAXSIZE:
        _XFORMERS_MASK_CACHE.popitem(last = False)
    return built


class _TrlPackingWarningFilter(logging.Filter):
    to_filter = (
        "attention implementation is not",
        "kernels-community",
    )

    def filter(self, record: logging.LogRecord) -> bool:
        message = record.getMessage()
        return not any(substring in message for substring in self.to_filter)


# Flipped to True once the filter has been attached, so it is only added once.
_TRL_FILTER_INSTALLED = False


def _ensure_trl_warning_filter():
    """Attach ``_TrlPackingWarningFilter`` to the TRL SFT trainer logger if not done yet."""
    global _TRL_FILTER_INSTALLED
    if not _TRL_FILTER_INSTALLED:
        trl_logger = logging.getLogger("trl.trainer.sft_trainer")
        trl_logger.addFilter(_TrlPackingWarningFilter())
        _TRL_FILTER_INSTALLED = True


def mark_allow_overlength(module):
    """Flag every module in a hierarchy that has ``max_seq_length`` as allowing over-length packed batches.

    Sets ``_unsloth_allow_packed_overlength = True`` on each visited object that
    has a ``max_seq_length`` attribute, then recurses into whatever its
    ``children()`` method yields. ``None`` is ignored.
    """
    if module is None:
        return

    if hasattr(module, "max_seq_length"):
        module._unsloth_allow_packed_overlength = True

    list_children = getattr(module, "children", None)
    if list_children is not None:
        for submodule in list_children():
            mark_allow_overlength(submodule)


def configure_sample_packing(config):
    """Switch an ``SFTConfig`` to packed, padding-free batches and keep all dataset columns."""
    _ensure_trl_warning_filter()
    config.packing = True
    config.padding_free = True
    # Extra columns must survive so they can reach the collator.
    config.remove_unused_columns = False


def configure_padding_free(config):
    """Switch an ``SFTConfig`` to padding-free batching (packing untouched) and keep all dataset columns."""
    _ensure_trl_warning_filter()
    config.padding_free = True
    config.remove_unused_columns = False


def enable_sample_packing(
    model,
    trainer,
    *,
    sequence_lengths_key: str = "seq_lengths",
) -> None:
    """Wire packed-batch support into an already constructed trainer.

    Marks the model hierarchy as allowing over-length batches, disables
    ``remove_unused_columns`` on the trainer args when present, and wraps the
    collator's ``torch_call`` so each batch carries ``packed_seq_lengths`` (an
    int32 tensor) and no ``attention_mask``. Wrapping happens at most once per
    collator; collators without ``torch_call`` are left alone.

    Raises:
        ValueError: if ``model`` or ``trainer`` is ``None``.
    """
    if model is None or trainer is None:
        raise ValueError("model and trainer must not be None")

    mark_allow_overlength(model)

    trainer_args = getattr(trainer, "args", None)
    if hasattr(trainer_args, "remove_unused_columns"):
        trainer_args.remove_unused_columns = False

    collator = getattr(trainer, "data_collator", None)
    if (
        collator is None
        or not hasattr(collator, "torch_call")
        or getattr(collator, "_unsloth_packing_wrapped", False)
    ):
        return

    # Only switch on collator options that this collator actually exposes.
    for flag in ("padding_free", "return_position_ids"):
        if hasattr(collator, flag):
            setattr(collator, flag, True)

    wrapped_call = collator.torch_call

    def torch_call_with_lengths(examples: Sequence[dict]):
        batch = wrapped_call(examples)
        if not examples or not isinstance(examples[0], dict):
            return batch

        # Preferred source: explicit per-example length metadata.
        packed_lengths: list[int] = []
        for sample in examples:
            declared = sample.get(sequence_lengths_key)
            if isinstance(declared, Iterable):
                packed_lengths.extend(int(n) for n in declared)

        # Fallback: infer lengths from tokenized inputs when metadata is absent
        if not packed_lengths:
            all_ids = (sample.get("input_ids") for sample in examples)
            packed_lengths = [len(ids) for ids in all_ids if isinstance(ids, Iterable)]

        if packed_lengths:
            batch["packed_seq_lengths"] = torch.tensor(
                packed_lengths, dtype = torch.int32
            )
            if "attention_mask" in batch:
                batch.pop("attention_mask")
        return batch

    collator.torch_call = torch_call_with_lengths
    collator._unsloth_packing_wrapped = True


def enable_padding_free_metadata(model, trainer):
    """Make a padding-free collator emit per-sequence lengths when packing is not used.

    Does nothing when the trainer has no collator, the collator was already
    wrapped by this function, or its ``padding_free`` flag is not set.
    Otherwise wraps ``torch_call`` so every batch gets ``packed_seq_lengths``
    (an int32 tensor) built from each example's ``seq_lengths`` or, failing
    that, the length of its ``input_ids`` (which is also stored back on the
    example as ``seq_lengths``).
    """
    collator = getattr(trainer, "data_collator", None)
    if collator is None:
        return
    if getattr(collator, "_unsloth_padding_free_lengths_wrapped", False):
        return
    if not getattr(collator, "padding_free", False):
        return

    mark_allow_overlength(model)
    if hasattr(collator, "return_position_ids"):
        collator.return_position_ids = True
    trainer_args = getattr(trainer, "args", None)
    if hasattr(trainer_args, "remove_unused_columns"):
        trainer_args.remove_unused_columns = False

    wrapped_call = collator.torch_call

    def torch_call_with_padding_free_metadata(examples: Sequence[dict]):
        # Lengths are gathered before delegating, from the raw examples.
        collected: list[int] = []
        if examples and isinstance(examples[0], dict):
            for sample in examples:
                declared = sample.get("seq_lengths")
                if declared is None:
                    token_ids = sample.get("input_ids")
                    if token_ids is None:
                        continue
                    declared = [len(token_ids)]
                    sample["seq_lengths"] = declared
                collected.extend(declared)

        batch = wrapped_call(examples)
        if collected:
            batch["packed_seq_lengths"] = torch.tensor(
                collected,
                dtype = torch.int32,
            )
        return batch

    collator.torch_call = torch_call_with_padding_free_metadata
    collator._unsloth_padding_free_lengths_wrapped = True


def get_packed_info_from_kwargs(
    kwargs: dict,
    device: torch.device,
) -> Optional[Tuple[torch.Tensor, torch.Tensor, int]]:
    """Build ``(lengths, cu_seqlens, max_seqlen)`` from ``kwargs["packed_seq_lengths"]``.

    Returns ``None`` when the key is absent or ``None``. ``lengths`` is the
    int32 copy of the lengths on ``device``, ``cu_seqlens`` is the int32
    cumulative sum with a leading zero, and ``max_seqlen`` is a Python int.
    The last result per device is cached and reused while the very same
    ``packed_seq_lengths`` object is passed again.
    """
    packed = kwargs.get("packed_seq_lengths")
    if packed is None:
        return None

    cached = _PACKED_INFO_CACHE.get(device)
    if cached is not None and cached["seq_lengths"] is packed:
        return cached["result"]

    lengths = packed.to(device = device, dtype = torch.int32, non_blocking = True)

    # Leading zero followed by the running totals, written in place.
    cu_seqlens = torch.zeros(lengths.numel() + 1, dtype = torch.int32, device = device)
    torch.cumsum(lengths, dim = 0, dtype = torch.int32, out = cu_seqlens[1:])

    info = (lengths, cu_seqlens, int(lengths.max().item()))
    _PACKED_INFO_CACHE[device] = {"seq_lengths": packed, "result": info}
    return info


def build_xformers_block_causal_mask(
    seq_info: Optional[Tuple[torch.Tensor, torch.Tensor, int]],
    *,
    sliding_window: Optional[int] = None,
    base_mask: Optional[Any] = None,
):
    """Return the xformers attention mask to use for a (possibly packed) batch.

    Returns ``None`` when the xformers mask class is unavailable. Without
    ``seq_info`` the given ``base_mask`` is used, narrowed to ``sliding_window``
    when that is positive and the mask supports ``make_local_attention``. With
    ``seq_info`` a block mask is built from the sequence lengths (``None`` if
    there are none) and remembered per device for as long as the same lengths
    tensor and window are passed.
    """
    if _XFormersBlockMask is None:
        return None

    if seq_info is None:
        mask = base_mask
        if (
            sliding_window is not None
            and sliding_window > 0
            and mask is not None
            and hasattr(mask, "make_local_attention")
        ):
            mask = mask.make_local_attention(window_size = sliding_window)
        return mask

    seq_lengths, _cu_seqlens, _max_seqlen = seq_info

    # Cache the mask to avoid repeated D2H sync across layers
    device = seq_lengths.device
    params = (sliding_window,)
    cached = _XFORMERS_BLOCK_MASK_CACHE.get(device)
    same_request = (
        cached is not None
        and cached["seq_lengths"] is seq_lengths
        and cached["params"] == params
    )
    if same_request:
        return cached["mask"]

    host_lengths = seq_lengths.to("cpu", torch.int32)
    if host_lengths.numel() == 0:
        return None

    mask = _get_cached_block_mask(
        tuple(int(n) for n in host_lengths.tolist()),
        sliding_window,
    )
    _XFORMERS_BLOCK_MASK_CACHE[device] = {
        "seq_lengths": seq_lengths,
        "params": params,
        "mask": mask,
    }
    return mask


def build_sdpa_packed_attention_mask(
    seq_info: Tuple[torch.Tensor, torch.Tensor, int],
    *,
    dtype: torch.dtype,
    device: torch.device,
    sliding_window: Optional[int] = None,
) -> torch.Tensor:
    """Build an additive block-diagonal causal mask for a packed batch.

    The result has shape ``(1, 1, total, total)`` where ``total`` is the sum of
    the sequence lengths. Entries are ``0`` where a token may attend and
    ``-inf`` elsewhere: across sequences, to later positions, and (for
    sequences longer than a positive ``sliding_window``) to positions
    ``sliding_window`` or more steps back. The last mask per device is cached
    and reused while the same lengths tensor, dtype and window are passed.
    """
    seq_lengths, _cu_seqlens, _max_seqlen = seq_info

    params = (dtype, sliding_window)
    cached = _SDPA_MASK_CACHE.get(device)
    if (
        cached is not None
        and cached["seq_lengths"] is seq_lengths
        and cached["params"] == params
    ):
        return cached["mask"]

    neg_inf = float("-inf")
    total = int(seq_lengths.sum().item())
    # Start fully blocked; each sequence then opens its own diagonal tile.
    full_mask = torch.full((total, total), neg_inf, dtype = dtype, device = device)
    windowed = sliding_window is not None and sliding_window > 0

    start = 0
    for n in map(int, seq_lengths.tolist()):
        if n <= 0:
            continue
        positions = torch.arange(n, device = device)
        # Query index minus key index: negative means the key is in the future.
        delta = positions.unsqueeze(1) - positions.unsqueeze(0)
        hidden = delta < 0
        if windowed and n > sliding_window:
            hidden = hidden | (delta >= sliding_window)
        tile = torch.zeros((n, n), dtype = dtype, device = device)
        tile = tile.masked_fill(hidden, neg_inf)

        stop = start + n
        full_mask[start:stop, start:stop] = tile
        start = stop

    result = full_mask.unsqueeze(0).unsqueeze(0)
    _SDPA_MASK_CACHE[device] = {
        "seq_lengths": seq_lengths,
        "params": params,
        "mask": result,
    }
    return result


def _normalize_packed_lengths(
    seq_lengths: Any,
    *,
    device: torch.device,
) -> Optional[torch.Tensor]:
    """Coerce packed sequence lengths to a flat int64 tensor on ``device``.

    Accepts a tensor or anything ``torch.tensor`` can convert. Returns ``None``
    when ``seq_lengths`` is ``None`` or holds no elements.
    """
    if seq_lengths is None:
        return None
    if isinstance(seq_lengths, torch.Tensor):
        lengths = seq_lengths.to(device = device, dtype = torch.int64)
    else:
        lengths = torch.tensor(seq_lengths, device = device, dtype = torch.int64)
    if lengths.ndim != 1:
        lengths = lengths.reshape(-1)
    return lengths if lengths.numel() > 0 else None


def mask_packed_sequence_boundaries(
    shift_labels: torch.Tensor,
    seq_lengths: Any,
    *,
    ignore_index: int = -100,
) -> bool:
    """Mark final token of every packed sample so CE ignores boundary predictions.

    ``shift_labels`` is modified in place: viewed as one flat token stream, the
    last position of each sequence (cumulative length minus one) is set to
    ``ignore_index``. Positions outside the stream are skipped, including the
    negative position a zero-length leading sequence would produce.

    Returns ``True`` if at least one position was written, otherwise ``False``
    (no lengths given, or no boundary falls inside the stream).
    """
    lengths = _normalize_packed_lengths(seq_lengths, device = shift_labels.device)
    if lengths is None:
        return False

    flat = shift_labels.reshape(-1)
    total_tokens = flat.shape[0]
    boundary_positions = torch.cumsum(lengths, dim = 0) - 1
    # A negative index would wrap around and mask the last token of the batch,
    # so reject it together with positions past the end of the stream.
    in_range = (boundary_positions >= 0) & (boundary_positions < total_tokens)
    boundary_positions = boundary_positions[in_range]
    if boundary_positions.numel() == 0:
        return False

    flat[boundary_positions] = ignore_index
    if flat.data_ptr() != shift_labels.data_ptr():
        # reshape() had to copy because the labels could not be viewed flat,
        # so the edit must be written back for the caller to see it.
        shift_labels.copy_(flat.view_as(shift_labels))
    return True


def clear_packed_caches():
    """Empty the per-device packed-info, SDPA-mask and xformers-block-mask caches."""
    for cache in (_PACKED_INFO_CACHE, _SDPA_MASK_CACHE, _XFORMERS_BLOCK_MASK_CACHE):
        cache.clear()


# Names exported by a star-import of this module; underscore-prefixed helpers stay private.
__all__ = [
    "configure_sample_packing",
    "configure_padding_free",
    "enable_sample_packing",
    "enable_padding_free_metadata",
    "mark_allow_overlength",
    "get_packed_info_from_kwargs",
    "build_xformers_block_causal_mask",
    "build_sdpa_packed_attention_mask",
    "mask_packed_sequence_boundaries",
    "clear_packed_caches",
]


================================================
FILE: unsloth-cli.py
================================================
#!/usr/bin/env python3

"""
🦥 Starter Script for Fine-Tuning FastLanguageModel with Unsloth

This script is designed as a starting point for fine-tuning your models using unsloth.
It includes configurable options for model loading, PEFT parameters, training arguments, 
and model saving/pushing functionalities.

You will likely want to customize this script to suit your specific use case 
and requirements.

Here are a few suggestions for customization:
    - Modify the dataset loading and preprocessing steps to match your data.
    - Customize the model saving and pushing configurations.

Usage (most options have sensible defaults; this is an extended example for demonstration purposes):
    python unsloth-cli.py --model_name "unsloth/llama-3-8b" --max_seq_length 8192 --dtype None --load_in_4bit \
    --r 64 --lora_alpha 32 --lora_dropout 0.1 --bias "none" --use_gradient_checkpointing "unsloth" \
    --random_state 3407 --use_rslora --per_device_train_batch_size 4 --gradient_accumulation_steps 8 \
    --warmup_steps 5 --max_steps 400 --learning_rate 2e-6 --logging_steps 1 --optim "adamw_8bit" \
    --weight_decay 0.005 --lr_scheduler_type "linear" --seed 3407 --output_dir "outputs" \
    --report_to "tensorboard" --save_model --save_path "model" --quantization "f16" \
    --push_model --hub_path "hf/model" --hub_token "your_hf_token"

To see a full list of configurable options, use:
    python unsloth-cli.py --help

Happy fine-tuning!
"""

import argparse
import os


def run(args):
    """
    Fine-tune a model with Unsloth according to the parsed command-line options.

    Steps: load the model and tokenizer, attach LoRA adapters, build the
    training dataset, train with TRL's SFTTrainer, then optionally save the
    result (GGUF or merged weights) and push it to the Hugging Face Hub.

    args: the argparse namespace built in this script's ``__main__`` block.
    """
    from unsloth import FastLanguageModel
    from datasets import load_dataset
    from transformers.utils import strtobool
    from trl import SFTTrainer, SFTConfig
    from unsloth import is_bfloat16_supported
    from unsloth.models.loader_utils import prepare_device_map
    import logging
    from unsloth import RawTextDataLoader

    logging.getLogger("hf-to-gguf").setLevel(logging.WARNING)

    # Load model and tokenizer
    device_map, distributed = prepare_device_map()
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = args.model_name,
        max_seq_length = args.max_seq_length,
        dtype = args.dtype,
        load_in_4bit = args.load_in_4bit,
        device_map = device_map,
    )

    # Configure PEFT model
    model = FastLanguageModel.get_peft_model(
        model,
        r = args.r,
        target_modules = [
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha = args.lora_alpha,
        lora_dropout = args.lora_dropout,
        bias = args.bias,
        use_gradient_checkpointing = args.use_gradient_checkpointing,
        random_state = args.random_state,
        use_rslora = args.use_rslora,
        loftq_config = args.loftq_config,
    )

    # NOTE(review): the continuation lines of this template carry the source
    # indentation into the prompt text; left untouched because changing it
    # would change the training data.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}"""

    EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN

    def formatting_prompts_func(examples):
        # Batched map function: render one prompt per (instruction, input, output) row.
        rows = zip(examples["instruction"], examples["input"], examples["output"])
        texts = [
            alpaca_prompt.format(instruction, context, output) + EOS_TOKEN
            for instruction, context, output in rows
        ]
        return {"text": texts}

    def load_dataset_smart(args):
        if args.raw_text_file:
            # Use raw text loader
            loader = RawTextDataLoader(tokenizer, args.chunk_size, args.stride)
            dataset = loader.load_from_file(args.raw_text_file)
        elif args.dataset.endswith((".txt", ".md", ".json", ".jsonl")):
            # Auto-detect local raw text files
            loader = RawTextDataLoader(tokenizer)
            dataset = loader.load_from_file(args.dataset)
        else:
            # Check for modelscope usage
            use_modelscope = strtobool(
                os.environ.get("UNSLOTH_USE_MODELSCOPE", "False")
            )
            if use_modelscope:
                from modelscope import MsDataset

                dataset = MsDataset.load(args.dataset, split = "train")
            else:
                # Existing HuggingFace dataset logic
                dataset = load_dataset(args.dataset, split = "train")

            # Apply formatting for structured datasets
            dataset = dataset.map(formatting_prompts_func, batched = True)
        return dataset

    # Load dataset using smart loader
    dataset = load_dataset_smart(args)
    print("Data is formatted and ready!")

    # Configure training arguments
    training_args = SFTConfig(
        per_device_train_batch_size = args.per_device_train_batch_size,
        per_device_eval_batch_size = args.per_device_eval_batch_size,
        gradient_accumulation_steps = args.gradient_accumulation_steps,
        warmup_steps = args.warmup_steps,
        max_steps = args.max_steps,
        learning_rate = args.learning_rate,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = args.logging_steps,
        optim = args.optim,
        weight_decay = args.weight_decay,
        lr_scheduler_type = args.lr_scheduler_type,
        seed = args.seed,
        output_dir = args.output_dir,
        report_to = args.report_to,
        max_length = args.max_seq_length,
        dataset_num_proc = 2,
        ddp_find_unused_parameters = False if distributed else None,
        packing = args.packing,
    )

    # Initialize trainer
    trainer = SFTTrainer(
        model = model,
        processing_class = tokenizer,
        train_dataset = dataset,
        args = training_args,
    )

    trainer.train()

    # Save model
    if not args.save_model:
        print("Warning: The model is not saved!")
        return

    if args.save_gguf:
        # --quantization arrives as a list when given on the command line
        # (nargs = "+") but its default is a plain string, so treat both alike.
        quantization = args.quantization
        methods = quantization if isinstance(quantization, list) else [quantization]
        for quantization_method in methods:
            print(f"Saving model with quantization method: {quantization_method}")
            model.save_pretrained_gguf(
                args.save_path,
                tokenizer,
                quantization_method = quantization_method,
            )
            if args.push_model:
                # The Hub repo comes first and the token is passed as ``token``.
                model.push_to_hub_gguf(
                    args.hub_path,
                    tokenizer,
                    quantization_method = quantization_method,
                    token = args.hub_token,
                )
    else:
        model.save_pretrained_merged(args.save_path, tokenizer, args.save_method)
        if args.push_model:
            # Push to the Hub repo (not the local save path) and pass the token
            # by keyword so it cannot land in the save_method position.
            model.push_to_hub_merged(
                args.hub_path,
                tokenizer,
                save_method = args.save_method,
                token = args.hub_token,
            )


# Script entry point: declare the command-line options, parse them, and hand
# the resulting namespace to run().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description = "🦥 Fine-tune your llm faster using unsloth!"
    )

    # Options forwarded to model loading and dataset selection.
    model_group = parser.add_argument_group("🤖 Model Options")
    model_group.add_argument(
        "--model_name",
        type = str,
        default = "unsloth/llama-3-8b",
        help = "Model name to load",
    )
    model_group.add_argument(
        "--max_seq_length",
        type = int,
        default = 2048,
        help = "Maximum sequence length, default is 2048. We auto support RoPE Scaling internally!",
    )
    # Parsed as a string; run() passes the value on unchanged.
    model_group.add_argument(
        "--dtype",
        type = str,
        default = None,
        help = "Data type for model (None for auto detection)",
    )
    model_group.add_argument(
        "--load_in_4bit",
        action = "store_true",
        help = "Use 4bit quantization to reduce memory usage",
    )
    model_group.add_argument(
        "--dataset",
        type = str,
        default = "yahma/alpaca-cleaned",
        help = "Huggingface dataset to use for training",
    )

    # Options forwarded to the LoRA adapter setup.
    lora_group = parser.add_argument_group(
        "🧠 LoRA Options",
        "These options are used to configure the LoRA model.",
    )
    lora_group.add_argument(
        "--r",
        type = int,
        default = 16,
        help = "Rank for Lora model, default is 16.  (common values: 8, 16, 32, 64, 128)",
    )
    lora_group.add_argument(
        "--lora_alpha",
        type = int,
        default = 16,
        help = "LoRA alpha parameter, default is 16. (common values: 8, 16, 32, 64, 128)",
    )
    lora_group.add_argument(
        "--lora_dropout",
        type = float,
        default = 0.0,
        help = "LoRA dropout rate, default is 0.0 which is optimized.",
    )
    lora_group.add_argument(
        "--bias",
        type = str,
        default = "none",
        help = "Bias setting for LoRA",
    )
    lora_group.add_argument(
        "--use_gradient_checkpointing",
        type = str,
        default = "unsloth",
        help = "Use gradient checkpointing",
    )
    lora_group.add_argument(
        "--random_state",
        type = int,
        default = 3407,
        help = "Random state for reproducibility, default is 3407.",
    )
    lora_group.add_argument(
        "--use_rslora",
        action = "store_true",
        help = "Use rank stabilized LoRA",
    )
    lora_group.add_argument(
        "--loftq_config",
        type = str,
        default = None,
        help = "Configuration for LoftQ",
    )

    # Options forwarded to the trainer configuration.
    training_group = parser.add_argument_group("🎓 Training Options")
    training_group.add_argument(
        "--per_device_train_batch_size",
        type = int,
        default = 2,
        help = "Batch size per device during training, default is 2.",
    )
    training_group.add_argument(
        "--per_device_eval_batch_size",
        type = int,
        default = 4,
        help = "Batch size per device during evaluation, default is 4.",
    )
    training_group.add_argument(
        "--gradient_accumulation_steps",
        type = int,
        default = 4,
        help = "Number of gradient accumulation steps, default is 4.",
    )
    training_group.add_argument(
        "--warmup_steps",
        type = int,
        default = 5,
        help = "Number of warmup steps, default is 5.",
    )
    training_group.add_argument(
        "--max_steps",
        type = int,
        default = 400,
        help = "Maximum number of training steps.",
    )
    training_group.add_argument(
        "--learning_rate",
        type = float,
        default = 2e-4,
        help = "Learning rate, default is 2e-4.",
    )
    training_group.add_argument(
        "--optim",
        type = str,
        default = "adamw_8bit",
        help = "Optimizer type.",
    )
    training_group.add_argument(
        "--weight_decay",
        type = float,
        default = 0.01,
        help = "Weight decay, default is 0.01.",
    )
    training_group.add_argument(
        "--lr_scheduler_type",
        type = str,
        default = "linear",
        help = "Learning rate scheduler type, default is 'linear'.",
    )
    training_group.add_argument(
        "--seed",
        type = int,
        default = 3407,
        help = "Seed for reproducibility, default is 3407.",
    )
    training_group.add_argument(
        "--packing",
        action = "store_true",
        help = "Enable padding-free sample packing via TRL's bin packer.",
    )

    # Logging / experiment-tracking options.
    report_group = parser.add_argument_group("📊 Report Options")
    report_group.add_argument(
        "--report_to",
        type = str,
        default = "tensorboard",
        choices = [
            "azure_ml",
            "clearml",
            "codecarbon",
            "comet_ml",
            "dagshub",
            "dvclive",
            "flyte",
            "mlflow",
            "neptune",
            "tensorboard",
            "wandb",
            "all",
            "none",
        ],
        help = (
            "The list of integrations to report the results and logs to. Supported platforms are:\n\t\t "
            "'azure_ml', 'clearml', 'codecarbon', 'comet_ml', 'dagshub', 'dvclive', 'flyte', "
            "'mlflow', 'neptune', 'tensorboard', and 'wandb'. Use 'all' to report to all integrations "
            "installed, 'none' for no integrations."
        ),
    )
    report_group.add_argument(
        "--logging_steps",
        type = int,
        default = 1,
        help = "Logging steps, default is 1",
    )

    # Options controlling what is written to disk after training.
    save_group = parser.add_argument_group("💾 Save Model Options")
    save_group.add_argument(
        "--output_dir",
        type = str,
        default = "outputs",
        help = "Output directory",
    )
    save_group.add_argument(
        "--save_model",
        action = "store_true",
        help = "Save the model after training",
    )
    save_group.add_argument(
        "--save_method",
        type = str,
        default = "merged_16bit",
        choices = ["merged_16bit", "merged_4bit", "lora"],
        help = "Save method for the model, default is 'merged_16bit'",
    )
    save_group.add_argument(
        "--save_gguf",
        action = "store_true",
        help = "Convert the model to GGUF after training",
    )
    save_group.add_argument(
        "--save_path",
        type = str,
        default = "model",
        help = "Path to save the model",
    )
    # nargs = "+" produces a list when the flag is given, while the default is a
    # plain string; run() accepts either form.
    save_group.add_argument(
        "--quantization",
        type = str,
        default = "q8_0",
        nargs = "+",
        help = (
            "Quantization method for saving the model. common values ('f16', 'q4_k_m', 'q8_0'), "
            "Check our wiki for all quantization methods https://github.com/unslothai/unsloth/wiki#saving-to-gguf"
        ),
    )

    # Options controlling uploads to the Hugging Face Hub.
    push_group = parser.add_argument_group("🚀 Push Model Options")
    push_group.add_argument(
        "--push_model",
        action = "store_true",
        help = "Push the model to Hugging Face hub after training",
    )
    # NOTE(review): this flag is parsed but run() in this script never reads it.
    push_group.add_argument(
        "--push_gguf",
        action = "store_true",
        help = "Push the model as GGUF to Hugging Face hub after training",
    )
    push_group.add_argument(
        "--hub_path",
        type = str,
        default = "hf/model",
        help = "Path on Hugging Face hub to push the model",
    )
    push_group.add_argument(
        "--hub_token",
        type = str,
        help = "Token for pushing the model to Hugging Face hub",
    )

    # Raw-text training options; these are added to the parser directly rather
    # than to one of the named groups above.
    parser.add_argument(
        "--raw_text_file", type = str, help = "Path to raw text file for training"
    )
    parser.add_argument(
        "--chunk_size", type = int, default = 2048, help = "Size of text chunks for training"
    )
    parser.add_argument(
        "--stride", type = int, default = 512, help = "Overlap between chunks"
    )

    args = parser.parse_args()
    run(args)


================================================
FILE: unsloth_cli/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import typer

from unsloth_cli.commands.train import train
from unsloth_cli.commands.inference import inference
from unsloth_cli.commands.export import export, list_checkpoints
from unsloth_cli.commands.ui import ui
from unsloth_cli.commands.studio import studio_app

# Root Typer application; both -h and --help show the help text.
app = typer.Typer(
    help = "Command-line interface for Unsloth training, inference, and export.",
    context_settings = {"help_option_names": ["-h", "--help"]},
)

# Register the imported command functions on the root application.
app.command()(train)
app.command()(inference)
app.command()(export)
# Registered under an explicit hyphenated command name.
app.command("list-checkpoints")(list_checkpoints)
app.command()(ui)
# Mount the Studio sub-application as the "studio" command group.
app.add_typer(studio_app, name = "studio", help = "Unsloth Studio commands.")


================================================
FILE: unsloth_cli/commands/__init__.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0


================================================
FILE: unsloth_cli/commands/export.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from pathlib import Path
from typing import Optional

import typer


# Values accepted by `export --format`; `export` rejects anything else.
EXPORT_FORMATS = ["merged-16bit", "merged-4bit", "gguf", "lora"]
# GGUF quantization methods listed in the `--quantization` help text.
# NOTE(review): `export` does not validate `--quantization` against this list.
GGUF_QUANTS = ["q4_k_m", "q5_k_m", "q8_0", "f16"]


def list_checkpoints(
    outputs_dir: Path = typer.Option(
        Path("./outputs"), "--outputs-dir", help = "Directory that holds training runs."
    ),
):
    """List checkpoints detected in the outputs directory."""
    # Imported lazily so that merely loading the CLI does not pull in the backend.
    from studio.backend.core.export import ExportBackend

    found = ExportBackend().scan_checkpoints(outputs_dir = str(outputs_dir))
    if not found:
        typer.echo("No checkpoints found.")
        raise typer.Exit()

    # Each scan result is a (model name, checkpoint entries, metadata) triple;
    # the metadata element is not displayed.
    for model_name, entries, _metadata in found:
        typer.echo(f"\n{model_name}:")
        for label, location, loss in entries:
            # The loss suffix is only shown when a loss value was recorded.
            suffix = "" if loss is None else f" (loss: {loss:.4f})"
            typer.echo(f"  {label}{suffix}: {location}")


def export(
    checkpoint: Path = typer.Argument(..., help = "Path to checkpoint directory."),
    output_dir: Path = typer.Argument(..., help = "Directory to save exported model."),
    format: str = typer.Option(
        "merged-16bit",
        "--format",
        "-f",
        help = f"Export format: {', '.join(EXPORT_FORMATS)}",
    ),
    quantization: str = typer.Option(
        "q4_k_m",
        "--quantization",
        "-q",
        help = f"GGUF quantization method: {', '.join(GGUF_QUANTS)}",
    ),
    push_to_hub: bool = typer.Option(
        False, "--push-to-hub", help = "Push exported model to HuggingFace Hub."
    ),
    repo_id: Optional[str] = typer.Option(
        None, "--repo-id", help = "HuggingFace repo ID (username/model-name)."
    ),
    hf_token: Optional[str] = typer.Option(
        None, "--hf-token", envvar = "HF_TOKEN", help = "HuggingFace token."
    ),
    private: bool = typer.Option(
        False, "--private", help = "Make the HuggingFace repo private."
    ),
    max_seq_length: int = typer.Option(2048, "--max-seq-length"),
    load_in_4bit: bool = typer.Option(True, "--load-in-4bit/--no-load-in-4bit"),
):
    """Export a checkpoint to various formats (merged, GGUF, LoRA adapter)."""
    # Usage errors exit with code 2 before the backend is imported.
    if format not in EXPORT_FORMATS:
        typer.echo(
            f"Error: Invalid format '{format}'. Choose from: {', '.join(EXPORT_FORMATS)}",
            err = True,
        )
        raise typer.Exit(code = 2)

    if push_to_hub and not repo_id:
        typer.echo("Error: --repo-id required when using --push-to-hub", err = True)
        raise typer.Exit(code = 2)

    from studio.backend.core.export import ExportBackend

    backend = ExportBackend()

    typer.echo(f"Loading checkpoint: {checkpoint}")
    loaded, load_message = backend.load_checkpoint(
        checkpoint_path = str(checkpoint),
        max_seq_length = max_seq_length,
        load_in_4bit = load_in_4bit,
    )
    if not loaded:
        typer.echo(f"Error: {load_message}", err = True)
        raise typer.Exit(code = 1)
    typer.echo(load_message)

    typer.echo(f"Exporting as {format}...")
    destination = str(output_dir)
    # Hub-related arguments are shared by every export flavour.
    hub_options = {
        "push_to_hub": push_to_hub,
        "repo_id": repo_id,
        "hf_token": hf_token,
    }
    # CLI format name -> backend `format_type` label for merged exports.
    merged_labels = {
        "merged-16bit": "16-bit (FP16)",
        "merged-4bit": "4-bit (FP4)",
    }

    if format in merged_labels:
        success, message = backend.export_merged_model(
            save_directory = destination,
            format_type = merged_labels[format],
            **hub_options,
            private = private,
        )
    elif format == "gguf":
        success, message = backend.export_gguf(
            save_directory = destination,
            quantization_method = quantization.upper(),
            **hub_options,
        )
    else:
        # Only "lora" can reach this branch after the validation above.
        success, message = backend.export_lora_adapter(
            save_directory = destination,
            **hub_options,
            private = private,
        )

    if not success:
        typer.echo(f"Error: {message}", err = True)
        raise typer.Exit(code = 1)

    typer.echo(message)


================================================
FILE: unsloth_cli/commands/inference.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import sys
from typing import Optional

import typer


def inference(
    model: str = typer.Argument(..., help = "HF model id or local path."),
    prompt: str = typer.Argument(..., help = "Prompt to send to the model."),
    hf_token: Optional[str] = typer.Option(
        None, "--hf-token", envvar = "HF_TOKEN", help = "Hugging Face token if needed."
    ),
    temperature: float = typer.Option(0.7, "--temperature"),
    top_p: float = typer.Option(0.9, "--top-p"),
    top_k: int = typer.Option(40, "--top-k"),
    max_new_tokens: int = typer.Option(256, "--max-new-tokens"),
    repetition_penalty: float = typer.Option(1.1, "--repetition-penalty"),
    system_prompt: str = typer.Option(
        "",
        "--system-prompt",
        help = "Optional system prompt to prepend.",
    ),
    max_seq_length: int = typer.Option(2048, "--max-seq-length"),
    load_in_4bit: bool = typer.Option(True, "--load-in-4bit/--no-load-in-4bit"),
):
    """Run a single inference using the specified model."""
    from studio.backend.core import ModelConfig, get_inference_backend

    backend = get_inference_backend()
    resolved = ModelConfig.from_ui_selection(
        dropdown_value = model, search_value = None, hf_token = hf_token, is_lora = False
    )
    if not resolved:
        typer.echo("Could not resolve model config", err = True)
        raise typer.Exit(code = 1)

    model_ready = backend.load_model(
        config = resolved,
        max_seq_length = max_seq_length,
        load_in_4bit = load_in_4bit,
        hf_token = hf_token,
    )
    if not model_ready:
        typer.echo("Model load failed", err = True)
        raise typer.Exit(code = 1)

    # Sampling parameters are forwarded to the backend unchanged.
    sampling = {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "max_new_tokens": max_new_tokens,
        "repetition_penalty": repetition_penalty,
    }
    stream = backend.generate_chat_response(
        messages = [{"role": "user", "content": prompt}],
        system_prompt = system_prompt,
        **sampling,
    )

    typer.echo("Assistant:", nl = True)
    # Only the part of each chunk beyond the previous chunk's length is
    # written, i.e. chunks are treated as the text generated so far.
    shown = ""
    for snapshot in stream:
        fresh = snapshot[len(shown) :]
        if fresh:
            sys.stdout.write(fresh)
            sys.stdout.flush()
        shown = snapshot
    sys.stdout.write("\n")
    sys.stdout.flush()


================================================
FILE: unsloth_cli/commands/studio.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import os
import platform
import subprocess
import sys
import time
from pathlib import Path
from typing import Optional
import typer

# Typer application that holds the Studio commands defined in this module.
studio_app = typer.Typer(help = "Unsloth Studio commands.")

# Per-user Studio directory; the Studio virtualenv (".venv") and the auth
# data ("auth") are looked up beneath it by the code in this module.
STUDIO_HOME = Path.home() / ".unsloth" / "studio"

# __file__ is unsloth_cli/commands/studio.py, so three `.parent` hops
# (commands/ -> unsloth_cli/ -> its parent) reach the package root
# (either site-packages or the repo root for editable installs).
_PACKAGE_ROOT = Path(__file__).resolve().parent.parent.parent


def _studio_venv_python() -> Optional[Path]:
    """Locate the Python interpreter inside the Studio virtualenv.

    Returns:
        The interpreter path if that file exists, otherwise None.
    """
    venv = STUDIO_HOME / ".venv"
    # Windows venvs keep the interpreter under Scripts/, other platforms under bin/.
    if platform.system() == "Windows":
        candidate = venv / "Scripts" / "python.exe"
    else:
        candidate = venv / "bin" / "python"
    if candidate.is_file():
        return candidate
    return None


def _find_run_py() -> Optional[Path]:
    """Locate studio/backend/run.py without relying on the working directory.

    Lookup order:
      1. Next to this package (site-packages, or the repo root for editable
         installs).
      2. Inside the Studio virtualenv's site-packages (POSIX layout first,
         then the Windows layout).

    Returns:
        The first path found, or None when neither location has the file.
    """
    packaged = _PACKAGE_ROOT / "studio" / "backend" / "run.py"
    if packaged.is_file():
        return packaged

    venv_root = STUDIO_HOME / ".venv"
    layouts = (
        "lib/python*/site-packages/studio/backend/run.py",
        "Lib/site-packages/studio/backend/run.py",
    )
    for layout in layouts:
        # Take the first glob hit, if any, for this layout.
        hit = next(venv_root.glob(layout), None)
        if hit is not None:
            return hit
    return None


def _find_setup_script() -> Optional[Path]:
    """Locate the platform's Studio setup script (setup.ps1 or setup.sh).

    The search does not depend on the working directory: it checks next to
    this package first, then the Studio virtualenv's site-packages.

    Returns:
        The first matching path, or None if the script cannot be found.
    """
    on_windows = platform.system() == "Windows"
    name = "setup.ps1" if on_windows else "setup.sh"

    packaged = _PACKAGE_ROOT / "studio" / name
    if packaged.is_file():
        return packaged

    venv_root = STUDIO_HOME / ".venv"
    layouts = (
        f"lib/python*/site-packages/studio/{name}",
        f"Lib/site-packages/studio/{name}",
    )
    for layout in layouts:
        # Take the first glob hit, if any, for this layout.
        hit = next(venv_root.glob(layout), None)
        if hit is not None:
            return hit
    return None


# ── unsloth studio (server) ──────────────────────────────────────────


@studio_app.callback(invoke_without_command = True)
def studio_default(
    ctx: typer.Context,
    port: int = typer.Option(8888, "--port", "-p"),
    host: str = typer.Option("0.0.0.0", "--host", "-H"),
    frontend: Optional[Path] = typer.Option(None, "--frontend", "-f"),
    silent: bool = typer.Option(False, "--silent", "-q"),
):
    """Launch the Unsloth Studio server."""
    # A subcommand was requested, so the callback itself has nothing to do.
    if ctx.invoked_subcommand is not None:
        return

    # Outside the Studio virtualenv: hand over to its interpreter, or bail
    # out if Studio has not been set up yet.
    venv_prefix = str(STUDIO_HOME / ".venv")
    if not sys.prefix.startswith(venv_prefix):
        venv_python = _studio_venv_python()
        server_script = _find_run_py()
        if not (venv_python and server_script):
            typer.echo("Studio not set up. Run 'unsloth studio setup' first.")
            raise typer.Exit(1)

        if not silent:
            typer.echo("Launching Unsloth Studio... Please wait...")
        command = [
            str(venv_python),
            str(server_script),
            "--host",
            host,
            "--port",
            str(port),
        ]
        if frontend:
            command += ["--frontend", str(frontend)]
        if silent:
            command += ["--silent"]

        # On Windows, os.execvp() spawns a child but the parent lingers,
        # so Ctrl+C only kills the parent leaving the child orphaned.
        # Start the child with Popen and wait for it there instead.
        if sys.platform == "win32":
            child = subprocess.Popen(command)
            try:
                exit_code = child.wait()
            except KeyboardInterrupt:
                # Child has its own signal handler — let it finish
                exit_code = child.wait()
            raise typer.Exit(exit_code)
        else:
            os.execvp(command[0], command)

    # Already inside the Studio virtualenv: run the server in this process.
    from studio.backend.run import run_server

    if not silent:
        from studio.backend.run import _resolve_external_ip

        shown_host = host
        if host == "0.0.0.0":
            shown_host = _resolve_external_ip()
        typer.echo(f"Starting Unsloth Studio on http://{shown_host}:{port}")

    run_server(
        host = host,
        port = port,
        frontend_path = frontend,
        silent = silent,
    )

    # Imported only after run_server() has been called, as in the original flow.
    from studio.backend.run import _shutdown_event

    try:
        if _shutdown_event is None:
            while True:
                time.sleep(1)
        else:
            # NOTE: Event.wait() without a timeout blocks at the C level
            # on Linux, preventing Python from delivering SIGINT (Ctrl+C).
            while not _shutdown_event.is_set():
                _shutdown_event.wait(timeout = 1)
    except KeyboardInterrupt:
        from studio.backend.run import _graceful_shutdown, _server

        _graceful_shutdown(_server)
        typer.echo("\nShutting down...")


# ── unsloth studio setup ─────────────────────────────────────────────


@studio_app.command()
def setup():
    """Run one-time Studio environment setup."""
    script = _find_setup_script()
    if not script:
        typer.echo("Error: Could not find setup script (setup.sh / setup.ps1).")
        raise typer.Exit(1)

    # Choose the interpreter for the platform's script flavour.
    if platform.system() == "Windows":
        command = ["powershell", "-ExecutionPolicy", "Bypass", "-File", str(script)]
    else:
        command = ["bash", str(script)]

    # Propagate a failing script's exit status as the CLI's exit code.
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        raise typer.Exit(exit_code)


# ── unsloth studio reset-password ────────────────────────────────────


@studio_app.command("reset-password")
def reset_password():
    """Reset the Studio admin password.

    Deletes the auth database so that a fresh admin account with a new
    random password is created on the next server start.  The Studio
    server must be restarted after running this command.
    """
    auth_dir = STUDIO_HOME / "auth"
    database = auth_dir / "auth.db"

    # Without a database there is nothing to reset; exit successfully.
    if not database.exists():
        typer.echo("No auth database found -- nothing to reset.")
        raise typer.Exit(0)

    # Remove the database first, then the bootstrap password file; a file
    # that is already gone is not an error.
    for stale in (database, auth_dir / ".bootstrap_password"):
        stale.unlink(missing_ok = True)

    typer.echo("Auth database deleted. Restart Unsloth Studio to get a new password.")


================================================
FILE: unsloth_cli/commands/train.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import time
from pathlib import Path
from typing import Optional

import typer

from unsloth_cli.config import Config, load_config
from unsloth_cli.options import add_options_from_config


@add_options_from_config(Config)
def train(
    config: Optional[Path] = typer.Option(
        None,
        "--config",
        "-c",
        help = "Path to YAML/JSON config file. CLI flags override config values.",
    ),
    hf_token: Optional[str] = typer.Option(
        None, "--hf-token", envvar = "HF_TOKEN", help = "Hugging Face token if needed."
    ),
    wandb_token: Optional[str] = typer.Option(
        None, "--wandb-token", envvar = "WANDB_API_KEY", help = "Weights & Biases API key."
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help = "Show resolved config and exit without training.",
    ),
    config_overrides: dict = None,
):
    """Launch training using the existing Unsloth training backend."""
    # Exit codes: 2 for usage/config errors, 1 for backend failures,
    # 0 after a successful --dry-run.
    try:
        cfg = load_config(config)
    except FileNotFoundError as e:
        typer.echo(f"Error: {e}", err = True)
        raise typer.Exit(code = 2)

    # `config_overrides` defaults to None, and unpacking None with ** raises
    # TypeError; treat a missing dict as "no overrides".
    cfg.apply_overrides(**(config_overrides or {}))

    # CLI/env tokens take precedence over config
    # Handle case where typer.Option isn't resolved (decorator interaction)
    from typer.models import OptionInfo

    if isinstance(hf_token, OptionInfo):
        hf_token = None
    if isinstance(wandb_token, OptionInfo):
        wandb_token = None
    hf_token = hf_token or cfg.logging.hf_token
    wandb_token = wandb_token or cfg.logging.wandb_token

    if dry_run:
        import yaml

        data = cfg.model_dump()
        # Path objects are not plain YAML scalars; dump the directory as text.
        data["training"]["output_dir"] = str(data["training"]["output_dir"])
        typer.echo(yaml.dump(data, default_flow_style = False, sort_keys = False))
        raise typer.Exit(code = 0)

    if not cfg.model:
        typer.echo("Error: provide --model or set model in --config", err = True)
        raise typer.Exit(code = 2)

    if not cfg.data.dataset and not cfg.data.local_dataset:
        typer.echo(
            "Error: provide --dataset or --local-dataset (or via --config)", err = True
        )
        raise typer.Exit(code = 2)

    # Check if the model path is a LoRA adapter (has adapter_config.json)
    model_path = Path(cfg.model) if cfg.model else None
    model_is_lora = (
        model_path
        and model_path.is_dir()
        and (model_path / "adapter_config.json").exists()
    )
    use_lora = cfg.training.training_type.lower() == "lora"

    if model_is_lora and not use_lora:
        typer.echo(
            "Error: Cannot do full finetuning on a LoRA adapter. "
            "Use --training-type lora or provide a base model.",
            err = True,
        )
        raise typer.Exit(code = 2)

    # Imported lazily so that validation errors above surface without
    # importing the training backend.
    from studio.backend.core.training.trainer import UnslothTrainer

    trainer = UnslothTrainer()

    # Load model (trainer.is_vlm is set after this)
    # 4-bit loading is only requested for LoRA runs; full finetuning passes False.
    if not trainer.load_model(
        model_name = cfg.model,
        max_seq_length = cfg.training.max_seq_length,
        load_in_4bit = cfg.training.load_in_4bit if use_lora else False,
        hf_token = hf_token,
    ):
        typer.echo("Model load failed", err = True)
        raise typer.Exit(code = 1)

    is_vision = trainer.is_vlm

    if not trainer.prepare_model_for_training(**cfg.model_kwargs(use_lora, is_vision)):
        typer.echo("Model preparation failed", err = True)
        raise typer.Exit(code = 1)

    result = trainer.load_and_format_dataset(
        dataset_source = cfg.data.dataset or "",
        format_type = cfg.data.format_type,
        local_datasets = cfg.data.local_dataset,
    )
    if result is None:
        typer.echo("Dataset load failed", err = True)
        raise typer.Exit(code = 1)

    ds, eval_ds = result

    training_kwargs = cfg.training_kwargs()
    training_kwargs["wandb_token"] = wandb_token  # CLI/env takes precedence
    started = trainer.start_training(
        dataset = ds, eval_dataset = eval_ds, **training_kwargs
    )

    if not started:
        typer.echo("Training failed to start", err = True)
        raise typer.Exit(code = 1)

    # Poll the training thread in short sleeps so Ctrl+C can be handled here;
    # the thread is always joined before the final status is read.
    try:
        while trainer.training_thread and trainer.training_thread.is_alive():
            time.sleep(1)
    except KeyboardInterrupt:
        typer.echo("Stopping training (Ctrl+C detected)...")
        trainer.stop_training()
    finally:
        if trainer.training_thread:
            trainer.training_thread.join()

    final = trainer.get_training_progress()
    if getattr(final, "error", None):
        typer.echo(f"Training error: {final.error}", err = True)
        raise typer.Exit(code = 1)


================================================
FILE: unsloth_cli/commands/ui.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

import os
import sys
import time
from pathlib import Path
from typing import Optional

import typer


def ui(
    port: int = typer.Option(
        8888, "--port", "-p", help = "Port to run the UI server on."
    ),
    host: str = typer.Option(
        "0.0.0.0", "--host", "-H", help = "Host address to bind to."
    ),
    frontend: Optional[Path] = typer.Option(
        None, "--frontend", "-f", help = "Path to frontend build directory."
    ),
    silent: bool = typer.Option(
        False, "--silent", "-q", help = "Suppress startup messages."
    ),
):
    """Launch the Unsloth web UI backend server (alias for 'unsloth studio')."""
    from unsloth_cli.commands.studio import (
        _studio_venv_python,
        _find_run_py,
        STUDIO_HOME,
    )

    # Re-execute in studio venv if available and not already inside it
    studio_venv_dir = STUDIO_HOME / ".venv"
    in_studio_venv = sys.prefix.startswith(str(studio_venv_dir))

    if not in_studio_venv:
        studio_python = _studio_venv_python()
        run_py = _find_run_py()
        if studio_python and run_py:
            if not silent:
                typer.echo("Launching Unsloth Studio... Please wait...")
            args = [
                str(studio_python),
                str(run_py),
                "--host",
                host,
                "--port",
                str(port),
            ]
            if frontend:
                args.extend(["--frontend", str(frontend)])
            if silent:
                args.append("--silent")
            # On Windows, os.execvp() spawns a child but the parent lingers,
            # so Ctrl+C only kills the parent leaving the child orphaned.
            # Start the child with Popen and wait for it there instead.
            if sys.platform == "win32":
                import subprocess as _sp

                proc = _sp.Popen(args)
                try:
                    rc = proc.wait()
                except KeyboardInterrupt:
                    # Child has its own signal handler — let it finish
                    rc = proc.wait()
                raise typer.Exit(rc)
            else:
                os.execvp(str(studio_python), args)
        else:
            typer.echo("Studio not set up. Run 'unsloth studio setup' first.")
            raise typer.Exit(1)

    # Already inside the Studio virtualenv: run the server in this process.
    from studio.backend.run import run_server

    if not silent:
        from studio.backend.run import _resolve_external_ip

        display_host = _resolve_external_ip() if host == "0.0.0.0" else host
        typer.echo(f"Starting Unsloth Studio on http://{display_host}:{port}")

    run_server(
        host = host,
        port = port,
        frontend_path = frontend,
        silent = silent,
    )

    from studio.backend.run import _shutdown_event

    try:
        if _shutdown_event is not None:
            # Poll with a timeout, matching the `unsloth studio` command: an
            # untimed Event.wait() can keep Ctrl+C from being delivered to
            # this thread.
            while not _shutdown_event.is_set():
                _shutdown_event.wait(timeout = 1)
        else:
            while True:
                time.sleep(1)
    except KeyboardInterrupt:
        from studio.backend.run import _graceful_shutdown, _server

        _graceful_shutdown(_server)
        typer.echo("\nShutting down...")


================================================
FILE: unsloth_cli/config.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

from pathlib import Path
from typing import Literal, Optional, List

import yaml
from pydantic import BaseModel, Field


class DataConfig(BaseModel):
    # Dataset selection. The `train` command requires at least one of
    # `dataset` / `local_dataset` to be set.
    # Passed to the trainer as `dataset_source` ("" when unset).
    dataset: Optional[str] = None
    # Passed to the trainer as `local_datasets`.
    local_dataset: Optional[List[str]] = None
    # Dataset format; "auto" is the default. Passed to the trainer as `format_type`.
    format_type: Literal["auto", "alpaca", "chatml", "sharegpt"] = "auto"


class TrainingConfig(BaseModel):
    # Training hyperparameters. Apart from `training_type`, `load_in_4bit`
    # and `gradient_checkpointing`, every field is forwarded by
    # `Config.training_kwargs()`.
    # "lora" selects LoRA training; `train` treats any other value as full finetuning.
    training_type: Literal["lora", "full"] = "lora"
    # Also passed to the trainer's `load_model()` by the `train` command.
    max_seq_length: int = 2048
    # Only honoured for LoRA runs; `train` passes False for full finetuning.
    load_in_4bit: bool = True
    # Converted to `str` before being handed to the trainer.
    output_dir: Path = Path("./outputs")
    num_epochs: int = 3
    learning_rate: float = 2e-4
    batch_size: int = 2
    gradient_accumulation_steps: int = 4
    warmup_steps: int = 5
    # NOTE(review): presumably 0 means "no step limit" / "no periodic saves";
    # confirm against the trainer.
    max_steps: int = 0
    save_steps: int = 0
    weight_decay: float = 0.01
    random_seed: int = 3407
    packing: bool = False
    train_on_completions: bool = False
    # Forwarded as `use_gradient_checkpointing` by `Config.model_kwargs()`.
    gradient_checkpointing: Literal["unsloth", "true", "none"] = "unsloth"


class LoraConfig(BaseModel):
    # Adapter / layer-selection settings, forwarded by `Config.model_kwargs()`.
    lora_r: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.0
    # Comma-separated module names; `Config.model_kwargs()` splits and strips
    # them. Not used for LoRA on vision models (see `vision_all_linear`).
    target_modules: str = "q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj"
    # For LoRA on vision models: True sends "all-linear" as the target
    # modules, False sends None.
    vision_all_linear: bool = False
    use_rslora: bool = False
    use_loftq: bool = False
    finetune_vision_layers: bool = True
    finetune_language_layers: bool = True
    finetune_attention_modules: bool = True
    finetune_mlp_modules: bool = True


class LoggingConfig(BaseModel):
    # Experiment-tracking settings and credentials.
    enable_wandb: bool = False
    wandb_project: str = "unsloth-training"
    # Fallback only: the `train` command prefers the CLI/env token when given.
    wandb_token: Optional[str] = None
    enable_tensorboard: bool = False
    tensorboard_dir: str = "runs"
    # Fallback only: the `train` command prefers the CLI/env token when given.
    # Not part of `Config.training_kwargs()`.
    hf_token: Optional[str] = None


class Config(BaseModel):
    # Top-level training configuration: the model identifier plus one
    # sub-model per settings group.
    model: Optional[str] = None
    data: DataConfig = Field(default_factory = DataConfig)
    training: TrainingConfig = Field(default_factory = TrainingConfig)
    lora: LoraConfig = Field(default_factory = LoraConfig)
    logging: LoggingConfig = Field(default_factory = LoggingConfig)

    def apply_overrides(self, **kwargs):
        """Copy non-None keyword values onto the matching config attribute.

        A key is matched against this object first, then against the data,
        training, lora and logging sections in that order; the first holder
        that has the attribute receives the value. Unknown keys are ignored.
        """
        for key, value in kwargs.items():
            if value is None:
                continue
            # Candidates are rebuilt per key so the current sections are used.
            holders = (self, self.data, self.training, self.lora, self.logging)
            owner = next((h for h in holders if hasattr(h, key)), None)
            if owner is not None:
                setattr(owner, key, value)

    def model_kwargs(self, use_lora: bool, is_vision: bool) -> dict:
        """Build the keyword arguments for trainer.prepare_model_for_training()."""
        lora = self.lora

        if use_lora and is_vision:
            # Vision models expect a string (e.g., "all-linear"); fall back to None to use trainer defaults
            target_modules = "all-linear" if lora.vision_all_linear else None
        else:
            # Split the comma-separated list, dropping blank entries; an
            # empty result becomes None.
            pieces = (piece.strip() for piece in str(lora.target_modules).split(","))
            names = [piece for piece in pieces if piece]
            target_modules = names if names else None

        kwargs = {"use_lora": use_lora}
        for flag in (
            "finetune_vision_layers",
            "finetune_language_layers",
            "finetune_attention_modules",
            "finetune_mlp_modules",
        ):
            kwargs[flag] = getattr(lora, flag)
        kwargs["target_modules"] = target_modules
        for setting in ("lora_r", "lora_alpha", "lora_dropout"):
            kwargs[setting] = getattr(lora, setting)
        kwargs["use_gradient_checkpointing"] = self.training.gradient_checkpointing
        kwargs["use_rslora"] = lora.use_rslora
        kwargs["use_loftq"] = lora.use_loftq
        return kwargs

    def training_kwargs(self) -> dict:
        """Build the keyword arguments for trainer.start_training()."""
        training = self.training
        log_cfg = self.logging

        # The output directory is passed as a string.
        kwargs = {"output_dir": str(training.output_dir)}
        for name in (
            "num_epochs",
            "learning_rate",
            "batch_size",
            "gradient_accumulation_steps",
            "warmup_steps",
            "max_steps",
            "save_steps",
            "weight_decay",
            "random_seed",
            "packing",
            "train_on_completions",
            "max_seq_length",
        ):
            kwargs[name] = getattr(training, name)
        for name in (
            "enable_wandb",
            "wandb_project",
            "wandb_token",
            "enable_tensorboard",
            "tensorboard_dir",
        ):
            kwargs[name] = getattr(log_cfg, name)
        return kwargs


def load_config(path: Optional[Path]) -> Config:
    """Load config from YAML/JSON file, or return defaults if no path given.

    Files ending in ".yaml" / ".yml" are parsed as YAML; any other suffix is
    parsed as JSON. An empty, whitespace-only or null document yields the
    default configuration for both formats.

    Args:
        path: Config file location, or a falsy value to use the defaults.

    Returns:
        The Config built from the file contents.

    Raises:
        FileNotFoundError: If `path` is given but does not exist.
        TypeError: If the document's top level is not a mapping.
    """
    if not path:
        return Config()

    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    text = path.read_text(encoding = "utf-8")
    if path.suffix.lower() in {".yaml", ".yml"}:
        data = yaml.safe_load(text)
    else:
        import json

        # json.loads() rejects blank input, so treat it as "no data" the same
        # way the YAML branch does.
        data = json.loads(text) if text.strip() else None

    # Empty documents (None, {}, ...) fall back to the defaults.
    data = data or {}
    if not isinstance(data, dict):
        # Fail with a readable message instead of the opaque error raised by
        # unpacking a non-mapping with **.
        raise TypeError(
            f"Config file {path} must contain a mapping at the top level, "
            f"got {type(data).__name__}"
        )

    return Config(**data)


================================================
FILE: unsloth_cli/options.py
================================================
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0

"""Generate Typer CLI options from Pydantic models."""

import functools
import inspect
from pathlib import Path
from typing import Any, Callable, Optional, get_args, get_origin

import typer
from pydantic import BaseModel


def _python_name_to_cli_flag(name: str) -> str:
    """Convert python_name to --cli-flag."""
    return "--" + name.replace("_", "-")


def _unwrap_optional(annotation: Any) -> Any:
    """Unwrap Optional[X] to X."""
    origin = get_origin(annotation)
    if origin is not None:
        args = get_args(annotation)
        if type(None) in args:
            non_none = [a for a in args if a is not type(None)]
            if non_none:
                return non_none[0]
    return annotation


def _is_bool_field(annotation: Any) -> bool:
    """Report whether the annotation is `bool`, optionally wrapped in Optional."""
    inner = _unwrap_optional(annotation)
    return inner is bool


def _is_list_type(annotation: Any) -> bool:
    """Check if type is a List."""
    return get_origin(annotation) is list


def _get_python_type(annotation: Any) -> type:
    """Map an annotation to the type used for its CLI option.

    str, int, float, bool and Path (optionally wrapped in Optional) map to
    themselves; every other annotation falls back to `str`.
    """
    supported = (str, int, float, bool, Path)
    inner = _unwrap_optional(annotation)
    return inner if inner in supported else str


def _collect_config_fields(config_class: type[BaseModel]) -> list[tuple[str, Any]]:
    """
    Flatten a config model into a list of (name, field_info) tuples.

    Fields annotated directly with a BaseModel subclass are replaced by that
    model's own fields (one level deep); all other fields are kept as they
    are. Raises ValueError when the same name shows up twice.
    """
    collected: list[tuple[str, Any]] = []
    taken: set[str] = set()

    def _register(field_name: str, info: Any) -> None:
        # Record one field, rejecting names that were already collected.
        if field_name in taken:
            raise ValueError(f"Duplicate field name '{field_name}' in config")
        taken.add(field_name)
        collected.append((field_name, info))

    for name, field_info in config_class.model_fields.items():
        annotation = field_info.annotation
        is_nested = isinstance(annotation, type) and issubclass(annotation, BaseModel)
        if is_nested:
            # The nested model's fields are collected; the container field is not.
            for nested_name, nested_field in annotation.model_fields.items():
                _register(nested_name, nested_field)
        else:
            _register(name, field_info)
    return collected


def add_options_from_config(config_class: type[BaseModel]) -> Callable:
    """
    Decorator that adds CLI options for all fields in a Pydantic config model.

    Every non-list field collected from `config_class` becomes an optional
    `typer.Option` parameter (default None) on the wrapper's advertised
    signature, unless the decorated function already declares a parameter with
    that name. Boolean fields get a `--flag/--no-flag` pair.

    The decorated function should declare a `config_overrides: dict = None` parameter
    which will receive a dict of all CLI-provided config values (i.e. those
    whose value is not None).

    Generated options are placed ahead of the function's own parameters. Required
    positional parameters of the function (those without a default) are kept in
    front of the generated options, because a signature may not list a
    non-default positional parameter after a defaulted one.

    Raises:
        ValueError: propagated from `_collect_config_fields` on duplicate field names.
    """
    fields = _collect_config_fields(config_class)
    # Names the wrapper treats as config values; list-typed fields are not exposed.
    field_names = {
        name for name, field_info in fields if not _is_list_type(field_info.annotation)
    }

    def decorator(func: Callable) -> Callable:
        sig = inspect.signature(func)
        original_params = list(sig.parameters.values())
        original_param_names = {p.name for p in original_params}

        # One generated option per config field.
        option_params = []
        for field_name, field_info in fields:
            # Skip fields already defined in function signature (e.g., with envvar)
            if field_name in original_param_names:
                continue
            annotation = field_info.annotation
            if _is_list_type(annotation):
                continue

            flag_name = _python_name_to_cli_flag(field_name)
            help_text = field_info.description or ""

            if _is_bool_field(annotation):
                option_type = bool
                default = typer.Option(
                    None,
                    f"{flag_name}/--no-{field_name.replace('_', '-')}",
                    help = help_text,
                )
            else:
                option_type = _get_python_type(annotation)
                default = typer.Option(None, flag_name, help = help_text)

            option_params.append(
                inspect.Parameter(
                    field_name,
                    inspect.Parameter.POSITIONAL_OR_KEYWORD,
                    default = default,
                    annotation = Optional[option_type],
                )
            )

        # Split the original params, excluding config_overrides (will be injected).
        # Required positional params must precede the defaulted generated options,
        # otherwise Signature validation rejects the combined parameter list.
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        required_params = []
        remaining_params = []
        for param in original_params:
            if param.name == "config_overrides":
                continue
            is_required_positional = (
                param.kind in positional_kinds
                and param.default is inspect.Parameter.empty
            )
            if is_required_positional:
                required_params.append(param)
            else:
                remaining_params.append(param)

        # With no required positional params this is the historical order:
        # generated options first, then the function's own parameters.
        new_params = required_params + option_params + remaining_params
        new_sig = sig.replace(parameters = new_params)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            config_overrides = {}
            # Snapshot the matching keys first: kwargs is mutated inside the loop.
            for key in [k for k in kwargs if k in field_names]:
                value = kwargs[key]
                if value is not None:
                    config_overrides[key] = value
                # Only delete if not an explicitly declared parameter
                if key not in original_param_names:
                    del kwargs[key]

            kwargs["config_overrides"] = config_overrides
            return func(*args, **kwargs)

        # Advertise the extended signature to anything that introspects the wrapper.
        wrapper.__signature__ = new_sig
        return wrapper

    return decorator